{-
  Copyright 2016 Awake Networks

  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-}

-- | This module extends the "Data.ByteString.Builder" module by memoizing the
-- length of the output that each `Builder` will produce.
--
-- Example use:
--
-- >>> Data.ByteString.Lazy.unpack (toLazyByteString (word32BE 42 <> charUtf8 'λ'))
-- [0,0,0,42,206,187]

{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}

module Proto3.Wire.Builder
    (
      -- * `Builder` type
      Builder

      -- * Create `Builder`s
    , byteString
    , lazyByteString
    , shortByteString
    , word8
    , word16BE
    , word16LE
    , word32BE
    , word32LE
    , word64BE
    , word64LE
    , word64Base128LEVar
    , int8
    , int16BE
    , int16LE
    , int32BE
    , int32LE
    , int64BE
    , int64LE
    , floatBE
    , floatLE
    , doubleBE
    , doubleLE
    , char7
    , string7
    , char8
    , string8
    , charUtf8
    , stringUtf8

      -- * Consume `Builder`s
    , builderLength
    , rawBuilder
    , toLazyByteString
    , hPutBuilder

    -- * Internal API
    , unsafeMakeBuilder
    ) where

import           Data.Bits                     ((.|.), shiftR)
import qualified Data.ByteString               as B
import qualified Data.ByteString.Builder       as BB
import qualified Data.ByteString.Builder.Extra as BB
import qualified Data.ByteString.Lazy          as BL
import qualified Data.ByteString.Short         as BS
import           Data.Char                     ( ord )
import           Data.Int                      ( Int8, Int16, Int32, Int64 )
import           Data.Semigroup                ( Semigroup(..), Sum(..) )
import           Data.Word                     ( Word8, Word16, Word32, Word64 )
import           System.IO                     ( Handle )

-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Data.Semigroup

-- | A `Builder` is like a @"Data.ByteString.Builder".`BB.Builder`@, but also
-- memoizes the resulting length so that we can efficiently encode nested
-- embedded messages.
--
-- You create a `Builder` by using one of the primitives provided in the
-- \"Create `Builder`s\" section.
--
-- You combine `Builder`s using the `Monoid` and `Semigroup` instances.
--
-- You consume a `Builder` by using one of the utilities provided in the
-- \"Consume `Builder`s\" section.
--
-- Representation: the first field is the memoized length (strict and
-- unpacked, so it is evaluated whenever the constructor is built); the
-- second field is the underlying @"Data.ByteString.Builder".`BB.Builder`@,
-- which is stored lazily.  The constructor itself is not exported; use
-- `unsafeMakeBuilder` if you must supply a length by hand.
data Builder = Builder {-# UNPACK #-} !(Sum Word) BB.Builder

-- | Appending two `Builder`s adds their memoized lengths and appends the
-- underlying builders in the same left-to-right order.
instance Semigroup Builder where
  Builder (Sum lenL) rawL <> Builder (Sum lenR) rawR =
    Builder (Sum (lenL + lenR)) (rawL <> rawR)

-- | The empty `Builder` has a memoized length of zero and wraps the empty
-- underlying builder.
instance Monoid Builder where
  mempty = Builder (Sum 0) mempty
  mappend = (<>)

-- | Renders a `Builder` as an application of `lazyByteString` to the bytes
-- that the builder produces (obtained by running `toLazyByteString`).
instance Show Builder where
  showsPrec d b =
    showParen (d > 10) $
      showString "Proto3.Wire.Builder.lazyByteString "
        . shows (toLazyByteString b)

-- | Report the memoized length of a `Builder`
--
-- This only reads the stored length; the underlying builder is not run.
--
-- > builderLength mempty = 0
-- >
-- > builderLength (x <> y) = builderLength x + builderLength y
--
-- >>> builderLength (word32BE 42)
-- 4
-- >>> builderLength (stringUtf8 "ABC")
-- 3
builderLength :: Builder -> Word
builderLength (Builder (Sum n) _) = n

-- | Extract the wrapped @"Data.ByteString.Builder".`BB.Builder`@, discarding
-- the memoized length
--
-- > rawBuilder mempty = mempty
-- >
-- > rawBuilder (x <> y) = rawBuilder x <> rawBuilder y
--
-- >>> Data.ByteString.Builder.toLazyByteString (rawBuilder (stringUtf8 "ABC"))
-- "ABC"
rawBuilder :: Builder -> BB.Builder
rawBuilder builder = case builder of
  Builder _ raw -> raw

-- | Pair a @"Data.ByteString.Builder".`BB.Builder`@ with a caller-supplied
-- length to form a `Builder`.
--
-- This is unsafe because nothing checks the length: it is the caller's
-- responsibility to pass exactly the number of bytes that the given
-- @"Data.ByteString.Builder".`BB.Builder`@ will produce.
--
-- >>> unsafeMakeBuilder 3 (Data.ByteString.Builder.stringUtf8 "ABC")
-- Proto3.Wire.Builder.lazyByteString "ABC"
unsafeMakeBuilder :: Word -> BB.Builder -> Builder
unsafeMakeBuilder size = Builder (Sum size)

-- | Run a `Builder`, producing a lazy `BL.ByteString`
--
-- > toLazyByteString mempty = mempty
-- >
-- > toLazyByteString (x <> y) = toLazyByteString x <> toLazyByteString y
--
-- >>> toLazyByteString (stringUtf8 "ABC")
-- "ABC"
toLazyByteString :: Builder -> BL.ByteString
toLazyByteString (Builder size raw) =
    BB.toLazyByteStringWith allocation BL.empty raw
  where
    -- The first buffer is sized from the memoized length, so an accurate
    -- length means a single allocation suffices.  A wrong length would
    -- point to a bug in whichever primitive built this 'Builder'.
    firstChunkSize = fromIntegral (getSum size)

    -- Any further buffers (needed only if the length was inaccurate)
    -- use the default chunk size.
    allocation = BB.safeStrategy firstChunkSize BB.defaultChunkSize
{-# NOINLINE toLazyByteString #-}
  -- NOINLINE keeps callers small; see docs for 'BB.toLazyByteStringWith'.

-- | Output the contents of a `Builder` to a `Handle`
--
-- The memoized length plays no part here: the underlying builder is simply
-- passed to @"Data.ByteString.Builder".`BB.hPutBuilder`@.
--
-- > hPutBuilder handle mempty = mempty
-- >
-- > hPutBuilder handle (x <> y) = hPutBuilder handle x <> hPutBuilder handle y
--
-- >>> hPutBuilder System.IO.stdout (stringUtf8 "ABC\n")
-- ABC
hPutBuilder :: Handle -> Builder -> IO ()
hPutBuilder handle builder = BB.hPutBuilder handle (rawBuilder builder)

-- | Build a `Builder` from a strict `B.ByteString`
--
-- The bytes are copied into the output buffer rather than shared.
--
-- > byteString mempty = mempty
-- >
-- > byteString (x <> y) = byteString x <> byteString y
--
-- >>> byteString "ABC"
-- Proto3.Wire.Builder.lazyByteString "ABC"
byteString :: B.ByteString -> Builder
byteString bs = Builder size copied
  where
    size = Sum (fromIntegral (B.length bs))

    -- NOTE: Why 'BB.byteStringCopy' rather than 'BB.byteString'?
    --
    -- 'toLazyByteString' sizes its first chunk to exactly the builder
    -- length, aiming to emit a single chunk.  (That length is accurate
    -- unless there is a bug in this library or in some argument given to
    -- 'unsafeMakeBuilder'.)
    --
    -- For inputs above a certain size threshold, 'BB.byteString' would
    -- close the current chunk and splice 'bs' in as a chunk of its own,
    -- without copying.  Part of the chunk that 'toLazyByteString'
    -- allocated would then go unused.
    --
    -- Always copying avoids that waste.
    copied = BB.byteStringCopy bs

-- | Build a `Builder` from a lazy `BL.ByteString`
--
-- Warning: the length is computed from the whole lazy `BL.ByteString`, so
-- evaluating it forces all of the chunks, and those chunks stay allocated
-- until you finish using the builder.
--
-- > lazyByteString mempty = mempty
-- >
-- > lazyByteString (x <> y) = lazyByteString x <> lazyByteString y
--
-- > toLazyByteString . lazyByteString = id
-- >
-- > lazyByteString . toLazyByteString = id
--
-- >>> lazyByteString "ABC"
-- Proto3.Wire.Builder.lazyByteString "ABC"
lazyByteString :: BL.ByteString -> Builder
lazyByteString bl = Builder size copied
  where
    size = Sum (fromIntegral (BL.length bl))

    -- NOTE: 'BB.lazyByteStringCopy' is used so that the bytes are copied
    -- into the chunk that 'toLazyByteString' pre-sizes from the memoized
    -- length, instead of being spliced in as separate chunks.  This is
    -- the same reasoning that leads 'byteString' to use
    -- 'BB.byteStringCopy'.
    copied = BB.lazyByteStringCopy bl

-- | Build a `Builder` from a `BS.ShortByteString`
--
-- > shortByteString mempty = mempty
-- >
-- > shortByteString (x <> y) = shortByteString x <> shortByteString y
--
-- >>> shortByteString "ABC"
-- Proto3.Wire.Builder.lazyByteString "ABC"
shortByteString :: BS.ShortByteString -> Builder
shortByteString sbs = Builder size raw
  where
    size = Sum (fromIntegral (BS.length sbs))
    raw = BB.shortByteString sbs

-- | Emit a `Word8` as a single byte
--
-- >>> Data.ByteString.Lazy.unpack (toLazyByteString (word8 42))
-- [42]
word8 :: Word8 -> Builder
word8 = Builder (Sum 1) . BB.word8

-- | Emit an `Int8` as a single byte
--
-- >>> Data.ByteString.Lazy.unpack (toLazyByteString (int8 (-5)))
-- [251]
int8 :: Int8 -> Builder
int8 = Builder (Sum 1) . BB.int8

-- | Emit a `Word16` as two bytes in big-endian order
--
-- That is, the most significant byte comes first and the least significant
-- byte comes last
--
-- >>> Data.ByteString.Lazy.unpack (toLazyByteString (word16BE 42))
-- [0,42]
word16BE :: Word16 -> Builder
word16BE = Builder (Sum 2) . BB.word16BE

-- | Emit a `Word16` as two bytes in little-endian order
--
-- That is, the least significant byte comes first and the most significant
-- byte comes last
--
-- >>> Data.ByteString.Lazy.unpack (toLazyByteString (word16LE 42))
-- [42,0]
word16LE :: Word16 -> Builder
word16LE = Builder (Sum 2) . BB.word16LE

-- | Emit an `Int16` as two bytes in big-endian order
--
-- That is, the most significant byte comes first and the least significant
-- byte comes last
--
-- >>> Data.ByteString.Lazy.unpack (toLazyByteString (int16BE (-5)))
-- [255,251]
int16BE :: Int16 -> Builder
int16BE = Builder (Sum 2) . BB.int16BE

-- | Emit an `Int16` as two bytes in little-endian order
--
-- That is, the least significant byte comes first and the most significant
-- byte comes last
--
-- >>> Data.ByteString.Lazy.unpack (toLazyByteString (int16LE (-5)))
-- [251,255]
int16LE :: Int16 -> Builder
int16LE = Builder (Sum 2) . BB.int16LE

-- | Emit a `Word32` as four bytes in big-endian order
--
-- That is, the most significant byte comes first and the least significant
-- byte comes last
--
-- >>> Data.ByteString.Lazy.unpack (toLazyByteString (word32BE 42))
-- [0,0,0,42]
word32BE :: Word32 -> Builder
word32BE = Builder (Sum 4) . BB.word32BE

-- | Emit a `Word32` as four bytes in little-endian order
--
-- That is, the least significant byte comes first and the most significant
-- byte comes last
--
-- >>> Data.ByteString.Lazy.unpack (toLazyByteString (word32LE 42))
-- [42,0,0,0]
word32LE :: Word32 -> Builder
word32LE = Builder (Sum 4) . BB.word32LE

-- | Emit an `Int32` as four bytes in big-endian order
--
-- That is, the most significant byte comes first and the least significant
-- byte comes last
--
-- >>> Data.ByteString.Lazy.unpack (toLazyByteString (int32BE (-5)))
-- [255,255,255,251]
int32BE :: Int32 -> Builder
int32BE = Builder (Sum 4) . BB.int32BE

-- | Emit an `Int32` as four bytes in little-endian order
--
-- That is, the least significant byte comes first and the most significant
-- byte comes last
--
-- >>> Data.ByteString.Lazy.unpack (toLazyByteString (int32LE (-5)))
-- [251,255,255,255]
int32LE :: Int32 -> Builder
int32LE = Builder (Sum 4) . BB.int32LE

-- | Emit a `Float` as four bytes: its IEEE-754 representation in big-endian
-- order
--
-- That is, the most significant byte comes first and the least significant
-- byte comes last
--
-- >>> Data.ByteString.Lazy.unpack (toLazyByteString (floatBE 4.2))
-- [64,134,102,102]
floatBE :: Float -> Builder
floatBE = Builder (Sum 4) . BB.floatBE

-- | Emit a `Float` as four bytes: its IEEE-754 representation in
-- little-endian order
--
-- That is, the least significant byte comes first and the most significant
-- byte comes last
--
-- >>> Data.ByteString.Lazy.unpack (toLazyByteString (floatLE 4.2))
-- [102,102,134,64]
floatLE :: Float -> Builder
floatLE = Builder (Sum 4) . BB.floatLE

-- | Emit a `Word64` as eight bytes in big-endian order
--
-- That is, the most significant byte comes first and the least significant
-- byte comes last
--
-- >>> Data.ByteString.Lazy.unpack (toLazyByteString (word64BE 42))
-- [0,0,0,0,0,0,0,42]
word64BE :: Word64 -> Builder
word64BE = Builder (Sum 8) . BB.word64BE

-- | Emit a `Word64` as eight bytes in little-endian order
--
-- That is, the least significant byte comes first and the most significant
-- byte comes last
--
-- >>> Data.ByteString.Lazy.unpack (toLazyByteString (word64LE 42))
-- [42,0,0,0,0,0,0,0]
word64LE :: Word64 -> Builder
word64LE = Builder (Sum 8) . BB.word64LE

-- | Emit a `Word64` in the following variable-length encoding:
--
--   1. Write the value in base 128, dropping unnecessary digits
--   (that is, zero digits are kept only when they are less
--   significant than some nonzero digit).
--
--   2. Order those base-128 digits from least significant to
--   most significant (that is, little-endian order).
--
--   3. Add 128 to every digit other than the most significant one,
--   so the result is a run of octets ending in one that is <= 127.
--
-- This encoding is used in the wire format of Protocol Buffers version 3.
--
-- The output is between one and ten bytes long, depending on the magnitude
-- of the value.
--
-- >>> Data.ByteString.Lazy.unpack (toLazyByteString (word64Base128LEVar 300))
-- [172,2]
word64Base128LEVar :: Word64 -> Builder
{-
The guard thresholds below are the successive powers of 128:

Prelude Data.Bits Numeric> map (("0x"++) .($"").showHex) $ map bit $ take 11 [0,7..]
["0x1","0x80","0x4000","0x200000","0x10000000","0x800000000","0x40000000000","0x2000000000000","0x100000000000000","0x8000000000000000","0x400000000000000000"]
-}
word64Base128LEVar i
    | i < 0x80               = word8 (fromIntegral i)
    | i < 0x4000             = Builder (Sum 2) (cont 0 <> stop 7)
    | i < 0x200000           = Builder (Sum 3) (cont 0 <> cont 7 <> stop 14)
    | i < 0x10000000         = Builder (Sum 4) (cont 0 <> cont 7 <> cont 14 <>
                                                stop 21)
    | i < 0x800000000        = Builder (Sum 5) (cont 0 <> cont 7 <> cont 14 <>
                                                cont 21 <> stop 28)
    | i < 0x40000000000      = Builder (Sum 6) (cont 0 <> cont 7 <> cont 14 <>
                                                cont 21 <> cont 28 <> stop 35)
    | i < 0x2000000000000    = Builder (Sum 7) (cont 0 <> cont 7 <> cont 14 <>
                                                cont 21 <> cont 28 <> cont 35 <>
                                                stop 42)
    | i < 0x100000000000000  = Builder (Sum 8) (cont 0 <> cont 7 <> cont 14 <>
                                                cont 21 <> cont 28 <> cont 35 <>
                                                cont 42 <> stop 49)
    | i < 0x8000000000000000 = Builder (Sum 9) (cont 0 <> cont 7 <> cont 14 <>
                                                cont 21 <> cont 28 <> cont 35 <>
                                                cont 42 <> cont 49 <> stop 56)
    | otherwise              = Builder (Sum 10) (cont 0 <> cont 7 <> cont 14 <>
                                                 cont 21 <> cont 28 <> cont 35 <>
                                                 cont 42 <> cont 49 <> cont 56 <>
                                                 stop 63)
  where
    -- A non-final octet: the bits of 'i' starting at offset 'k', truncated
    -- to a byte, with the high (continuation) bit set.
    cont :: Int -> BB.Builder
    cont k = BB.word8 (fromIntegral (i `shiftR` k) .|. 0x80)

    -- The final octet: the remaining bits of 'i' from offset 'k' upward.
    -- The enclosing guard ensures these fit in seven bits, so the high bit
    -- is clear.
    stop :: Int -> BB.Builder
    stop k = BB.word8 (fromIntegral (i `shiftR` k))


-- | Emit an `Int64` as eight bytes in big-endian order
--
-- That is, the most significant byte comes first and the least significant
-- byte comes last
--
-- >>> Data.ByteString.Lazy.unpack (toLazyByteString (int64BE (-5)))
-- [255,255,255,255,255,255,255,251]
int64BE :: Int64 -> Builder
int64BE = Builder (Sum 8) . BB.int64BE

-- | Emit an `Int64` as eight bytes in little-endian order
--
-- That is, the least significant byte comes first and the most significant
-- byte comes last
--
-- >>> Data.ByteString.Lazy.unpack (toLazyByteString (int64LE (-5)))
-- [251,255,255,255,255,255,255,255]
int64LE :: Int64 -> Builder
int64LE = Builder (Sum 8) . BB.int64LE

-- | Emit a `Double` as eight bytes: its IEEE-754 representation in
-- big-endian order
--
-- That is, the most significant byte comes first and the least significant
-- byte comes last
--
-- >>> Data.ByteString.Lazy.unpack (toLazyByteString (doubleBE 4.2))
-- [64,16,204,204,204,204,204,205]
doubleBE :: Double -> Builder
doubleBE = Builder (Sum 8) . BB.doubleBE

-- | Emit a `Double` as eight bytes: its IEEE-754 representation in
-- little-endian order
--
-- That is, the least significant byte comes first and the most significant
-- byte comes last
--
-- >>> Data.ByteString.Lazy.unpack (toLazyByteString (doubleLE 4.2))
-- [205,204,204,204,204,204,16,64]
doubleLE :: Double -> Builder
doubleLE = Builder (Sum 8) . BB.doubleLE

-- | Emit an @ASCII@ `Char` as a single byte
--
-- __Careful:__ A Unicode character outside the @ASCII@ alphabet is
-- truncated: only its lowest 7 bits are encoded
--
-- >>> char7 ';'
-- Proto3.Wire.Builder.lazyByteString ";"
-- >>> char7 'λ' -- Example of truncation
-- Proto3.Wire.Builder.lazyByteString ";"
char7 :: Char -> Builder
char7 = Builder (Sum 1) . BB.char7

-- | Emit an @ASCII@ `String`, one byte per character
--
-- __Careful:__ Any character outside the @ASCII@ alphabet is truncated:
-- only the lowest 7 bits of each character are encoded
--
-- > string7 mempty = mempty
-- >
-- > string7 (x <> y) = string7 x <> string7 y
--
-- >>> string7 "ABC"
-- Proto3.Wire.Builder.lazyByteString "ABC"
-- >>> string7 "←↑→↓" -- Example of truncation
-- Proto3.Wire.Builder.lazyByteString "\DLE\DC1\DC2\DC3"
string7 :: String -> Builder
string7 s = Builder size raw
  where
    -- Each character yields exactly one byte, so the byte count equals
    -- the number of characters.
    size = Sum (fromIntegral (length s))
    raw = BB.string7 s

-- | Emit an @ISO/IEC 8859-1@ `Char` as a single byte
--
-- __Careful:__ A Unicode character outside the @ISO/IEC 8859-1@ alphabet is
-- truncated: only its lowest 8 bits are encoded
--
-- >>> char8 ';'
-- Proto3.Wire.Builder.lazyByteString ";"
-- >>> char8 'λ' -- Example of truncation
-- Proto3.Wire.Builder.lazyByteString "\187"
char8 :: Char -> Builder
char8 = Builder (Sum 1) . BB.char8

-- | Emit an @ISO/IEC 8859-1@ `String`, one byte per character
--
-- __Careful:__ Any character outside the @ISO/IEC 8859-1@ alphabet is
-- truncated: only the lowest 8 bits of each character are encoded
--
-- > string8 mempty = mempty
-- >
-- > string8 (x <> y) = string8 x <> string8 y
--
-- >>> string8 "ABC"
-- Proto3.Wire.Builder.lazyByteString "ABC"
-- >>> string8 "←↑→↓" -- Example of truncation
-- Proto3.Wire.Builder.lazyByteString "\144\145\146\147"
string8 :: String -> Builder
string8 s = Builder size raw
  where
    -- Each character yields exactly one byte, so the byte count equals
    -- the number of characters.
    size = Sum (fromIntegral (length s))
    raw = BB.string8 s

-- | Emit a Unicode `Char` in its @UTF-8@ encoding
--
-- The memoized length is the encoded width of the character, as computed by
-- @utf8Width@.
--
-- >>> charUtf8 'A'
-- Proto3.Wire.Builder.lazyByteString "A"
-- >>> charUtf8 'λ'
-- Proto3.Wire.Builder.lazyByteString "\206\187"
-- >>> hPutBuilder System.IO.stdout (charUtf8 'λ' <> charUtf8 '\n')
-- λ
charUtf8 :: Char -> Builder
charUtf8 c = Builder size raw
  where
    size = Sum (utf8Width c)
    raw = BB.charUtf8 c

-- | Emit a Unicode `String` in its @UTF-8@ encoding
--
-- The memoized length is the sum of the encoded widths of the characters,
-- so the whole string is traversed to compute it.
--
-- > stringUtf8 mempty = mempty
-- >
-- > stringUtf8 (x <> y) = stringUtf8 x <> stringUtf8 y
--
-- >>> stringUtf8 "ABC"
-- Proto3.Wire.Builder.lazyByteString "ABC"
-- >>> stringUtf8 "←↑→↓"
-- Proto3.Wire.Builder.lazyByteString "\226\134\144\226\134\145\226\134\146\226\134\147"
-- >>> hPutBuilder System.IO.stdout (stringUtf8 "←↑→↓\n")
-- ←↑→↓
stringUtf8 :: String -> Builder
stringUtf8 s = Builder (Sum (go 0 s)) (BB.stringUtf8 s)
  where
    -- Strict left-to-right accumulation of the encoded width.
    go !acc chars = case chars of
      []       -> acc
      (c : cs) -> go (acc + utf8Width c) cs
{-# INLINABLE stringUtf8 #-}
  -- INLINABLE gives the compiler the opportunity to precompute
  -- the length when the input is a constant.

-- Number of bytes in the UTF-8 encoding of a character, chosen by
-- code-point range: up to 0x7F is 1 byte, up to 0x7FF is 2, up to 0xFFFF
-- is 3, and anything above is 4.
utf8Width :: Char -> Word
utf8Width c
  | code <= 0x007F = 1
  | code <= 0x07FF = 2
  | code <= 0xFFFF = 3
  | otherwise      = 4
  where
    code = ord c
{-# INLINE utf8Width #-}