module Z.Data.Builder.Base
(
AllocateStrategy(..)
, Buffer(..)
, BuildStep
, Builder(..)
, append
, buildBytes
, buildBytesWith
, buildBytesList
, buildBytesListWith
, buildAndRun
, buildAndRunWith
, bytes
, ensureN
, atMost
, writeN
, doubleBuffer
, insertChunk
, oneShotAction
, encodePrim
, encodePrimLE
, encodePrimBE
, stringModifiedUTF8, charModifiedUTF8, stringUTF8, charUTF8, string7, char7, string8, char8, text
, paren, curly, square, angle, quotes, squotes, colon, comma, intercalateVec, intercalateList
) where
import Control.Monad
import Control.Monad.Primitive
import Control.Monad.ST
import Control.Monad.ST.Unsafe (unsafeInterleaveST)
import Data.Bits (shiftL, shiftR, (.&.))
import Data.Primitive.Ptr (copyPtrToMutablePrimArray)
import Data.Word
import Data.Int
import GHC.CString (unpackCString#, unpackCStringUtf8#)
import GHC.Exts
import qualified Z.Data.Array as A
import Z.Data.Array.Unaligned
import qualified Z.Data.Text.Base as T
import qualified Z.Data.Text.UTF8Codec as T
import qualified Z.Data.Vector.Base as V
import qualified Z.Data.Vector as V
import System.IO.Unsafe
import Test.QuickCheck.Arbitrary (Arbitrary(..), CoArbitrary(..))
data AllocateStrategy s
= DoubleBuffer
| InsertChunk {-# UNPACK #-} !Int
| OneShotAction (V.Bytes -> ST s ())
data Buffer s = Buffer {-# UNPACK #-} !(A.MutablePrimArray s Word8)
{-# UNPACK #-} !Int
type BuildStep s = Buffer s -> ST s [V.Bytes]
newtype Builder a = Builder
{ runBuilder :: forall s. AllocateStrategy s -> (a -> BuildStep s) -> BuildStep s}
instance Show (Builder a) where
show = show . buildBytes
instance Functor Builder where
{-# INLINE fmap #-}
fmap f (Builder b) = Builder (\ al k -> b al (k . f))
{-# INLINE (<$) #-}
a <$ (Builder b) = Builder (\ al k -> b al (\ _ -> k a))
instance Applicative Builder where
{-# INLINE pure #-}
pure x = Builder (\ _ k -> k x)
{-# INLINE (<*>) #-}
(Builder f) <*> (Builder b) = Builder (\ al k -> f al ( \ ab -> b al (k . ab)))
{-# INLINE (*>) #-}
(*>) = append
instance Monad Builder where
{-# INLINE (>>=) #-}
(Builder b) >>= f = Builder (\ al k -> b al ( \ a -> runBuilder (f a) al k))
{-# INLINE (>>) #-}
(>>) = append
instance Semigroup (Builder ()) where
(<>) = append
{-# INLINE (<>) #-}
instance Monoid (Builder ()) where
mempty = pure ()
{-# INLINE mempty #-}
mappend = append
{-# INLINE mappend #-}
mconcat = foldr append (pure ())
{-# INLINE mconcat #-}
instance (a ~ ()) => IsString (Builder a) where
{-# INLINE fromString #-}
fromString = stringModifiedUTF8
instance Arbitrary (Builder ()) where
arbitrary = bytes <$> arbitrary
shrink b = (bytes . V.pack) <$> shrink (V.unpack (buildBytes b))
instance CoArbitrary (Builder ()) where
coarbitrary = coarbitrary . buildBytes
stringModifiedUTF8 :: String -> Builder ()
{-# INLINE CONLIKE [0] stringModifiedUTF8 #-}
{-# RULES
"stringModifiedUTF8/packAddrModified" forall addr . stringModifiedUTF8 (unpackCString# addr) = packAddrModified addr
#-}
{-# RULES
"stringModifiedUTF8/packAddrModified" forall addr . stringModifiedUTF8 (unpackCStringUtf8# addr) = packAddrModified addr
#-}
stringModifiedUTF8 = mapM_ charModifiedUTF8
charModifiedUTF8 :: Char -> Builder ()
{-# INLINE charModifiedUTF8 #-}
charModifiedUTF8 chr = do
ensureN 4
Builder (\ _ k (Buffer mba i) -> do
i' <- T.encodeCharModifiedUTF8 mba i chr
k () (Buffer mba i'))
packAddrModified :: Addr# -> Builder ()
packAddrModified addr0# = copy addr0#
where
len = fromIntegral . unsafeDupablePerformIO $ V.c_strlen addr0#
copy addr# = do
ensureN len
Builder (\ _ k (Buffer mba i) -> do
copyPtrToMutablePrimArray mba i (Ptr addr#) len
k () (Buffer mba (i + len)))
append :: Builder a -> Builder b -> Builder b
{-# INLINE append #-}
append (Builder f) (Builder g) = Builder (\ al k -> f al ( \ _ -> g al k))
bytes :: V.Bytes -> Builder ()
{-# INLINE bytes #-}
bytes bs@(V.PrimVector arr s l) = Builder (\ strategy k buffer@(Buffer buf offset) ->
case strategy of
DoubleBuffer -> copy strategy k buffer
InsertChunk chunkSiz
| l <= chunkSiz `shiftR` 1 ->
copy strategy k buffer
| offset /= 0 ->
insertChunk chunkSiz 0 (\ buffer' -> (bs:) `fmap` k () buffer') buffer
| otherwise -> (bs:) `fmap` k () buffer
OneShotAction action -> do
chunkSiz <- A.sizeofMutableArr buf
case () of
_
| l <= chunkSiz `shiftR` 1 ->
copy strategy k buffer
| offset /= 0 ->
oneShotAction action 0 (\ buffer' -> action bs >> k () buffer') buffer
| otherwise -> action bs >> k () buffer)
where
copy :: forall s. AllocateStrategy s -> (() -> BuildStep s) -> BuildStep s
copy strategy k =
runBuilder (ensureN l) strategy ( \ _ (Buffer buf offset) -> do
A.copyArr buf offset arr s l
k () (Buffer buf (offset+l)))
{-# INLINE copy #-}
ensureN :: Int -> Builder ()
{-# INLINE ensureN #-}
ensureN !n = Builder $ \ strategy k buffer@(Buffer buf offset) -> do
siz <- A.sizeofMutableArr buf
if siz - offset >= n
then k () buffer
else handleBoundary strategy n k buffer
where
{-# NOINLINE handleBoundary #-}
handleBoundary DoubleBuffer n' k buffer = doubleBuffer n' (k ()) buffer
handleBoundary (InsertChunk chunkSiz) n' k buffer = insertChunk chunkSiz n' (k ()) buffer
handleBoundary (OneShotAction action) n' k buffer = oneShotAction action n' (k ()) buffer
doubleBuffer :: Int -> BuildStep s -> BuildStep s
doubleBuffer !wantSiz k (Buffer buf offset) = do
!siz <- A.sizeofMutableArr buf
let !siz' = max (offset + wantSiz `shiftL` 1)
(siz `shiftL` 1)
buf' <- A.resizeMutableArr buf siz'
k (Buffer buf' offset)
{-# INLINE doubleBuffer #-}
insertChunk :: Int -> Int -> BuildStep s -> BuildStep s
{-# INLINE insertChunk #-}
insertChunk !chunkSiz !wantSiz k (Buffer buf offset) = do
!siz <- A.sizeofMutableArr buf
case () of
_
| offset /= 0 -> do
when (offset < siz)
(A.shrinkMutableArr buf offset)
arr <- A.unsafeFreezeArr buf
buf' <- A.newArr (max wantSiz chunkSiz)
xs <- unsafeInterleaveST (k (Buffer buf' 0))
let v = V.fromArr arr 0 offset
v `seq` pure (v : xs)
| wantSiz <= siz -> k (Buffer buf 0)
| otherwise -> do
buf' <- A.newArr (max wantSiz chunkSiz)
k (Buffer buf' 0)
oneShotAction :: (V.Bytes -> ST s ()) -> Int -> BuildStep s -> BuildStep s
{-# INLINE oneShotAction #-}
oneShotAction action !wantSiz k (Buffer buf offset) = do
!siz <- A.sizeofMutableArr buf
case () of
_
| offset /= 0 -> do
arr <- A.unsafeFreezeArr buf
action (V.PrimVector arr 0 offset)
if wantSiz <= siz
then k (Buffer buf 0)
else do
buf' <- A.newArr wantSiz
k (Buffer buf' 0)
| wantSiz <= siz -> k (Buffer buf 0)
| otherwise -> do
buf' <- A.newArr wantSiz
k (Buffer buf' 0 )
buildBytes :: Builder a -> V.Bytes
{-# INLINE buildBytes #-}
buildBytes = buildBytesWith V.defaultInitSize
buildBytesWith :: Int -> Builder a -> V.Bytes
{-# INLINABLE buildBytesWith #-}
buildBytesWith initSiz (Builder b) = runST $ do
buf <- A.newArr initSiz
[bs] <- b DoubleBuffer lastStep (Buffer buf 0 )
pure bs
where
lastStep _ (Buffer buf offset) = do
siz <- A.sizeofMutableArr buf
when (offset < siz) (A.shrinkMutableArr buf offset)
arr <- A.unsafeFreezeArr buf
pure [V.PrimVector arr 0 offset]
buildBytesList :: Builder a -> [V.Bytes]
{-# INLINE buildBytesList #-}
buildBytesList = buildBytesListWith V.smallChunkSize V.defaultChunkSize
buildBytesListWith :: Int -> Int -> Builder a -> [V.Bytes]
{-# INLINABLE buildBytesListWith #-}
buildBytesListWith initSiz chunkSiz (Builder b) = runST $ do
buf <- A.newArr initSiz
b (InsertChunk chunkSiz) lastStep (Buffer buf 0)
where
lastStep _ (Buffer buf offset) = do
arr <- A.unsafeFreezeArr buf
pure [V.PrimVector arr 0 offset]
buildAndRun :: (V.Bytes -> IO ()) -> Builder a -> IO ()
buildAndRun = buildAndRunWith V.defaultChunkSize
buildAndRunWith :: Int -> (V.Bytes -> IO ()) -> Builder a -> IO ()
buildAndRunWith chunkSiz action (Builder b) = do
buf <- A.newArr chunkSiz
_ <- stToIO (b (OneShotAction (\ bs -> ioToPrim (action bs))) lastStep (Buffer buf 0))
pure ()
where
lastStep :: a -> BuildStep RealWorld
lastStep _ (Buffer buf offset) = do
arr <- A.unsafeFreezeArr buf
ioToPrim (action (V.PrimVector arr 0 offset))
pure []
{-# INLINABLE buildAndRun #-}
atMost :: Int
-> (forall s. A.MutablePrimArray s Word8 -> Int -> ST s Int)
-> Builder ()
{-# INLINE atMost #-}
atMost n f = ensureN n `append`
Builder (\ _ k (Buffer buf offset ) ->
f buf offset >>= \ offset' -> k () (Buffer buf offset'))
writeN :: Int
-> (forall s. A.MutablePrimArray s Word8 -> Int -> ST s ())
-> Builder ()
{-# INLINE writeN #-}
writeN n f = ensureN n `append`
Builder (\ _ k (Buffer buf offset ) ->
f buf offset >> k () (Buffer buf (offset+n)))
encodePrim :: forall a. Unaligned a => a -> Builder ()
{-# INLINE encodePrim #-}
{-# SPECIALIZE INLINE encodePrim :: Word -> Builder () #-}
{-# SPECIALIZE INLINE encodePrim :: Word64 -> Builder () #-}
{-# SPECIALIZE INLINE encodePrim :: Word32 -> Builder () #-}
{-# SPECIALIZE INLINE encodePrim :: Word16 -> Builder () #-}
{-# SPECIALIZE INLINE encodePrim :: Word8 -> Builder () #-}
{-# SPECIALIZE INLINE encodePrim :: Int -> Builder () #-}
{-# SPECIALIZE INLINE encodePrim :: Int64 -> Builder () #-}
{-# SPECIALIZE INLINE encodePrim :: Int32 -> Builder () #-}
{-# SPECIALIZE INLINE encodePrim :: Int16 -> Builder () #-}
{-# SPECIALIZE INLINE encodePrim :: Int8 -> Builder () #-}
encodePrim x = do
ensureN n
Builder (\ _ k (Buffer mpa i) -> do
writePrimWord8ArrayAs mpa i x
k () (Buffer mpa (i + n)))
where
n = unalignedSize (undefined :: a)
encodePrimLE :: forall a. Unaligned (LE a) => a -> Builder ()
{-# INLINE encodePrimLE #-}
{-# SPECIALIZE INLINE encodePrimLE :: Word -> Builder () #-}
{-# SPECIALIZE INLINE encodePrimLE :: Word64 -> Builder () #-}
{-# SPECIALIZE INLINE encodePrimLE :: Word32 -> Builder () #-}
{-# SPECIALIZE INLINE encodePrimLE :: Word16 -> Builder () #-}
{-# SPECIALIZE INLINE encodePrimLE :: Int -> Builder () #-}
{-# SPECIALIZE INLINE encodePrimLE :: Int64 -> Builder () #-}
{-# SPECIALIZE INLINE encodePrimLE :: Int32 -> Builder () #-}
{-# SPECIALIZE INLINE encodePrimLE :: Int16 -> Builder () #-}
encodePrimLE = encodePrim . LE
encodePrimBE :: forall a. Unaligned (BE a) => a -> Builder ()
{-# INLINE encodePrimBE #-}
{-# SPECIALIZE INLINE encodePrimBE :: Word -> Builder () #-}
{-# SPECIALIZE INLINE encodePrimBE :: Word64 -> Builder () #-}
{-# SPECIALIZE INLINE encodePrimBE :: Word32 -> Builder () #-}
{-# SPECIALIZE INLINE encodePrimBE :: Word16 -> Builder () #-}
{-# SPECIALIZE INLINE encodePrimBE :: Int -> Builder () #-}
{-# SPECIALIZE INLINE encodePrimBE :: Int64 -> Builder () #-}
{-# SPECIALIZE INLINE encodePrimBE :: Int32 -> Builder () #-}
{-# SPECIALIZE INLINE encodePrimBE :: Int16 -> Builder () #-}
encodePrimBE = encodePrim . BE
stringUTF8 :: String -> Builder ()
{-# INLINE CONLIKE [0] stringUTF8 #-}
{-# RULES
"stringUTF8/packASCIIAddr" forall addr . stringUTF8 (unpackCString# addr) = packASCIIAddr addr
#-}
{-# RULES
"stringUTF8/packUTF8Addr" forall addr . stringUTF8 (unpackCString# addr) = packUTF8Addr addr
#-}
stringUTF8 = mapM_ charUTF8
packASCIIAddr :: Addr# -> Builder ()
packASCIIAddr addr0# = copy addr0#
where
len = fromIntegral . unsafeDupablePerformIO $ V.c_strlen addr0#
copy addr# = do
ensureN len
Builder (\ _ k (Buffer mba i) -> do
copyPtrToMutablePrimArray mba i (Ptr addr#) len
k () (Buffer mba (i + len)))
packUTF8Addr :: Addr# -> Builder ()
packUTF8Addr addr0# = validateAndCopy addr0#
where
len = fromIntegral . unsafeDupablePerformIO $ V.c_strlen addr0#
valid = unsafeDupablePerformIO $ T.c_utf8_validate_addr addr0# len
validateAndCopy addr#
| valid == 0 = mapM_ charUTF8 (unpackCString# addr#)
| otherwise = do
ensureN len
Builder (\ _ k (Buffer mba i) -> do
copyPtrToMutablePrimArray mba i (Ptr addr#) len
k () (Buffer mba (i + len)))
charUTF8 :: Char -> Builder ()
{-# INLINE charUTF8 #-}
charUTF8 chr = do
ensureN 4
Builder (\ _ k (Buffer mba i) -> do
i' <- T.encodeChar mba i chr
k () (Buffer mba i'))
string7 :: String -> Builder ()
{-# INLINE string7 #-}
string7 = mapM_ char7
char7 :: Char -> Builder ()
{-# INLINE char7 #-}
char7 chr = do
ensureN 1
Builder (\ _ k (Buffer mpa i) -> do
let x = V.c2w chr .&. 0x7F
writePrimWord8ArrayAs mpa i x
k () (Buffer mpa (i+1)))
string8 :: String -> Builder ()
{-# INLINE string8 #-}
string8 = mapM_ char8
char8 :: Char -> Builder ()
{-# INLINE char8 #-}
char8 chr = do
ensureN 1
Builder (\ _ k (Buffer mpa i) -> do
let x = V.c2w chr
writePrimWord8ArrayAs mpa i x
k () (Buffer mpa (i+1)))
text :: T.Text -> Builder ()
{-# INLINE text #-}
text (T.Text bs) = bytes bs
#define BACKSLASH 92
#define CLOSE_ANGLE 62
#define CLOSE_CURLY 125
#define CLOSE_PAREN 41
#define CLOSE_SQUARE 93
#define COMMA 44
#define COLON 58
#define DOUBLE_QUOTE 34
#define OPEN_ANGLE 60
#define OPEN_CURLY 123
#define OPEN_PAREN 40
#define OPEN_SQUARE 91
#define SINGLE_QUOTE 39
paren :: Builder () -> Builder ()
{-# INLINE paren #-}
paren b = encodePrim @Word8 OPEN_PAREN >> b >> encodePrim @Word8 CLOSE_PAREN
curly :: Builder () -> Builder ()
{-# INLINE curly #-}
curly b = encodePrim @Word8 OPEN_CURLY >> b >> encodePrim @Word8 CLOSE_CURLY
square :: Builder () -> Builder ()
{-# INLINE square #-}
square b = encodePrim @Word8 OPEN_SQUARE >> b >> encodePrim @Word8 CLOSE_SQUARE
angle :: Builder () -> Builder ()
{-# INLINE angle #-}
angle b = encodePrim @Word8 OPEN_ANGLE >> b >> encodePrim @Word8 CLOSE_ANGLE
quotes :: Builder () -> Builder ()
{-# INLINE quotes #-}
quotes b = encodePrim @Word8 DOUBLE_QUOTE >> b >> encodePrim @Word8 DOUBLE_QUOTE
squotes :: Builder () -> Builder ()
{-# INLINE squotes #-}
squotes b = encodePrim @Word8 SINGLE_QUOTE >> b >> encodePrim @Word8 SINGLE_QUOTE
colon :: Builder ()
{-# INLINE colon #-}
colon = encodePrim @Word8 COLON
comma :: Builder ()
{-# INLINE comma #-}
comma = encodePrim @Word8 COMMA
intercalateVec :: (V.Vec v a)
=> Builder ()
-> (a -> Builder ())
-> v a
-> Builder ()
{-# INLINE intercalateVec #-}
intercalateVec s f v = do
V.traverseVec_ (\ x -> f x >> s) (V.initMayEmpty v)
forM_ (V.lastMaybe v) f
intercalateList :: Builder ()
-> (a -> Builder ())
-> [a]
-> Builder ()
{-# INLINE intercalateList #-}
intercalateList s f xs = go xs
where
go [] = pure ()
go [x] = f x
go (x:xs') = f x >> s >> go xs'