{-# language BangPatterns #-}
{-# language BlockArguments #-}
{-# language MagicHash #-}
{-# language NamedFieldPuns #-}
{-# language TypeApplications #-}
{-# language UnboxedTuples #-}
module Data.Bytes
(
Bytes
, empty
, null
, length
, uncons
, unsnoc
, singleton
, doubleton
, tripleton
, replicate
, singletonU
, doubletonU
, tripletonU
, replicateU
, takeWhile
, dropWhile
, takeWhileEnd
, dropWhileEnd
, foldl
, foldl'
, foldr
, foldr'
, ifoldl'
, elem
, Byte.split
, Byte.splitU
, Byte.splitInit
, Byte.splitInitU
, Byte.splitNonEmpty
, split1
, split2
, split3
, Byte.count
, isPrefixOf
, isSuffixOf
, stripPrefix
, stripOptionalPrefix
, stripSuffix
, stripOptionalSuffix
, longestCommonPrefix
, isBytePrefixOf
, isByteSuffixOf
, equalsLatin1
, equalsLatin2
, equalsLatin3
, equalsLatin4
, equalsLatin5
, equalsLatin6
, equalsLatin7
, unsafeTake
, unsafeDrop
, unsafeIndex
, unsafeCopy
, pin
, contents
, touch
, toByteArray
, toByteArrayClone
, fromAsciiString
, fromLatinString
, fromByteArray
, toLatinString
, hGet
, hPut
) where
import Prelude hiding (length,takeWhile,dropWhile,null,foldl,foldr,elem,replicate)
import Control.Monad.Primitive (PrimMonad,PrimState,primitive_,unsafeIOToPrim)
import Control.Monad.ST.Run (runByteArrayST)
import Data.Bytes.Types (Bytes(Bytes,array,offset))
import Data.Char (ord)
import Data.Primitive (ByteArray(ByteArray),MutableByteArray)
import Foreign.Ptr (Ptr,plusPtr)
import GHC.Exts (Int(I#),Char(C#),word2Int#,chr#)
import GHC.Exts (Word#,Int#)
import GHC.IO (IO(IO))
import GHC.Word (Word8(W8#))
import System.IO (Handle)
import qualified Data.Bytes.Byte as Byte
import qualified Data.Primitive as PM
import qualified GHC.Exts as Exts
import qualified System.IO as IO
null :: Bytes -> Bool
null (Bytes _ _ len) = len == 0
length :: Bytes -> Int
length (Bytes _ _ len) = len
uncons :: Bytes -> Maybe (Word8, Bytes)
uncons b = case length b of
0 -> Nothing
_ -> Just (unsafeIndex b 0, unsafeDrop 1 b)
unsnoc :: Bytes -> Maybe (Bytes, Word8)
unsnoc b@(Bytes arr off len) = case len of
0 -> Nothing
_ -> let !len' = len - 1 in
Just (Bytes arr off len', unsafeIndex b len')
isBytePrefixOf :: Word8 -> Bytes -> Bool
isBytePrefixOf w b = case length b of
0 -> False
_ -> unsafeIndex b 0 == w
isByteSuffixOf :: Word8 -> Bytes -> Bool
isByteSuffixOf w b = case len of
0 -> False
_ -> unsafeIndex b (len - 1) == w
where
len = length b
isPrefixOf :: Bytes -> Bytes -> Bool
isPrefixOf (Bytes a aOff aLen) (Bytes b bOff bLen) =
if aLen <= bLen
then compareByteArrays a aOff b bOff aLen == EQ
else False
isSuffixOf :: Bytes -> Bytes -> Bool
isSuffixOf (Bytes a aOff aLen) (Bytes b bOff bLen) =
if aLen <= bLen
then compareByteArrays a aOff b (bOff + bLen - aLen) aLen == EQ
else False
longestCommonPrefix :: Bytes -> Bytes -> Bytes
longestCommonPrefix a b = loop 0
where
loop :: Int -> Bytes
loop !into
| into < maxLen
&& unsafeIndex a into == unsafeIndex b into
= loop (into + 1)
| otherwise = unsafeTake into a
maxLen = min (length a) (length b)
singleton :: Word8 -> Bytes
singleton !a = Bytes (singletonU a) 0 1
doubleton :: Word8 -> Word8 -> Bytes
doubleton !a !b = Bytes (doubletonU a b) 0 2
tripleton :: Word8 -> Word8 -> Word8 -> Bytes
tripleton !a !b !c = Bytes (tripletonU a b c) 0 3
singletonU :: Word8 -> ByteArray
singletonU !a = runByteArrayST do
arr <- PM.newByteArray 1
PM.writeByteArray arr 0 a
PM.unsafeFreezeByteArray arr
doubletonU :: Word8 -> Word8 -> ByteArray
doubletonU !a !b = runByteArrayST do
arr <- PM.newByteArray 2
PM.writeByteArray arr 0 a
PM.writeByteArray arr 1 b
PM.unsafeFreezeByteArray arr
tripletonU :: Word8 -> Word8 -> Word8 -> ByteArray
tripletonU !a !b !c = runByteArrayST do
arr <- PM.newByteArray 3
PM.writeByteArray arr 0 a
PM.writeByteArray arr 1 b
PM.writeByteArray arr 2 c
PM.unsafeFreezeByteArray arr
replicate ::
Int
-> Word8
-> Bytes
replicate !n !w = Bytes (replicateU n w) 0 n
replicateU :: Int -> Word8 -> ByteArray
replicateU !n !w = runByteArrayST do
arr <- PM.newByteArray n
PM.setByteArray arr 0 n w
PM.unsafeFreezeByteArray arr
stripPrefix :: Bytes -> Bytes -> Maybe Bytes
stripPrefix !pre !str = if pre `isPrefixOf` str
then Just (Bytes (array str) (offset str + length pre) (length str - length pre))
else Nothing
stripOptionalPrefix :: Bytes -> Bytes -> Bytes
stripOptionalPrefix !pre !str = if pre `isPrefixOf` str
then Bytes (array str) (offset str + length pre) (length str - length pre)
else str
stripSuffix :: Bytes -> Bytes -> Maybe Bytes
stripSuffix !suf !str = if suf `isSuffixOf` str
then Just (Bytes (array str) (offset str) (length str - length suf))
else Nothing
stripOptionalSuffix :: Bytes -> Bytes -> Bytes
stripOptionalSuffix !suf !str = if suf `isSuffixOf` str
then Bytes (array str) (offset str) (length str - length suf)
else str
split1 :: Word8 -> Bytes -> Maybe (Bytes,Bytes)
{-# inline split1 #-}
split1 w b@(Bytes arr off len) = case elemIndexLoop# w b of
(-1#) -> Nothing
i# -> let i = I# i# in
Just (Bytes arr off (i - off), Bytes arr (i + 1) (len - (1 + i - off)))
split2 :: Word8 -> Bytes -> Maybe (Bytes,Bytes,Bytes)
{-# inline split2 #-}
split2 w b@(Bytes arr off len) = case elemIndexLoop# w b of
(-1#) -> Nothing
i# -> let i = I# i# in
case elemIndexLoop# w (Bytes arr (i + 1) (len - (1 + i - off))) of
(-1#) -> Nothing
j# -> let j = I# j# in Just
( Bytes arr off (i - off)
, Bytes arr (i + 1) (j - (i + 1))
, Bytes arr (j + 1) (len - (1 + j - off))
)
split3 :: Word8 -> Bytes -> Maybe (Bytes,Bytes,Bytes,Bytes)
{-# inline split3 #-}
split3 w b@(Bytes arr off len) = case elemIndexLoop# w b of
(-1#) -> Nothing
i# -> let i = I# i# in
case elemIndexLoop# w (Bytes arr (i + 1) (len - (1 + i - off))) of
(-1#) -> Nothing
j# -> let j = I# j# in
case elemIndexLoop# w (Bytes arr (j + 1) (len - (1 + j - off))) of
(-1#) -> Nothing
k# -> let k = I# k# in Just
( Bytes arr off (i - off)
, Bytes arr (i + 1) (j - (i + 1))
, Bytes arr (j + 1) (k - (j + 1))
, Bytes arr (k + 1) (len - (1 + k - off))
)
elemIndexLoop# :: Word8 -> Bytes -> Int#
elemIndexLoop# !w (Bytes arr off@(I# off# ) len) = case len of
0 -> (-1#)
_ -> if PM.indexByteArray arr off == w
then off#
else elemIndexLoop# w (Bytes arr (off + 1) (len - 1))
elem :: Word8 -> Bytes -> Bool
elem (W8# w) b = case elemLoop 0# w b of
1# -> True
_ -> False
elemLoop :: Int# -> Word# -> Bytes -> Int#
elemLoop !r !w (Bytes arr@(ByteArray arr# ) off@(I# off# ) len) = case len of
0 -> r
_ -> elemLoop (Exts.orI# r (Exts.eqWord# w (Exts.indexWord8Array# arr# off# ) )) w (Bytes arr (off + 1) (len - 1))
takeWhile :: (Word8 -> Bool) -> Bytes -> Bytes
{-# inline takeWhile #-}
takeWhile k b = unsafeTake (countWhile k b) b
dropWhile :: (Word8 -> Bool) -> Bytes -> Bytes
{-# inline dropWhile #-}
dropWhile k b = unsafeDrop (countWhile k b) b
unsafeIndex :: Bytes -> Int -> Word8
unsafeIndex (Bytes arr off _) ix = PM.indexByteArray arr (off + ix)
dropWhileEnd :: (Word8 -> Bool) -> Bytes -> Bytes
{-# inline dropWhileEnd #-}
dropWhileEnd k !b = unsafeTake (length b - countWhileEnd k b) b
takeWhileEnd :: (Word8 -> Bool) -> Bytes -> Bytes
{-# inline takeWhileEnd #-}
takeWhileEnd k !b =
let n = countWhileEnd k b
in Bytes (array b) (offset b + length b - n) n
unsafeTake :: Int -> Bytes -> Bytes
{-# inline unsafeTake #-}
unsafeTake n (Bytes arr off _) =
Bytes arr off n
unsafeDrop :: Int -> Bytes -> Bytes
{-# inline unsafeDrop #-}
unsafeDrop n (Bytes arr off len) =
Bytes arr (off + n) (len - n)
countWhile :: (Word8 -> Bool) -> Bytes -> Int
{-# inline countWhile #-}
countWhile k (Bytes arr off0 len0) = go off0 len0 0 where
go !off !len !n = if len > 0
then if k (PM.indexByteArray arr off)
then go (off + 1) (len - 1) (n + 1)
else n
else n
countWhileEnd :: (Word8 -> Bool) -> Bytes -> Int
{-# inline countWhileEnd #-}
countWhileEnd k (Bytes arr off0 len0) = go (off0 + len0 - 1) (len0 - 1) 0 where
go !off !len !n = if len >= 0
then if k (PM.indexByteArray arr off)
then go (off - 1) (len - 1) (n + 1)
else n
else n
foldl :: (a -> Word8 -> a) -> a -> Bytes -> a
{-# inline foldl #-}
foldl f a0 (Bytes arr off0 len0) =
go (off0 + len0 - 1) (len0 - 1)
where
go !off !ix = case ix of
(-1) -> a0
_ -> f (go (off - 1) (ix - 1)) (PM.indexByteArray arr off)
foldr :: (Word8 -> a -> a) -> a -> Bytes -> a
{-# inline foldr #-}
foldr f a0 (Bytes arr off0 len0) = go off0 len0 where
go !off !len = case len of
0 -> a0
_ -> f (PM.indexByteArray arr off) (go (off + 1) (len - 1))
foldl' :: (a -> Word8 -> a) -> a -> Bytes -> a
{-# inline foldl' #-}
foldl' f a0 (Bytes arr off0 len0) = go a0 off0 len0 where
go !a !off !len = case len of
0 -> a
_ -> go (f a (PM.indexByteArray arr off)) (off + 1) (len - 1)
ifoldl' :: (a -> Int -> Word8 -> a) -> a -> Bytes -> a
{-# inline ifoldl' #-}
ifoldl' f a0 (Bytes arr off0 len0) = go a0 0 off0 len0 where
go !a !ix !off !len = case len of
0 -> a
_ -> go (f a ix (PM.indexByteArray arr off)) (ix + 1) (off + 1) (len - 1)
foldr' :: (Word8 -> a -> a) -> a -> Bytes -> a
{-# inline foldr' #-}
foldr' f a0 (Bytes arr off0 len0) =
go a0 (off0 + len0 - 1) (len0 - 1)
where
go !a !off !ix = case ix of
(-1) -> a
_ -> go (f (PM.indexByteArray arr off) a) (off - 1) (ix - 1)
toByteArray :: Bytes -> ByteArray
toByteArray b@(Bytes arr off len)
| off == 0, PM.sizeofByteArray arr == len = arr
| otherwise = toByteArrayClone b
toByteArrayClone :: Bytes -> ByteArray
toByteArrayClone (Bytes arr off len) = runByteArrayST $ do
m <- PM.newByteArray len
PM.copyByteArray m 0 arr off len
PM.unsafeFreezeByteArray m
fromAsciiString :: String -> Bytes
fromAsciiString = fromByteArray
. Exts.fromList
. map (\c -> let i = ord c in if i < 128 then fromIntegral @Int @Word8 i else 0)
fromLatinString :: String -> Bytes
fromLatinString =
fromByteArray . Exts.fromList . map (fromIntegral @Int @Word8 . ord)
toLatinString :: Bytes -> String
toLatinString = foldr (\(W8# w) xs -> C# (chr# (word2Int# w)) : xs) []
fromByteArray :: ByteArray -> Bytes
fromByteArray b = Bytes b 0 (PM.sizeofByteArray b)
compareByteArrays :: ByteArray -> Int -> ByteArray -> Int -> Int -> Ordering
{-# INLINE compareByteArrays #-}
compareByteArrays (ByteArray ba1#) (I# off1#) (ByteArray ba2#) (I# off2#) (I# n#) =
compare (I# (Exts.compareByteArrays# ba1# off1# ba2# off2# n#)) 0
equalsLatin1 :: Char -> Bytes -> Bool
equalsLatin1 !c0 (Bytes arr off len) = case len of
1 -> c0 == indexCharArray arr off
_ -> False
equalsLatin2 :: Char -> Char -> Bytes -> Bool
equalsLatin2 !c0 !c1 (Bytes arr off len) = case len of
2 -> c0 == indexCharArray arr off &&
c1 == indexCharArray arr (off + 1)
_ -> False
equalsLatin3 :: Char -> Char -> Char -> Bytes -> Bool
equalsLatin3 !c0 !c1 !c2 (Bytes arr off len) = case len of
3 -> c0 == indexCharArray arr off &&
c1 == indexCharArray arr (off + 1) &&
c2 == indexCharArray arr (off + 2)
_ -> False
equalsLatin4 :: Char -> Char -> Char -> Char -> Bytes -> Bool
equalsLatin4 !c0 !c1 !c2 !c3 (Bytes arr off len) = case len of
4 -> c0 == indexCharArray arr off &&
c1 == indexCharArray arr (off + 1) &&
c2 == indexCharArray arr (off + 2) &&
c3 == indexCharArray arr (off + 3)
_ -> False
equalsLatin5 :: Char -> Char -> Char -> Char -> Char -> Bytes -> Bool
equalsLatin5 !c0 !c1 !c2 !c3 !c4 (Bytes arr off len) = case len of
5 -> c0 == indexCharArray arr off &&
c1 == indexCharArray arr (off + 1) &&
c2 == indexCharArray arr (off + 2) &&
c3 == indexCharArray arr (off + 3) &&
c4 == indexCharArray arr (off + 4)
_ -> False
equalsLatin6 :: Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool
equalsLatin6 !c0 !c1 !c2 !c3 !c4 !c5 (Bytes arr off len) = case len of
6 -> c0 == indexCharArray arr off &&
c1 == indexCharArray arr (off + 1) &&
c2 == indexCharArray arr (off + 2) &&
c3 == indexCharArray arr (off + 3) &&
c4 == indexCharArray arr (off + 4) &&
c5 == indexCharArray arr (off + 5)
_ -> False
equalsLatin7 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool
equalsLatin7 !c0 !c1 !c2 !c3 !c4 !c5 !c6 (Bytes arr off len) = case len of
7 -> c0 == indexCharArray arr off &&
c1 == indexCharArray arr (off + 1) &&
c2 == indexCharArray arr (off + 2) &&
c3 == indexCharArray arr (off + 3) &&
c4 == indexCharArray arr (off + 4) &&
c5 == indexCharArray arr (off + 5) &&
c6 == indexCharArray arr (off + 6)
_ -> False
unsafeCopy :: PrimMonad m
=> MutableByteArray (PrimState m)
-> Int
-> Bytes
-> m ()
{-# inline unsafeCopy #-}
unsafeCopy dst dstIx (Bytes src srcIx len) =
PM.copyByteArray dst dstIx src srcIx len
pin :: Bytes -> Bytes
pin b@(Bytes arr _ len) = case PM.isByteArrayPinned arr of
True -> b
False -> Bytes
( runByteArrayST do
dst <- PM.newPinnedByteArray len
unsafeCopy dst 0 b
PM.unsafeFreezeByteArray dst
) 0 len
contents :: Bytes -> Ptr Word8
contents (Bytes arr off _) = plusPtr (PM.byteArrayContents arr) off
touch :: PrimMonad m => Bytes -> m ()
touch (Bytes (ByteArray arr) _ _) = unsafeIOToPrim
(primitive_ (\s -> Exts.touch# arr s))
indexCharArray :: ByteArray -> Int -> Char
indexCharArray (ByteArray arr) (I# off) = C# (Exts.indexCharArray# arr off)
empty :: Bytes
empty = Bytes mempty 0 0
hGet :: Handle -> Int -> IO Bytes
hGet h i = createPinnedAndTrim i (\p -> IO.hGetBuf h p i)
hPut :: Handle -> Bytes -> IO ()
hPut h b0 = do
let b1@(Bytes arr _ len) = pin b0
IO.hPutBuf h (contents b1) len
touchByteArrayIO arr
createPinnedAndTrim :: Int -> (Ptr Word8 -> IO Int) -> IO Bytes
{-# inline createPinnedAndTrim #-}
createPinnedAndTrim maxSz f = do
arr@(PM.MutableByteArray arr#) <- PM.newPinnedByteArray maxSz
sz <- f (PM.mutableByteArrayContents arr)
PM.shrinkMutablePrimArray (PM.MutablePrimArray @Exts.RealWorld @Word8 arr#) sz
r <- PM.unsafeFreezeByteArray arr
pure (Bytes r 0 sz)
touchByteArrayIO :: ByteArray -> IO ()
touchByteArrayIO (ByteArray x) =
IO (\s -> (# Exts.touch# x s, () #))