{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE UnliftedFFITypes #-}
{-# LANGUAGE Unsafe #-}
{-# LANGUAGE ViewPatterns #-}
module Data.Text.Short.Internal
(
ShortText(..)
, null
, length
, isAscii
, splitAt
, splitAtEnd
, indexEndMaybe
, indexMaybe
, isPrefixOf
, stripPrefix
, isSuffixOf
, stripSuffix
, cons
, snoc
, uncons
, unsnoc
, findIndex
, find
, all
, span
, spanEnd
, split
, intersperse
, intercalate
, reverse
, replicate
, filter
, dropAround
, foldl
, foldl'
, foldr
, foldl1
, foldl1'
, foldr1
, singleton
, Data.Text.Short.Internal.fromString
, toString
, fromText
, toText
, fromShortByteString
, fromShortByteStringUnsafe
, toShortByteString
, fromByteString
, fromByteStringUnsafe
, toByteString
, toBuilder
, BS.ByteString
, T.Text
, module Prelude
, isValidUtf8
) where
import Control.DeepSeq (NFData)
import Control.Monad.ST (stToIO)
import Data.Binary
import Data.Bits
import qualified Data.ByteString as BS
import qualified Data.ByteString.Builder as BB
import Data.ByteString.Short (ShortByteString)
import qualified Data.ByteString.Short as BSS
import qualified Data.ByteString.Short.Internal as BSSI
import Data.Char (ord)
import Data.Data (Data(..),constrIndex, Constr,
mkConstr, DataType, mkDataType,
Fixity(Prefix))
import Data.Hashable (Hashable)
import Data.Typeable (Typeable)
import qualified Data.List as List
import Data.Maybe (fromMaybe, isNothing)
import Data.Semigroup
import qualified Data.String as S
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import Foreign.C
import GHC.Base (assert, unsafeChr)
import qualified GHC.CString as GHC
import GHC.Exts (Addr#, ByteArray#, Int (I#),
Int#, MutableByteArray#,
Ptr (..), RealWorld, Word (W#))
import qualified GHC.Exts
import qualified GHC.Foreign as GHC
import GHC.IO.Encoding
import GHC.ST
import Prelude hiding (all, any, break, concat,
drop, dropWhile, filter, foldl,
foldl1, foldr, foldr1, head,
init, last, length, null,
replicate, reverse, span,
splitAt, tail, take, takeWhile)
import System.IO.Unsafe
import Text.Printf (PrintfArg, formatArg,
formatString)
import qualified PrimOps
newtype ShortText = ShortText ShortByteString
deriving (Hashable,Monoid,NFData,Data.Semigroup.Semigroup,Typeable)
instance Data ShortText where
gfoldl f z txt = z fromString `f` (toString txt)
toConstr _ = packConstr
gunfold k z c = case constrIndex c of
1 -> k (z fromString)
_ -> error "gunfold"
dataTypeOf _ = shortTextDataType
packConstr :: Constr
packConstr = mkConstr shortTextDataType "fromString" [] Prefix
shortTextDataType :: DataType
shortTextDataType = mkDataType "Data.Text.Short" [packConstr]
instance Eq ShortText where
{-# INLINE (==) #-}
(==) x y
| lx /= ly = False
| lx == 0 = True
| otherwise = case PrimOps.compareByteArrays# (toByteArray# x) 0# (toByteArray# y) 0# n# of
0# -> True
_ -> False
where
!lx@(I# n#) = toLength x
!ly = toLength y
instance Ord ShortText where
compare t1 t2
| n == 0 = compare n1 n2
| otherwise = case PrimOps.compareByteArrays# ba1# 0# ba2# 0# n# of
r# | I# r# < 0 -> LT
| I# r# > 0 -> GT
| n1 < n2 -> LT
| n1 > n2 -> GT
| otherwise -> EQ
where
ba1# = toByteArray# t1
ba2# = toByteArray# t2
!n1 = toLength t1
!n2 = toLength t2
!n@(I# n#) = n1 `min` n2
instance Show ShortText where
showsPrec p (ShortText b) = showsPrec p (decodeStringShort' utf8 b)
show (ShortText b) = show (decodeStringShort' utf8 b)
instance Read ShortText where
readsPrec p = map (\(x,s) -> (ShortText $ encodeStringShort utf8 x,s)) . readsPrec p
instance PrintfArg ShortText where
formatArg txt = formatString $ toString txt
#if MIN_VERSION_binary(0,8,1)
instance Binary ShortText where
put = put . toShortByteString
get = do
sbs <- get
case fromShortByteString sbs of
Nothing -> fail "Binary.get(ShortText): Invalid UTF-8 stream"
Just st -> return st
#else
instance Binary ShortText where
put = put . toByteString
get = do
bs <- get
case fromByteString bs of
Nothing -> fail "Binary.get(ShortText): Invalid UTF-8 stream"
Just st -> return st
#endif
null :: ShortText -> Bool
null = BSS.null . toShortByteString
length :: ShortText -> Int
length st = fromIntegral $ unsafeDupablePerformIO (c_text_short_length (toByteArray# st) (toCSize st))
foreign import ccall unsafe "hs_text_short_length" c_text_short_length :: ByteArray# -> CSize -> IO CSize
isAscii :: ShortText -> Bool
isAscii st = (/= 0) $ unsafeDupablePerformIO (c_text_short_is_ascii (toByteArray# st) sz)
where
sz = toCSize st
foreign import ccall unsafe "hs_text_short_is_ascii" c_text_short_is_ascii :: ByteArray# -> CSize -> IO CInt
all :: (Char -> Bool) -> ShortText -> Bool
all p st = isNothing (findOfs (not . p) st (B 0))
find :: (Char -> Bool) -> ShortText -> Maybe Char
find p st = go 0
where
go !ofs
| ofs >= sz = Nothing
| otherwise = let (c,ofs') = decodeCharAtOfs st ofs
in c `seq` ofs' `seq`
if p c
then Just c
else go ofs'
!sz = toB st
findIndex :: (Char -> Bool) -> ShortText -> Maybe Int
findIndex p st = go 0 0
where
go !ofs !i
| ofs >= sz = Nothing
| otherwise = let (c,ofs') = decodeCharAtOfs st ofs
in c `seq` ofs' `seq`
if p c
then Just i
else go ofs' (i+1)
!sz = toB st
split :: (Char -> Bool) -> ShortText -> [ShortText]
split p st0 = go 0
where
go !ofs0 = case findOfs' p st0 ofs0 of
Just (ofs1,ofs2) -> slice st0 ofs0 (ofs1-ofs0) : go ofs2
Nothing
| ofs0 == 0 -> st0 : []
| otherwise -> slice st0 ofs0 (maxOfs-ofs0) : []
!maxOfs = toB st0
{-# INLINE findOfs #-}
findOfs :: (Char -> Bool) -> ShortText -> B -> Maybe B
findOfs p st = go
where
go :: B -> Maybe B
go !ofs | ofs >= sz = Nothing
go !ofs | p c = Just ofs
| otherwise = go ofs'
where
(c,ofs') = decodeCharAtOfs st ofs
!sz = toB st
{-# INLINE findOfs' #-}
findOfs' :: (Char -> Bool) -> ShortText -> B -> Maybe (B,B)
findOfs' p st = go
where
go :: B -> Maybe (B,B)
go !ofs | ofs >= sz = Nothing
go !ofs | p c = Just (ofs,ofs')
| otherwise = go ofs'
where
(c,ofs') = decodeCharAtOfs st ofs
!sz = toB st
{-# INLINE findOfsRev #-}
findOfsRev :: (Char -> Bool) -> ShortText -> B -> Maybe B
findOfsRev p st = go
where
go (B 0) = Nothing
go !ofs
| p (cp2ch cp) = Just ofs
| otherwise = go (ofs-cpLen cp)
where
!cp = readCodePointRev st ofs
span :: (Char -> Bool) -> ShortText -> (ShortText,ShortText)
span p st
| Just ofs <- findOfs (not . p) st (B 0) = splitAtOfs ofs st
| otherwise = (st,mempty)
spanEnd :: (Char -> Bool) -> ShortText -> (ShortText,ShortText)
spanEnd p st
| Just ofs <- findOfsRev (not . p) st (toB st) = splitAtOfs ofs st
| otherwise = (mempty,st)
toCSize :: ShortText -> CSize
toCSize = fromIntegral . BSS.length . toShortByteString
toB :: ShortText -> B
toB = fromIntegral . BSS.length . toShortByteString
toLength :: ShortText -> Int
toLength st = I# (toLength# st)
toLength# :: ShortText -> Int#
toLength# st = GHC.Exts.sizeofByteArray# (toByteArray# st)
toByteArray# :: ShortText -> ByteArray#
toByteArray# (ShortText (BSSI.SBS ba#)) = ba#
toShortByteString :: ShortText -> ShortByteString
toShortByteString (ShortText b) = b
toByteString :: ShortText -> BS.ByteString
toByteString = BSS.fromShort . toShortByteString
toBuilder :: ShortText -> BB.Builder
toBuilder = BB.shortByteString . toShortByteString
toString :: ShortText -> String
toString st = go 0
where
go !ofs
| ofs >= sz = []
| otherwise = let (c,ofs') = decodeCharAtOfs st ofs
in c `seq` ofs' `seq` (c : go ofs')
!sz = toB st
foldl :: (a -> Char -> a) -> a -> ShortText -> a
foldl f z st = go 0 z
where
go !ofs acc
| ofs >= sz = acc
| otherwise = let (c,ofs') = decodeCharAtOfs st ofs
in c `seq` ofs' `seq` go ofs' (f acc c)
!sz = toB st
foldl1 :: (Char -> Char -> Char) -> ShortText -> Char
foldl1 f st
| sz == 0 = error "foldl1: empty ShortText"
| otherwise = go c0sz c0
where
go !ofs acc
| ofs >= sz = acc
| otherwise = let (c,ofs') = decodeCharAtOfs st ofs
in c `seq` ofs' `seq` go ofs' (f acc c)
!sz = toB st
(c0,c0sz) = decodeCharAtOfs st (B 0)
foldl' :: (a -> Char -> a) -> a -> ShortText -> a
foldl' f !z st = go 0 z
where
go !ofs !acc
| ofs >= sz = acc
| otherwise = let (c,ofs') = decodeCharAtOfs st ofs
in c `seq` ofs' `seq` go ofs' (f acc c)
!sz = toB st
foldl1' :: (Char -> Char -> Char) -> ShortText -> Char
foldl1' f st
| sz == 0 = error "foldl1: empty ShortText"
| otherwise = go c0sz c0
where
go !ofs !acc
| ofs >= sz = acc
| otherwise = let (c,ofs') = decodeCharAtOfs st ofs
in c `seq` ofs' `seq` go ofs' (f acc c)
!sz = toB st
(c0,c0sz) = decodeCharAtOfs st (B 0)
foldr :: (Char -> a -> a) -> a -> ShortText -> a
foldr f z st = go 0
where
go !ofs
| ofs >= sz = z
| otherwise = let (c,ofs') = decodeCharAtOfs st ofs
in c `seq` ofs' `seq` f c (go ofs')
!sz = toB st
foldr1 :: (Char -> Char -> Char) -> ShortText -> Char
foldr1 f st
| sz == 0 = error "foldr1: empty ShortText"
| otherwise = go 0
where
go !ofs = let (c,ofs') = decodeCharAtOfs st ofs
in c `seq` ofs' `seq`
(if ofs' >= sz then c else f c (go ofs'))
!sz = toB st
toText :: ShortText -> T.Text
toText = T.decodeUtf8 . toByteString
fromString :: String -> ShortText
fromString [] = mempty
fromString [c] = singleton c
fromString s = ShortText . encodeStringShort utf8 . map r $ s
where
r c | isSurr (ord c) = '\xFFFD'
| otherwise = c
fromText :: T.Text -> ShortText
fromText = fromByteStringUnsafe . T.encodeUtf8
fromShortByteString :: ShortByteString -> Maybe ShortText
fromShortByteString sbs
| isValidUtf8 st = Just st
| otherwise = Nothing
where
st = ShortText sbs
fromShortByteStringUnsafe :: ShortByteString -> ShortText
fromShortByteStringUnsafe = ShortText
fromByteString :: BS.ByteString -> Maybe ShortText
fromByteString = fromShortByteString . BSS.toShort
fromByteStringUnsafe :: BS.ByteString -> ShortText
fromByteStringUnsafe = ShortText . BSS.toShort
encodeString :: TextEncoding -> String -> BS.ByteString
encodeString te str = unsafePerformIO $ GHC.withCStringLen te str BS.packCStringLen
decodeString' :: TextEncoding -> BS.ByteString -> String
decodeString' te bs = unsafePerformIO $ BS.useAsCStringLen bs (GHC.peekCStringLen te)
decodeStringShort' :: TextEncoding -> ShortByteString -> String
decodeStringShort' te = decodeString' te . BSS.fromShort
encodeStringShort :: TextEncoding -> String -> BSS.ShortByteString
encodeStringShort te = BSS.toShort . encodeString te
isValidUtf8 :: ShortText -> Bool
isValidUtf8 st = (==0) $ unsafeDupablePerformIO (c_text_short_is_valid_utf8 (toByteArray# st) (toCSize st))
type CCodePoint = Word
foreign import ccall unsafe "hs_text_short_is_valid_utf8" c_text_short_is_valid_utf8 :: ByteArray# -> CSize -> IO CInt
foreign import ccall unsafe "hs_text_short_index_cp" c_text_short_index :: ByteArray# -> CSize -> CSize -> IO CCodePoint
indexMaybe :: ShortText -> Int -> Maybe Char
indexMaybe st i
| i < 0 = Nothing
| otherwise = cp2chSafe cp
where
cp = CP $ unsafeDupablePerformIO (c_text_short_index (toByteArray# st) (toCSize st) (fromIntegral i))
indexEndMaybe :: ShortText -> Int -> Maybe Char
indexEndMaybe st i
| i < 0 = Nothing
| otherwise = cp2chSafe cp
where
cp = CP $ unsafeDupablePerformIO (c_text_short_index_rev (toByteArray# st) (toCSize st) (fromIntegral i))
foreign import ccall unsafe "hs_text_short_index_cp_rev" c_text_short_index_rev :: ByteArray# -> CSize -> CSize -> IO CCodePoint
splitAt :: Int -> ShortText -> (ShortText,ShortText)
splitAt i st
| i <= 0 = (mempty,st)
| otherwise = splitAtOfs ofs st
where
ofs = csizeToB $
unsafeDupablePerformIO (c_text_short_index_ofs (toByteArray# st) stsz (fromIntegral i))
stsz = toCSize st
splitAtEnd :: Int -> ShortText -> (ShortText,ShortText)
splitAtEnd i st
| i <= 0 = (st,mempty)
| ofs >= stsz = (mempty,st)
| otherwise = splitAtOfs ofs st
where
ofs = csizeToB $
unsafeDupablePerformIO (c_text_short_index_ofs_rev (toByteArray# st) (toCSize st) (fromIntegral (i-1)))
stsz = toB st
{-# INLINE splitAtOfs #-}
splitAtOfs :: B -> ShortText -> (ShortText,ShortText)
splitAtOfs ofs st
| ofs == 0 = (mempty,st)
| ofs >= stsz = (st,mempty)
| otherwise = (slice st 0 ofs, slice st ofs (stsz-ofs))
where
!stsz = toB st
foreign import ccall unsafe "hs_text_short_index_ofs" c_text_short_index_ofs :: ByteArray# -> CSize -> CSize -> IO CSize
foreign import ccall unsafe "hs_text_short_index_ofs_rev" c_text_short_index_ofs_rev :: ByteArray# -> CSize -> CSize -> IO CSize
uncons :: ShortText -> Maybe (Char,ShortText)
uncons st
| null st = Nothing
| len2 == 0 = Just (c0, mempty)
| otherwise = Just (c0, slice st ofs len2)
where
c0 = cp2ch cp0
cp0 = readCodePoint st 0
ofs = cpLen cp0
len2 = toB st - ofs
unsnoc :: ShortText -> Maybe (ShortText,Char)
unsnoc st
| null st = Nothing
| len1 == 0 = Just (mempty, c0)
| otherwise = Just (slice st 0 len1, c0)
where
c0 = cp2ch cp0
cp0 = readCodePointRev st stsz
stsz = toB st
len1 = stsz - cpLen cp0
isPrefixOf :: ShortText -> ShortText -> Bool
isPrefixOf x y
| lx > ly = False
| lx == 0 = True
| otherwise = case PrimOps.compareByteArrays# (toByteArray# x) 0# (toByteArray# y) 0# n# of
0# -> True
_ -> False
where
!lx@(I# n#) = toLength x
!ly = toLength y
stripPrefix :: ShortText -> ShortText -> Maybe ShortText
stripPrefix pfx t
| isPrefixOf pfx t = Just $! snd (splitAtOfs (toB pfx) t)
| otherwise = Nothing
isSuffixOf :: ShortText -> ShortText -> Bool
isSuffixOf x y
| lx > ly = False
| lx == 0 = True
| otherwise = case PrimOps.compareByteArrays# (toByteArray# x) 0# (toByteArray# y) ofs2# n# of
0# -> True
_ -> False
where
!(I# ofs2#) = ly - lx
!lx@(I# n#) = toLength x
!ly = toLength y
stripSuffix :: ShortText -> ShortText -> Maybe ShortText
stripSuffix sfx t
| isSuffixOf sfx t = Just $! fst (splitAtOfs pfxLen t)
| otherwise = Nothing
where
pfxLen = toB t - toB sfx
intersperse :: Char -> ShortText -> ShortText
intersperse c st
| null st = mempty
| sn == 1 = st
| otherwise = create newsz $ \mba -> do
let !cp0 = readCodePoint st 0
!cp0sz = cpLen cp0
writeCodePointN cp0sz mba 0 cp0
go mba (sn - 1) cp0sz cp0sz
where
newsz = ssz + ((sn-1) `mulB` csz)
ssz = toB st
sn = length st
csz = cpLen cp
cp = ch2cp c
go :: MBA s -> Int -> B -> B -> ST s ()
go _ 0 !_ !_ = return ()
go mba n ofs ofs2 = do
let !cp1 = readCodePoint st ofs2
!cp1sz = cpLen cp1
writeCodePointN csz mba ofs cp
writeCodePointN cp1sz mba (ofs+csz) cp1
go mba (n-1) (ofs+csz+cp1sz) (ofs2+cp1sz)
intercalate :: ShortText -> [ShortText] -> ShortText
intercalate _ [] = mempty
intercalate _ [t] = t
intercalate sep ts
| null sep = mconcat ts
| otherwise = mconcat (List.intersperse sep ts)
replicate :: Int -> ShortText -> ShortText
replicate n0 t
| n0 < 1 = mempty
| null t = mempty
| otherwise = create (n0 `mulB` sz) (go 0)
where
go :: Int -> MBA s -> ST s ()
go j mba
| j == n0 = return ()
| otherwise = do
copyByteArray t 0 mba (j `mulB` sz) sz
go (j+1) mba
sz = toB t
reverse :: ShortText -> ShortText
reverse st
| null st = mempty
| sn == 1 = st
| otherwise = create sz $ go sn 0
where
sz = toB st
sn = length st
go :: Int -> B -> MBA s -> ST s ()
go 0 !_ _ = return ()
go i ofs mba = do
let !cp = readCodePoint st ofs
!cpsz = cpLen cp
!ofs' = ofs+cpsz
writeCodePointN cpsz mba (sz - ofs') cp
go (i-1) ofs' mba
filter :: (Char -> Bool) -> ShortText -> ShortText
filter p t
= case (mofs1,mofs2) of
(Nothing, _) -> t
(Just 0, Nothing) -> mempty
(Just ofs1, Nothing) -> slice t 0 ofs1
(Just ofs1, Just ofs2) -> createShrink (t0sz-(ofs2-ofs1)) $ \mba -> do
copyByteArray t 0 mba 0 ofs1
t1sz <- go mba ofs2 ofs1
return t1sz
where
mofs1 = findOfs (not . p) t (B 0)
mofs2 = findOfs p t (fromMaybe (B 0) mofs1)
t0sz = toB t
go :: MBA s -> B -> B -> ST s B
go mba !t0ofs !t1ofs
| t0ofs >= t0sz = return t1ofs
| otherwise = let !cp = readCodePoint t t0ofs
!cpsz = cpLen cp
in if p (cp2ch cp)
then writeCodePointN cpsz mba t1ofs cp >>
go mba (t0ofs+cpsz) (t1ofs+cpsz)
else go mba (t0ofs+cpsz) t1ofs
dropAround :: (Char -> Bool) -> ShortText -> ShortText
dropAround p t0 = case (mofs1,mofs2) of
(Nothing,_) -> mempty
(Just ofs1,Just ofs2)
| ofs1 == 0, ofs2 == t0sz -> t0
| ofs1 < ofs2 -> create (ofs2-ofs1) $ \mba -> do
copyByteArray t0 ofs1 mba (B 0) (ofs2-ofs1)
(_,_) -> error "dropAround: the impossible happened"
where
mofs1 = findOfs (not . p) t0 (B 0)
mofs2 = findOfsRev (not . p) t0 t0sz
t0sz = toB t0
slice :: ShortText -> B -> B -> ShortText
slice st ofs len
| ofs < 0 = error "invalid offset"
| len < 0 = error "invalid length"
| len' == 0 = mempty
| otherwise = create len' $ \mba -> copyByteArray st ofs' mba 0 len'
where
len0 = toB st
len' = max 0 (min len (len0-ofs))
ofs' = max 0 ofs
newtype B = B { unB :: Int }
deriving (Ord,Eq,Num)
mulB :: Int -> B -> B
mulB n (B b) = B (n*b)
csizeFromB :: B -> CSize
csizeFromB = fromIntegral . unB
csizeToB :: CSize -> B
csizeToB = B . fromIntegral
data MBA s = MBA# { unMBA# :: MutableByteArray# s }
{-# INLINE create #-}
create :: B -> (forall s. MBA s -> ST s ()) -> ShortText
create n go = runST $ do
mba <- newByteArray n
go mba
unsafeFreeze mba
{-# INLINE createShrink #-}
createShrink :: B -> (forall s. MBA s -> ST s B) -> ShortText
createShrink n go = runST $ do
mba <- newByteArray n
n' <- go mba
if n' < n
then unsafeFreezeShrink mba n'
else unsafeFreeze mba
{-# INLINE unsafeFreeze #-}
unsafeFreeze :: MBA s -> ST s ShortText
unsafeFreeze (MBA# mba#)
= ST $ \s -> case GHC.Exts.unsafeFreezeByteArray# mba# s of
(# s', ba# #) -> (# s', ShortText (BSSI.SBS ba#) #)
{-# INLINE copyByteArray #-}
copyByteArray :: ShortText -> B -> MBA s -> B -> B -> ST s ()
copyByteArray (ShortText (BSSI.SBS src#)) (B (I# src_off#)) (MBA# dst#) (B (I# dst_off#)) (B (I# len#))
= ST $ \s -> case GHC.Exts.copyByteArray# src# src_off# dst# dst_off# len# s of
s' -> (# s', () #)
{-# INLINE newByteArray #-}
newByteArray :: B -> ST s (MBA s)
newByteArray (B (I# n#))
= ST $ \s -> case GHC.Exts.newByteArray# n# s of
(# s', mba# #) -> (# s', MBA# mba# #)
{-# INLINE writeWord8Array #-}
writeWord8Array :: MBA s -> B -> Word -> ST s ()
writeWord8Array (MBA# mba#) (B (I# i#)) (W# w#)
= ST $ \s -> case GHC.Exts.writeWord8Array# mba# i# w# s of
s' -> (# s', () #)
{-# INLINE copyAddrToByteArray #-}
copyAddrToByteArray :: Ptr a -> MBA RealWorld -> B -> B -> ST RealWorld ()
copyAddrToByteArray (Ptr src#) (MBA# dst#) (B (I# dst_off#)) (B (I# len#))
= ST $ \s -> case GHC.Exts.copyAddrToByteArray# src# dst# dst_off# len# s of
s' -> (# s', () #)
#if __GLASGOW_HASKELL__ >= 710
{-# INLINE unsafeFreezeShrink #-}
unsafeFreezeShrink :: MBA s -> B -> ST s ShortText
unsafeFreezeShrink mba n = do
shrink mba n
unsafeFreeze mba
{-# INLINE shrink #-}
shrink :: MBA s -> B -> ST s ()
shrink (MBA# mba#) (B (I# i#))
= ST $ \s -> case GHC.Exts.shrinkMutableByteArray# mba# i# s of
s' -> (# s', () #)
#else
{-# INLINE unsafeFreezeShrink #-}
unsafeFreezeShrink :: MBA s -> B -> ST s ShortText
unsafeFreezeShrink mba0 n = do
mba' <- newByteArray n
copyByteArray2 mba0 0 mba' 0 n
unsafeFreeze mba'
{-# INLINE copyByteArray2 #-}
copyByteArray2 :: MBA s -> B -> MBA s -> B -> B -> ST s ()
copyByteArray2 (MBA# src#) (B (I# src_off#)) (MBA# dst#) (B (I# dst_off#)) (B( I# len#))
= ST $ \s -> case GHC.Exts.copyMutableByteArray# src# src_off# dst# dst_off# len# s of
s' -> (# s', () #)
#endif
newtype CP = CP Word
{-# INLINE ch2cp #-}
ch2cp :: Char -> CP
ch2cp (ord -> ci)
| isSurr ci = CP 0xFFFD
| otherwise = CP (fromIntegral ci)
{-# INLINE isSurr #-}
isSurr :: (Num i, Bits i) => i -> Bool
isSurr ci = ci .&. 0xfff800 == 0xd800
{-# INLINE cp2ch #-}
cp2ch :: CP -> Char
cp2ch (CP w) = (w < 0x110000) `assert` unsafeChr (fromIntegral w)
cp2chSafe :: CP -> Maybe Char
cp2chSafe cp
| cpNull cp = Nothing
| otherwise = Just $! cp2ch cp
where
cpNull :: CP -> Bool
cpNull (CP w) = w >= 0x110000
{-# INLINE cpLen #-}
cpLen :: CP -> B
cpLen (CP cp)
| cp < 0x80 = B 1
| cp < 0x800 = B 2
| cp < 0x10000 = B 3
| otherwise = B 4
{-# INLINE decodeCharAtOfs #-}
decodeCharAtOfs :: ShortText -> B -> (Char,B)
decodeCharAtOfs st ofs = (c,ofs')
where
c = cp2ch cp
ofs' = ofs + cpLen cp
cp = readCodePoint st ofs
singleton :: Char -> ShortText
singleton = singleton' . ch2cp
singleton' :: CP -> ShortText
singleton' cp@(CP cpw)
| cpw < 0x80 = create 1 $ \mba -> writeCodePoint1 mba 0 cp
| cpw < 0x800 = create 2 $ \mba -> writeCodePoint2 mba 0 cp
| cpw < 0x10000 = create 3 $ \mba -> writeCodePoint3 mba 0 cp
| otherwise = create 4 $ \mba -> writeCodePoint4 mba 0 cp
cons :: Char -> ShortText -> ShortText
cons (ch2cp -> cp@(CP cpw)) sfx
| n == 0 = singleton' cp
| cpw < 0x80 = create (n+1) $ \mba -> writeCodePoint1 mba 0 cp >> copySfx 1 mba
| cpw < 0x800 = create (n+2) $ \mba -> writeCodePoint2 mba 0 cp >> copySfx 2 mba
| cpw < 0x10000 = create (n+3) $ \mba -> writeCodePoint3 mba 0 cp >> copySfx 3 mba
| otherwise = create (n+4) $ \mba -> writeCodePoint4 mba 0 cp >> copySfx 4 mba
where
!n = toB sfx
copySfx :: B -> MBA s -> ST s ()
copySfx ofs mba = copyByteArray sfx 0 mba ofs n
snoc :: ShortText -> Char -> ShortText
snoc pfx (ch2cp -> cp@(CP cpw))
| n == 0 = singleton' cp
| cpw < 0x80 = create (n+1) $ \mba -> copyPfx mba >> writeCodePoint1 mba n cp
| cpw < 0x800 = create (n+2) $ \mba -> copyPfx mba >> writeCodePoint2 mba n cp
| cpw < 0x10000 = create (n+3) $ \mba -> copyPfx mba >> writeCodePoint3 mba n cp
| otherwise = create (n+4) $ \mba -> copyPfx mba >> writeCodePoint4 mba n cp
where
!n = toB pfx
copyPfx :: MBA s -> ST s ()
copyPfx mba = copyByteArray pfx 0 mba 0 n
writeCodePointN :: B -> MBA s -> B -> CP -> ST s ()
writeCodePointN 1 = writeCodePoint1
writeCodePointN 2 = writeCodePoint2
writeCodePointN 3 = writeCodePoint3
writeCodePointN 4 = writeCodePoint4
writeCodePointN _ = undefined
writeCodePoint1 :: MBA s -> B -> CP -> ST s ()
writeCodePoint1 mba ofs (CP cp) =
writeWord8Array mba ofs cp
writeCodePoint2 :: MBA s -> B -> CP -> ST s ()
writeCodePoint2 mba ofs (CP cp) = do
writeWord8Array mba ofs (0xc0 .|. (cp `unsafeShiftR` 6))
writeWord8Array mba (ofs+1) (0x80 .|. (cp .&. 0x3f))
writeCodePoint3 :: MBA s -> B -> CP -> ST s ()
writeCodePoint3 mba ofs (CP cp) = do
writeWord8Array mba ofs (0xe0 .|. (cp `unsafeShiftR` 12))
writeWord8Array mba (ofs+1) (0x80 .|. ((cp `unsafeShiftR` 6) .&. 0x3f))
writeWord8Array mba (ofs+2) (0x80 .|. (cp .&. 0x3f))
writeCodePoint4 :: MBA s -> B -> CP -> ST s ()
writeCodePoint4 mba ofs (CP cp) = do
writeWord8Array mba ofs (0xf0 .|. (cp `unsafeShiftR` 18))
writeWord8Array mba (ofs+1) (0x80 .|. ((cp `unsafeShiftR` 12) .&. 0x3f))
writeWord8Array mba (ofs+2) (0x80 .|. ((cp `unsafeShiftR` 6) .&. 0x3f))
writeWord8Array mba (ofs+3) (0x80 .|. (cp .&. 0x3f))
readCodePoint :: ShortText -> B -> CP
readCodePoint st (csizeFromB -> ofs)
= CP $ unsafeDupablePerformIO (c_text_short_ofs_cp (toByteArray# st) ofs)
foreign import ccall unsafe "hs_text_short_ofs_cp" c_text_short_ofs_cp :: ByteArray# -> CSize -> IO CCodePoint
readCodePointRev :: ShortText -> B -> CP
readCodePointRev st (csizeFromB -> ofs)
= CP $ unsafeDupablePerformIO (c_text_short_ofs_cp_rev (toByteArray# st) ofs)
foreign import ccall unsafe "hs_text_short_ofs_cp_rev" c_text_short_ofs_cp_rev :: ByteArray# -> CSize -> IO CCodePoint
instance GHC.Exts.IsList ShortText where
type (Item ShortText) = Char
fromList = fromString
toList = toString
instance S.IsString ShortText where
fromString = fromStringLit
{-# INLINE [0] fromStringLit #-}
fromStringLit :: String -> ShortText
fromStringLit = fromString
{-# RULES "ShortText empty literal" fromStringLit "" = mempty #-}
{-# RULES "ShortText singleton literal" forall c . fromStringLit [c] = singleton c #-}
{-# RULES "ShortText literal ASCII" forall s . fromStringLit (GHC.unpackCString# s) = fromLitAsciiAddr# s #-}
{-# RULES "ShortText literal UTF-8" forall s . fromStringLit (GHC.unpackCStringUtf8# s) = fromLitMUtf8Addr# s #-}
{-# NOINLINE fromLitAsciiAddr# #-}
fromLitAsciiAddr# :: Addr# -> ShortText
fromLitAsciiAddr# (Ptr -> ptr) = unsafeDupablePerformIO $ do
sz <- csizeToB `fmap` c_strlen ptr
case sz `compare` 0 of
EQ -> return mempty
GT -> stToIO $ do
mba <- newByteArray sz
copyAddrToByteArray ptr mba 0 sz
unsafeFreeze mba
LT -> return (error "fromLitAsciiAddr#")
foreign import ccall unsafe "strlen" c_strlen :: CString -> IO CSize
{-# NOINLINE fromLitMUtf8Addr# #-}
fromLitMUtf8Addr# :: Addr# -> ShortText
fromLitMUtf8Addr# (Ptr -> ptr) = unsafeDupablePerformIO $ do
sz <- B `fmap` c_text_short_mutf8_strlen ptr
case sz `compare` 0 of
EQ -> return mempty
GT -> stToIO $ do
mba <- newByteArray sz
copyAddrToByteArray ptr mba 0 sz
unsafeFreeze mba
LT -> do
mba <- stToIO (newByteArray (abs sz))
c_text_short_mutf8_trans ptr (unMBA# mba)
stToIO (unsafeFreeze mba)
foreign import ccall unsafe "hs_text_short_mutf8_strlen" c_text_short_mutf8_strlen :: CString -> IO Int
foreign import ccall unsafe "hs_text_short_mutf8_trans" c_text_short_mutf8_trans :: CString -> MutableByteArray# RealWorld -> IO ()