module Z.Data.Text.Base (
Text(..)
, validate, validateASCII
, validateMaybe, validateASCIIMaybe
, index, indexMaybe, charByteIndex, indexR, indexMaybeR, charByteIndexR
, empty, singleton, copy
, replicate , cycleN
, pack, packN, packR, packRN
, unpack, unpackR
, fromVector
, toVector
, null
, length
, append
, map', imap'
, foldl', ifoldl'
, foldr', ifoldr'
, concat, concatMap
, count, all, any
, NormalizationResult(..), NormalizeMode(..)
, isNormalized, isNormalizedTo, normalize, normalizeTo
, Locale(LocaleDefault, LocaleLithuanian, LocaleTurkishAndAzeriLatin)
, envLocale
, caseFold, caseFoldWith, toLower, toLowerWith, toUpper, toUpperWith, toTitle, toTitleWith
, isCategory, spanCategory
, Category(
CategoryLetterUppercase
, CategoryLetterLowercase
, CategoryLetterTitlecase
, CategoryLetterOther
, CategoryLetter
, CategoryCaseMapped
, CategoryMarkNonSpacing
, CategoryMarkSpacing
, CategoryMarkEnclosing
, CategoryMark
, CategoryNumberDecimal
, CategoryNumberLetter
, CategoryNumberOther
, CategoryNumber
, CategoryPunctuationConnector
, CategoryPunctuationDash
, CategoryPunctuationOpen
, CategoryPunctuationClose
, CategoryPunctuationInitial
, CategoryPunctuationFinal
, CategoryPunctuationOther
, CategoryPunctuation
, CategorySymbolMath
, CategorySymbolCurrency
, CategorySymbolModifier
, CategorySymbolOther
, CategorySymbol
, CategorySeparatorSpace
, CategorySeparatorLine
, CategorySeparatorParagraph
, CategorySeparator
, CategoryControl
, CategoryFormat
, CategorySurrogate
, CategoryPrivateUse
, CategoryUnassigned
, CategoryCompatibility
, CategoryIgnoreGraphemeCluster
, CategoryIscntrl
, CategoryIsprint
, CategoryIsspace
, CategoryIsblank
, CategoryIsgraph
, CategoryIspunct
, CategoryIsalnum
, CategoryIsalpha
, CategoryIsupper
, CategoryIslower
, CategoryIsdigit
, CategoryIsxdigit)
, TextException(..), errorEmptyText
, c_utf8_validate_ba
, c_utf8_validate_addr
, c_ascii_validate_ba
, c_ascii_validate_addr
) where
#define DOUBLE_QUOTE 34
import Control.DeepSeq
import Control.Exception
import Control.Monad.ST
import Control.Monad
import Data.Bits
import Data.Char hiding (toLower, toUpper, toTitle)
import Data.Foldable (foldlM)
import Data.Hashable (Hashable(..))
import qualified Data.List as List
import Data.Primitive.PrimArray
import Data.Typeable
import Data.Word
import Foreign.C.Types (CSize(..))
import GHC.Exts
import GHC.Types
import GHC.Stack
import GHC.CString (unpackCString#, unpackCStringUtf8#)
import Z.Data.Array
import Z.Data.Text.UTF8Codec
import Z.Data.Text.UTF8Rewind
import Z.Data.Vector.Base (Bytes, PrimVector(..), c_strlen)
import qualified Z.Data.Vector.Base as V
import qualified Z.Data.Vector.Search as V
import System.IO.Unsafe (unsafeDupablePerformIO)
import Prelude hiding (concat, concatMap,
elem, notElem, null, length, map,
foldl, foldl1, foldr, foldr1,
maximum, minimum, product, sum,
all, any, replicate, traverse)
import Test.QuickCheck.Arbitrary (Arbitrary(..), CoArbitrary(..))
newtype Text = Text
{ getUTF8Bytes :: Bytes
} deriving newtype (Monoid, Semigroup)
instance Eq Text where
Text b1 == Text b2 = b1 == b2
{-# INLINE (==) #-}
instance Ord Text where
Text b1 `compare` Text b2 = b1 `compare` b2
{-# INLINE compare #-}
instance Show Text where
show = unpack . escapeTextJSON
where
escapeTextJSON (Text (V.PrimVector ba@(PrimArray ba#) s l)) = unsafeDupablePerformIO $ do
let siz = escape_json_string_length ba# s l
mba@(MutablePrimArray mba#) <- newPrimArray siz
if siz == l+2
then do
writePrimArray mba 0 DOUBLE_QUOTE
copyPrimArray mba 1 ba s l
writePrimArray mba (l+1) DOUBLE_QUOTE
else void $ (escape_json_string ba# s l mba# 0)
ba' <- unsafeFreezePrimArray mba
return (Text (V.PrimVector ba' 0 siz))
foreign import ccall unsafe escape_json_string_length
:: ByteArray# -> Int -> Int -> Int
foreign import ccall unsafe escape_json_string
:: ByteArray# -> Int -> Int -> MutableByteArray# RealWorld -> Int -> IO Int
instance NFData Text where
rnf (Text bs) = rnf bs
instance Arbitrary Text where
arbitrary = pack <$> arbitrary
shrink a = pack <$> shrink (unpack a)
instance CoArbitrary Text where
coarbitrary = coarbitrary . unpack
instance Hashable Text where
{-# INLINE hashWithSalt #-}
hashWithSalt salt (Text bs) = hashWithSalt salt bs
instance IsString Text where
{-# INLINE fromString #-}
fromString = pack
index :: HasCallStack => Text -> Int -> Char
{-# INLINABLE index #-}
index t n = case t `indexMaybe` n of Nothing -> throw (IndexOutOfTextRange n callStack)
Just x -> x
indexMaybe :: Text -> Int -> Maybe Char
{-# INLINABLE indexMaybe #-}
indexMaybe (Text (V.PrimVector ba s l)) n
| n < 0 = Nothing
| otherwise = go s 0
where
!end = s + l
go !i !j
| i >= end = Nothing
| j >= n = let !c = decodeChar_ ba i in Just c
| otherwise = let l' = decodeCharLen ba i in go (i+l') (j+1)
charByteIndex :: Text -> Int -> Int
{-# INLINABLE charByteIndex #-}
charByteIndex (Text (V.PrimVector ba s l)) n
| n < 0 = s
| otherwise = go s 0
where
!end = s + l
go !i !j
| i >= end = i
| j >= n = i
| otherwise = let l' = decodeCharLen ba i in go (i+l') (j+1)
indexR :: HasCallStack => Text -> Int -> Char
{-# INLINABLE indexR #-}
indexR t n = case t `indexMaybeR` n of Nothing -> throw (V.IndexOutOfVectorRange n callStack)
Just x -> x
indexMaybeR :: Text -> Int -> Maybe Char
{-# INLINABLE indexMaybeR #-}
indexMaybeR (Text (V.PrimVector ba s l)) n
| n < 0 = Nothing
| otherwise = go (s+l-1) 0
where
go !i !j
| i < s = Nothing
| j >= n = let !c = decodeCharReverse_ ba i in Just c
| otherwise = let l' = decodeCharLenReverse ba i in go (i-l') (j+1)
charByteIndexR :: Text -> Int -> Int
{-# INLINABLE charByteIndexR #-}
charByteIndexR (Text (V.PrimVector ba s l)) n
| n < 0 = s+l
| otherwise = go (s+l-1) 0
where
go !i !j
| i < s = i
| j >= n = i
| otherwise = let l' = decodeCharLenReverse ba i in go (i-l') (j+1)
validate :: HasCallStack => Bytes -> Text
{-# INLINE validate #-}
validate bs@(V.PrimVector (PrimArray ba#) (I# s#) l@(I# l#))
| l == 0 = Text bs
| c_utf8_validate_ba ba# s# l# > 0 = Text bs
| otherwise = throw (InvalidUTF8Exception callStack)
validateMaybe :: Bytes -> Maybe Text
{-# INLINE validateMaybe #-}
validateMaybe bs@(V.PrimVector (PrimArray ba#) (I# s#) l@(I# l#))
| l == 0 = Just (Text bs)
| c_utf8_validate_ba ba# s# l# > 0 = Just (Text bs)
| otherwise = Nothing
validateASCII :: HasCallStack => Bytes -> Text
{-# INLINE validateASCII #-}
validateASCII bs@(V.PrimVector (PrimArray ba#) (I# s#) l@(I# l#))
| l == 0 = Text bs
| c_ascii_validate_ba ba# s# l# > 0 = Text bs
| otherwise = throw (InvalidASCIIException callStack)
validateASCIIMaybe :: Bytes -> Maybe Text
{-# INLINE validateASCIIMaybe #-}
validateASCIIMaybe bs@(V.PrimVector (PrimArray ba#) (I# s#) l@(I# l#))
| l == 0 = Just (Text bs)
| c_ascii_validate_ba ba# s# l# > 0 = Just (Text bs)
| otherwise = Nothing
foreign import ccall unsafe "text.h utf8_validate"
c_utf8_validate_ba :: ByteArray# -> Int# -> Int# -> Int
foreign import ccall unsafe "text.h utf8_validate_addr"
c_utf8_validate_addr :: Addr# -> Int -> IO Int
foreign import ccall unsafe "text.h ascii_validate"
c_ascii_validate_ba :: ByteArray# -> Int# -> Int# -> Int
foreign import ccall unsafe "text.h ascii_validate_addr"
c_ascii_validate_addr :: Addr# -> Int -> IO Int
data TextException = InvalidUTF8Exception CallStack
| InvalidASCIIException CallStack
| IndexOutOfTextRange Int CallStack
| EmptyText CallStack
deriving (Show, Typeable)
instance Exception TextException
errorEmptyText :: HasCallStack => a
{-# INLINE errorEmptyText #-}
errorEmptyText = throw (EmptyText callStack)
pack :: String -> Text
pack = packN V.defaultInitSize
{-# INLINE CONLIKE [0] pack #-}
{-# RULES "pack/packASCIIAddr" forall addr . pack (unpackCString# addr) = packASCIIAddr addr #-}
{-# RULES "pack/packUTF8Addr" forall addr . pack (unpackCStringUtf8# addr) = packUTF8Addr addr #-}
packASCIIAddr :: Addr# -> Text
packASCIIAddr addr0# = go addr0#
where
len = fromIntegral . unsafeDupablePerformIO $ c_strlen addr0#
go addr# = runST $ do
marr <- newPrimArray len
copyPtrToMutablePrimArray marr 0 (Ptr addr#) len
arr <- unsafeFreezePrimArray marr
return $ Text (PrimVector arr 0 len)
packUTF8Addr :: Addr# -> Text
packUTF8Addr addr0# = validateAndCopy addr0#
where
len = fromIntegral . unsafeDupablePerformIO $ c_strlen addr0#
valid = unsafeDupablePerformIO $ c_utf8_validate_addr addr0# len
validateAndCopy addr#
| valid == 0 = packN len (unpackCStringUtf8# addr#)
| otherwise = runST $ do
marr <- newPrimArray len
copyPtrToMutablePrimArray marr 0 (Ptr addr#) len
arr <- unsafeFreezePrimArray marr
return $ Text (PrimVector arr 0 len)
packN :: Int -> String -> Text
{-# INLINE packN #-}
packN n0 = \ ws0 ->
Text (V.create' (max 4 n0) (\ marr -> foldlM go (V.IPair 0 marr) ws0))
where
go :: V.IPair (MutablePrimArray s Word8) -> Char -> ST s (V.IPair (MutablePrimArray s Word8))
go (V.IPair i marr) !c = do
siz <- getSizeofMutablePrimArray marr
if i < siz - 3
then do
i' <- encodeChar marr i c
return (V.IPair i' marr)
else do
let !siz' = siz `shiftL` 1
!marr' <- resizeMutablePrimArray marr siz'
i' <- encodeChar marr' i c
return (V.IPair i' marr')
packR :: String -> Text
{-# INLINE packR #-}
packR = packRN V.defaultInitSize
packRN :: Int -> String -> Text
{-# INLINE packRN #-}
packRN n0 = \ ws0 -> runST (do let n = max 4 n0
marr <- newArr n
(V.IPair i marr') <- foldM go (V.IPair n marr) ws0
ba <- unsafeFreezeArr marr'
return $! Text (V.fromArr ba i (sizeofArr ba-i))
)
where
go :: V.IPair (MutablePrimArray s Word8) -> Char -> ST s (V.IPair (MutablePrimArray s Word8))
go (V.IPair i marr) !c = do
n <- sizeofMutableArr marr
let l = encodeCharLength c
if i >= l
then do _ <- encodeChar marr (i-l) c
return (V.IPair (i-l) marr)
else do let !n' = n `shiftL` 1
!marr' <- newArr n'
copyMutableArr marr' (n+i) marr i (n-i)
let i' = n+i-l
_ <- encodeChar marr' i' c
return (V.IPair i' marr')
unpack :: Text -> String
{-# INLINE [1] unpack #-}
unpack (Text (V.PrimVector ba s l)) = go s
where
!end = s + l
go !idx
| idx >= end = []
| otherwise = let (# c, i #) = decodeChar ba idx in c : go (idx + i)
unpackFB :: Text -> (Char -> a -> a) -> a -> a
{-# INLINE [0] unpackFB #-}
unpackFB (Text (V.PrimVector ba s l)) k z = go s
where
!end = s + l
go !idx
| idx >= end = z
| otherwise = let (# c, i #) = decodeChar ba idx in c `k` go (idx + i)
{-# RULES
"unpack" [~1] forall t . unpack t = build (\ k z -> unpackFB t k z)
"unpackFB" [1] forall t . unpackFB t (:) [] = unpack t
#-}
unpackR :: Text -> String
{-# INLINE [1] unpackR #-}
unpackR (Text (V.PrimVector ba s l)) = go (s+l-1)
where
go !idx
| idx < s = []
| otherwise = let (# c, i #) = decodeCharReverse ba idx in c : go (idx - i)
unpackRFB :: Text -> (Char -> a -> a) -> a -> a
{-# INLINE [0] unpackRFB #-}
unpackRFB (Text (V.PrimVector ba s l)) k z = go (s+l-1)
where
go !idx
| idx < s = z
| otherwise = let (# c, i #) = decodeCharReverse ba idx in c `k` go (idx - i)
{-# RULES
"unpackR" [~1] forall t . unpackR t = build (\ k z -> unpackRFB t k z)
"unpackRFB" [1] forall t . unpackRFB t (:) [] = unpackR t
#-}
singleton :: Char -> Text
{-# INLINABLE singleton #-}
singleton c = Text $ V.createN 4 $ \ marr -> encodeChar marr 0 c
empty :: Text
{-# INLINABLE empty #-}
empty = Text V.empty
copy :: Text -> Text
{-# INLINE copy #-}
copy (Text bs) = Text (V.copy bs)
append :: Text -> Text -> Text
append ta tb = Text ( getUTF8Bytes ta `V.append` getUTF8Bytes tb )
{-# INLINE append #-}
null :: Text -> Bool
{-# INLINABLE null #-}
null (Text bs) = V.null bs
length :: Text -> Int
{-# INLINABLE length #-}
length (Text (V.PrimVector ba s l)) = go s 0
where
!end = s + l
go !i !acc | i >= end = acc
| otherwise = let j = decodeCharLen ba i in go (i+j) (1+acc)
map' :: (Char -> Char) -> Text -> Text
{-# INLINE map' #-}
map' f (Text (V.PrimVector arr s l)) | l == 0 = empty
| otherwise = Text (V.create' (l+3) (go s 0))
where
end = s + l
go :: Int -> Int -> MutablePrimArray s Word8 -> ST s (V.IPair (MutablePrimArray s Word8))
go !i !j !marr
| i >= end = return (V.IPair j marr)
| otherwise = do
let (# c, d #) = decodeChar arr i
j' <- encodeChar marr j (f c)
let !i' = i + d
siz <- sizeofMutableArr marr
if j' < siz - 3
then go i' j' marr
else do
let !siz' = siz `shiftL` 1
!marr' <- resizeMutablePrimArray marr siz'
go i' j' marr'
imap' :: (Int -> Char -> Char) -> Text -> Text
{-# INLINE imap' #-}
imap' f (Text (V.PrimVector arr s l)) | l == 0 = empty
| otherwise = Text (V.create' (l+3) (go s 0 0))
where
end = s + l
go :: Int -> Int -> Int -> MutablePrimArray s Word8 -> ST s (V.IPair (MutablePrimArray s Word8))
go !i !j !k !marr
| i >= end = return (V.IPair j marr)
| otherwise = do
let (# c, d #) = decodeChar arr i
j' <- encodeChar marr j (f k c)
let !i' = i + d
!k' = k + 1
siz <- sizeofMutableArr marr
if j' < siz - 3
then go i' j' k' marr
else do
let !siz' = siz `shiftL` 1
!marr' <- resizeMutablePrimArray marr siz'
go i' j' k' marr'
foldl' :: (b -> Char -> b) -> b -> Text -> b
{-# INLINE foldl' #-}
foldl' f z (Text (V.PrimVector arr s l)) = go z s
where
!end = s + l
go !acc !i | i < end = case decodeChar arr i of
(# x, d #) -> go (f acc x) (i + d)
| otherwise = acc
ifoldl' :: (b -> Int -> Char -> b) -> b -> Text -> b
{-# INLINE ifoldl' #-}
ifoldl' f z (Text (V.PrimVector arr s l)) = go z s 0
where
!end = s + l
go !acc !i !k | i < end = case decodeChar arr i of
(# x, d #) -> go (f acc k x) (i + d) (k + 1)
| otherwise = acc
foldr' :: (Char -> b -> b) -> b -> Text -> b
{-# INLINE foldr' #-}
foldr' f z (Text (V.PrimVector arr s l)) = go z (s+l-1)
where
go !acc !i | i >= s = case decodeCharReverse arr i of
(# x, d #) -> go (f x acc) (i - d)
| otherwise = acc
ifoldr' :: (Int -> Char -> b -> b) -> b -> Text -> b
{-# INLINE ifoldr' #-}
ifoldr' f z (Text (V.PrimVector arr s l)) = go z (s+l-1) 0
where
go !acc !i !k | i >= s = case decodeCharReverse arr i of
(# x, d #) -> go (f k x acc) (i - d) (k + 1)
| otherwise = acc
concat :: [Text] -> Text
concat = Text . V.concat . coerce
{-# INLINE concat #-}
concatMap :: (Char -> Text) -> Text -> Text
{-# INLINE concatMap #-}
concatMap f = concat . foldr' ((:) . f) []
count :: Char -> Text -> Int
{-# INLINE count #-}
count c (Text v)
| encodeCharLength c == 1 = let w = V.c2w c in V.count w v
| otherwise = let (Text pat) = singleton c
in List.length $ V.indices pat v False
any :: (Char -> Bool) -> Text -> Bool
{-# INLINE any #-}
any f (Text (V.PrimVector arr s l))
| l <= 0 = False
| otherwise = case decodeChar arr s of
(# x0, d #) -> go (f x0) (s+d)
where
!end = s+l
go !acc !i | acc = True
| i >= end = acc
| otherwise = case decodeChar arr i of
(# x, d #) -> go (acc || f x) (i+d)
all :: (Char -> Bool) -> Text -> Bool
{-# INLINE all #-}
all f (Text (V.PrimVector arr s l))
| l <= 0 = True
| otherwise = case decodeChar arr s of
(# x0, d #) -> go (f x0) (s+d)
where
!end = s+l
go !acc !i | not acc = False
| i >= end = acc
| otherwise = case decodeChar arr i of
(# x, d #) -> go (acc && f x) (i+d)
replicate :: Int -> Char -> Text
{-# INLINE replicate #-}
replicate 0 _ = empty
replicate n c = Text (V.create siz (go 0))
where
!csiz = encodeCharLength c
!siz = n * csiz
go :: Int -> MutablePrimArray s Word8 -> ST s ()
go 0 marr = encodeChar marr 0 c >> go csiz marr
go i marr | i >= siz = return ()
| otherwise = do copyChar' csiz marr i marr (i-csiz)
go (i+csiz) marr
cycleN :: Int -> Text -> Text
{-# INLINE cycleN #-}
cycleN 0 _ = empty
cycleN n (Text v) = Text (V.cycleN n v)
fromVector :: V.PrimVector Char -> Text
{-# INLINE fromVector #-}
fromVector (V.PrimVector arr s l) = Text (V.createN l (go s 0))
where
end = s+l
go :: forall s. Int -> Int -> MutablePrimArray s Word8 -> ST s Int
go !i !j !marr
| i >= end = return j
| otherwise = do
let c = indexPrimArray arr i
j' <- encodeChar marr j c
go (i+1) j' marr
toVector :: Text -> V.PrimVector Char
{-# INLINE toVector #-}
toVector (Text (V.PrimVector arr s l)) = V.createN (l*4) (go s 0)
where
end = s+l
go :: forall s. Int -> Int -> MutablePrimArray s Char -> ST s Int
go !i !j !marr
| i >= end = return j
| otherwise = do
let (# c, n #) = decodeChar arr i
writePrimArray marr j c
go (i+n) (j+1) marr
isNormalized :: Text -> NormalizationResult
{-# INLINE isNormalized #-}
isNormalized = isNormalizedTo NFC
isNormalizedTo :: NormalizeMode -> Text -> NormalizationResult
isNormalizedTo nmode (Text (V.PrimVector (PrimArray arr#) (I# s#) l@(I# l#)))
| l == 0 = NormalizedYes
| otherwise =
let nflag = normalizeModeToFlag nmode
in toNormalizationResult (utf8_isnormalized arr# s# l# nflag)
normalize :: Text -> Text
{-# INLINE normalize #-}
normalize = normalizeTo NFC
normalizeTo :: NormalizeMode -> Text -> Text
normalizeTo nmode (Text (V.PrimVector (PrimArray arr#) (I# s#) l@(I# l#)))
| l == 0 = empty
| otherwise = unsafeDupablePerformIO $ do
let nflag = normalizeModeToFlag nmode
!l'@(I# l'#) = utf8_normalize_length arr# s# l# nflag
when (l' < 0) (error "impossible happened!")
!pa@(MutablePrimArray marr#) <- newArr l'
utf8_normalize arr# s# l# marr# l'# nflag
arr' <- unsafeFreezeArr pa
let !v = V.fromArr arr' 0 l'
return (Text v)
foreign import ccall unsafe utf8_isnormalized ::
ByteArray# -> Int# -> Int# -> CSize -> Int
foreign import ccall unsafe utf8_normalize ::
ByteArray# -> Int# -> Int# -> MutableByteArray# RealWorld -> Int# -> CSize -> IO ()
foreign import ccall unsafe utf8_normalize_length ::
ByteArray# -> Int# -> Int# -> CSize -> Int
caseFold :: Text -> Text
caseFold = caseFoldWith LocaleDefault
caseFoldWith :: Locale -> Text -> Text
caseFoldWith locale (Text (V.PrimVector (PrimArray arr#) (I# s#) l@(I# l#)))
| l == 0 = empty
| otherwise = unsafeDupablePerformIO $ do
let !l'@(I# l'#) = utf8_casefold_length arr# s# l# locale
when (l' < 0) (error "impossible happened!")
!pa@(MutablePrimArray marr#) <- newArr l'
utf8_casefold arr# s# l# marr# l'# locale
arr' <- unsafeFreezeArr pa
let !v = V.fromArr arr' 0 l'
return (Text v)
toLower :: Text -> Text
toLower = toLowerWith LocaleDefault
toLowerWith :: Locale -> Text -> Text
toLowerWith locale (Text (V.PrimVector (PrimArray arr#) (I# s#) l@(I# l#)))
| l == 0 = empty
| otherwise = unsafeDupablePerformIO $ do
let !l'@(I# l'#) = utf8_tolower_length arr# s# l# locale
when (l' < 0) (error "impossible happened!")
!pa@(MutablePrimArray marr#) <- newArr l'
utf8_tolower arr# s# l# marr# l'# locale
arr' <- unsafeFreezeArr pa
let !v = V.fromArr arr' 0 l'
return (Text v)
toUpper :: Text -> Text
toUpper = toUpperWith LocaleDefault
toUpperWith :: Locale -> Text -> Text
toUpperWith locale (Text (V.PrimVector (PrimArray arr#) (I# s#) l@(I# l#)))
| l == 0 = empty
| otherwise = unsafeDupablePerformIO $ do
let !l'@(I# l'#) = utf8_toupper_length arr# s# l# locale
when (l' < 0) (error "impossible happened!")
!pa@(MutablePrimArray marr#) <- newArr l'
utf8_toupper arr# s# l# marr# l'# locale
arr' <- unsafeFreezeArr pa
let !v = V.fromArr arr' 0 l'
return (Text v)
toTitle :: Text -> Text
toTitle = toTitleWith LocaleDefault
toTitleWith :: Locale -> Text -> Text
toTitleWith locale (Text (V.PrimVector (PrimArray arr#) (I# s#) l@(I# l#)))
| l == 0 = empty
| otherwise = unsafeDupablePerformIO $ do
let !l'@(I# l'#) = utf8_totitle_length arr# s# l# locale
when (l' < 0) (error "impossible happened!")
!pa@(MutablePrimArray marr#) <- newArr l'
utf8_totitle arr# s# l# marr# l'# locale
arr' <- unsafeFreezeArr pa
let !v = V.fromArr arr' 0 l'
return (Text v)
foreign import ccall unsafe utf8_casefold ::
ByteArray# -> Int# -> Int# -> MutableByteArray# RealWorld -> Int# -> Locale -> IO ()
foreign import ccall unsafe utf8_casefold_length ::
ByteArray# -> Int# -> Int# -> Locale -> Int
foreign import ccall unsafe utf8_tolower ::
ByteArray# -> Int# -> Int# -> MutableByteArray# RealWorld -> Int# -> Locale -> IO ()
foreign import ccall unsafe utf8_tolower_length ::
ByteArray# -> Int# -> Int# -> Locale -> Int
foreign import ccall unsafe utf8_toupper ::
ByteArray# -> Int# -> Int# -> MutableByteArray# RealWorld -> Int# -> Locale -> IO ()
foreign import ccall unsafe utf8_toupper_length ::
ByteArray# -> Int# -> Int# -> Locale -> Int
foreign import ccall unsafe utf8_totitle ::
ByteArray# -> Int# -> Int# -> MutableByteArray# RealWorld -> Int# -> Locale -> IO ()
foreign import ccall unsafe utf8_totitle_length ::
ByteArray# -> Int# -> Int# -> Locale -> Int
isCategory :: Category -> Text -> Bool
isCategory c (Text (V.PrimVector (PrimArray arr#) (I# s#) l@(I# l#)))
| l == 0 = True
| otherwise = utf8_iscategory arr# s# l# c == l
spanCategory :: Category -> Text -> (Text, Text)
spanCategory c (Text (V.PrimVector arr@(PrimArray arr#) s@(I# s#) l@(I# l#)))
| l == 0 = (empty, empty)
| otherwise =
let i = utf8_iscategory arr# s# l# c
in (Text (V.PrimVector arr s i), Text (V.PrimVector arr (s+i) (l-i)))
foreign import ccall utf8_iscategory :: ByteArray# -> Int# -> Int# -> Category -> Int