{-# LANGUAGE BangPatterns,CPP #-}
#if __GLASGOW_HASKELL__ >= 702
{-# LANGUAGE Trustworthy #-}
#endif
module Data.Text.Lazy.Encoding
(
decodeASCII
, decodeLatin1
, decodeUtf8
, decodeUtf16LE
, decodeUtf16BE
, decodeUtf32LE
, decodeUtf32BE
, decodeUtf8'
, decodeUtf8With
, decodeUtf16LEWith
, decodeUtf16BEWith
, decodeUtf32LEWith
, decodeUtf32BEWith
, encodeUtf8
, encodeUtf16LE
, encodeUtf16BE
, encodeUtf32LE
, encodeUtf32BE
, encodeUtf8Builder
, encodeUtf8BuilderEscaped
) where
import Control.Exception (evaluate, try)
import Data.Monoid (Monoid(..))
import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode)
import Data.Text.Internal.Lazy (Text(..), chunk, empty, foldrChunks)
import Data.Word (Word8)
import qualified Data.ByteString as S
import qualified Data.ByteString.Builder as B
import qualified Data.ByteString.Builder.Extra as B (safeStrategy, toLazyByteStringWith)
import qualified Data.ByteString.Builder.Prim as BP
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Internal as B
import qualified Data.ByteString.Unsafe as B
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Internal.Lazy.Encoding.Fusion as E
import qualified Data.Text.Internal.Lazy.Fusion as F
import Data.Text.Unsafe (unsafeDupablePerformIO)
-- | /Deprecated/. Decode a lazy 'B.ByteString' by delegating directly to
-- 'decodeUtf8'; this function performs no ASCII-specific validation of
-- its own.
decodeASCII :: B.ByteString -> Text
decodeASCII = decodeUtf8
{-# DEPRECATED decodeASCII "Use decodeUtf8 instead" #-}
-- | Decode a lazy 'B.ByteString' as Latin-1 text.
--
-- The input is split into its strict chunks; each chunk is decoded
-- independently with the strict 'TE.decodeLatin1' and the results are
-- reassembled with 'chunk', so the output is produced lazily,
-- chunk by chunk.
decodeLatin1 :: B.ByteString -> Text
decodeLatin1 = foldr (chunk . TE.decodeLatin1) empty . B.toChunks
-- | Decode a lazy 'B.ByteString' containing UTF-8 encoded text, using the
-- supplied 'OnDecodeError' handler for invalid input.
--
-- The first strict chunk is fed to 'TE.streamDecodeUtf8With'; the
-- continuation it returns is then applied to each following chunk in turn,
-- so a multi-byte sequence that straddles a chunk boundary is carried over
-- to the next chunk rather than treated as an error.
--
-- When the input ends while undecoded bytes are still pending (a truncated
-- trailing sequence), the handler is invoked once for /every/ pending
-- byte, not only the first one, so each leftover byte gets the chance to
-- be replaced or dropped.
--
-- An empty input yields 'empty'.
decodeUtf8With :: OnDecodeError -> B.ByteString -> Text
decodeUtf8With onErr (B.Chunk b0 bs0) =
    case TE.streamDecodeUtf8With onErr b0 of
      TE.Some t l f -> chunk t (go f l bs0)
  where
    -- More input: resume the streaming decoder on the next strict chunk.
    go f0 _ (B.Chunk b bs) =
      case f0 b of
        TE.Some t l f -> chunk t (go f l bs)
    -- End of input: @l@ holds whatever bytes the decoder could not consume.
    go _ l _
      | S.null l  = empty
      | otherwise = chunk (T.pack (S.foldr replace [] l)) empty

    -- Ask the handler about one leftover byte; keep its replacement
    -- character if it supplies one, otherwise drop the byte.
    replace w rest = maybe rest (: rest) (onErr desc (Just w))

    desc = "Data.Text.Lazy.Encoding.decodeUtf8With: Invalid UTF-8 stream"
decodeUtf8With _ _ = empty
-- | Decode a lazy 'B.ByteString' containing UTF-8 encoded text.
--
-- This is 'decodeUtf8With' specialised to the 'strictDecode' error
-- handler; see 'decodeUtf8'' for a variant that reports failure through
-- 'Either' instead.
decodeUtf8 :: B.ByteString -> Text
decodeUtf8 = decodeUtf8With strictDecode
{-# INLINE[0] decodeUtf8 #-}
-- | Decode a lazy 'B.ByteString' containing UTF-8 encoded text, returning
-- 'Left' with the 'UnicodeException' if one is raised while decoding and
-- 'Right' with the decoded text otherwise.
--
-- The result of 'decodeUtf8' is lazy, so before returning it the whole
-- chain of chunks is walked ('rnf') inside 'try'; this makes any exception
-- hidden in a later chunk surface here rather than in the caller.
decodeUtf8' :: B.ByteString -> Either UnicodeException Text
decodeUtf8' bs = unsafeDupablePerformIO $ do
    let t = decodeUtf8 bs
    try (evaluate (rnf t `seq` t))
  where
    -- Traverse every chunk constructor of the lazy text.
    rnf Empty        = ()
    rnf (Chunk _ ts) = rnf ts
{-# INLINE decodeUtf8' #-}
-- | Encode lazy text as a UTF-8 lazy 'B.ByteString'.
--
-- Empty text maps straight to the empty bytestring. Otherwise the text is
-- rendered through 'encodeUtf8Builder' with a custom allocation strategy:
-- the first output buffer is sized from the first text chunk
-- (@4 * (length + 1)@ bytes, capped at 'B.smallChunkSize'), and subsequent
-- buffers use 'B.defaultChunkSize'.
encodeUtf8 :: Text -> B.ByteString
encodeUtf8 Empty = B.empty
encodeUtf8 lt@(Chunk t _) =
    B.toLazyByteStringWith strategy B.empty $ encodeUtf8Builder lt
  where
    -- Size of the first buffer, derived from the first chunk's length.
    firstChunkSize = min B.smallChunkSize (4 * (T.length t + 1))
    strategy       = B.safeStrategy firstChunkSize B.defaultChunkSize
-- | Encode lazy text as UTF-8 into a 'B.Builder'.
--
-- Each strict chunk is encoded with the strict 'TE.encodeUtf8Builder' and
-- the per-chunk builders are appended in order.
encodeUtf8Builder :: Text -> B.Builder
encodeUtf8Builder =
    foldrChunks (\c b -> TE.encodeUtf8Builder c `mappend` b) mempty
-- | Encode lazy text as UTF-8 into a 'B.Builder', passing the supplied
-- 'BP.BoundedPrim' through to the strict 'TE.encodeUtf8BuilderEscaped'
-- for every chunk.
--
-- The per-chunk builders are appended in order.
{-# INLINE encodeUtf8BuilderEscaped #-}
encodeUtf8BuilderEscaped :: BP.BoundedPrim Word8 -> Text -> B.Builder
encodeUtf8BuilderEscaped prim =
    foldrChunks (\c b -> TE.encodeUtf8BuilderEscaped prim c `mappend` b) mempty
-- | Decode a lazy 'B.ByteString' as little-endian UTF-16 text, using the
-- supplied 'OnDecodeError' handler.
--
-- Implemented by streaming the bytes through 'E.streamUtf16LE' and
-- materialising the resulting character stream with 'F.unstream'.
decodeUtf16LEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf16LEWith onErr bs = F.unstream (E.streamUtf16LE onErr bs)
{-# INLINE decodeUtf16LEWith #-}
-- | Decode a lazy 'B.ByteString' as little-endian UTF-16 text.
--
-- This is 'decodeUtf16LEWith' specialised to the 'strictDecode' error
-- handler.
decodeUtf16LE :: B.ByteString -> Text
decodeUtf16LE = decodeUtf16LEWith strictDecode
{-# INLINE decodeUtf16LE #-}
-- | Decode a lazy 'B.ByteString' as big-endian UTF-16 text, using the
-- supplied 'OnDecodeError' handler.
--
-- Implemented by streaming the bytes through 'E.streamUtf16BE' and
-- materialising the resulting character stream with 'F.unstream'.
decodeUtf16BEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf16BEWith onErr bs = F.unstream (E.streamUtf16BE onErr bs)
{-# INLINE decodeUtf16BEWith #-}
-- | Decode a lazy 'B.ByteString' as big-endian UTF-16 text.
--
-- This is 'decodeUtf16BEWith' specialised to the 'strictDecode' error
-- handler.
decodeUtf16BE :: B.ByteString -> Text
decodeUtf16BE = decodeUtf16BEWith strictDecode
{-# INLINE decodeUtf16BE #-}
-- | Encode lazy text as a little-endian UTF-16 lazy 'B.ByteString'.
--
-- Each strict text chunk is encoded with the strict 'TE.encodeUtf16LE'
-- and becomes one chunk of the output.
encodeUtf16LE :: Text -> B.ByteString
encodeUtf16LE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf16LE) [] txt)
{-# INLINE encodeUtf16LE #-}
-- | Encode lazy text as a big-endian UTF-16 lazy 'B.ByteString'.
--
-- Each strict text chunk is encoded with the strict 'TE.encodeUtf16BE'
-- and becomes one chunk of the output.
encodeUtf16BE :: Text -> B.ByteString
encodeUtf16BE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf16BE) [] txt)
{-# INLINE encodeUtf16BE #-}
-- | Decode a lazy 'B.ByteString' as little-endian UTF-32 text, using the
-- supplied 'OnDecodeError' handler.
--
-- Implemented by streaming the bytes through 'E.streamUtf32LE' and
-- materialising the resulting character stream with 'F.unstream'.
decodeUtf32LEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf32LEWith onErr bs = F.unstream (E.streamUtf32LE onErr bs)
{-# INLINE decodeUtf32LEWith #-}
-- | Decode a lazy 'B.ByteString' as little-endian UTF-32 text.
--
-- This is 'decodeUtf32LEWith' specialised to the 'strictDecode' error
-- handler.
decodeUtf32LE :: B.ByteString -> Text
decodeUtf32LE = decodeUtf32LEWith strictDecode
{-# INLINE decodeUtf32LE #-}
-- | Decode a lazy 'B.ByteString' as big-endian UTF-32 text, using the
-- supplied 'OnDecodeError' handler.
--
-- Implemented by streaming the bytes through 'E.streamUtf32BE' and
-- materialising the resulting character stream with 'F.unstream'.
decodeUtf32BEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf32BEWith onErr bs = F.unstream (E.streamUtf32BE onErr bs)
{-# INLINE decodeUtf32BEWith #-}
-- | Decode a lazy 'B.ByteString' as big-endian UTF-32 text.
--
-- This is 'decodeUtf32BEWith' specialised to the 'strictDecode' error
-- handler.
decodeUtf32BE :: B.ByteString -> Text
decodeUtf32BE = decodeUtf32BEWith strictDecode
{-# INLINE decodeUtf32BE #-}
-- | Encode lazy text as a little-endian UTF-32 lazy 'B.ByteString'.
--
-- Each strict text chunk is encoded with the strict 'TE.encodeUtf32LE'
-- and becomes one chunk of the output.
encodeUtf32LE :: Text -> B.ByteString
encodeUtf32LE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf32LE) [] txt)
{-# INLINE encodeUtf32LE #-}
-- | Encode lazy text as a big-endian UTF-32 lazy 'B.ByteString'.
--
-- Each strict text chunk is encoded with the strict 'TE.encodeUtf32BE'
-- and becomes one chunk of the output.
encodeUtf32BE :: Text -> B.ByteString
encodeUtf32BE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf32BE) [] txt)
{-# INLINE encodeUtf32BE #-}