module Data.Enumerator.Text
(
enumHandle
, enumFile
, iterHandle
, fold
, foldM
, Data.Enumerator.Text.map
, Data.Enumerator.Text.mapM
, Data.Enumerator.Text.mapM_
, Data.Enumerator.Text.concatMap
, concatMapM
, mapAccum
, mapAccumM
, concatMapAccum
, concatMapAccumM
, Data.Enumerator.Text.iterate
, iterateM
, Data.Enumerator.Text.repeat
, repeatM
, Data.Enumerator.Text.replicate
, replicateM
, generateM
, unfold
, unfoldM
, Data.Enumerator.Text.drop
, Data.Enumerator.Text.dropWhile
, Data.Enumerator.Text.filter
, filterM
, Data.Enumerator.Text.head
, head_
, Data.Enumerator.Text.take
, takeWhile
, consume
, zip
, zip3
, zip4
, zip5
, zip6
, zip7
, zipWith
, zipWith3
, zipWith4
, zipWith5
, zipWith6
, zipWith7
, require
, isolate
, isolateWhile
, splitWhen
, lines
, Codec
, encode
, decode
, utf8
, utf16_le
, utf16_be
, utf32_le
, utf32_be
, ascii
, iso8859_1
) where
import qualified Prelude
import Prelude hiding (head, drop, takeWhile, lines, zip, zip3, zipWith, zipWith3)
import Control.Arrow (first)
import qualified Control.Exception as Exc
import qualified Control.Monad as CM
import Control.Monad (liftM)
import Control.Monad.IO.Class (MonadIO)
import Control.Monad.Trans.Class (lift)
import Data.Bits ((.&.), (.|.), shiftL)
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import Data.Char (ord)
import Data.Maybe (catMaybes)
import Data.Monoid (mappend)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.IO as TIO
import qualified Data.Text.Lazy as TL
import Data.Word (Word8, Word16)
import qualified System.IO as IO
import System.IO.Error (isEOFError)
import System.IO.Unsafe (unsafePerformIO)
import Data.Enumerator.Internal
import Data.Enumerator (isEOF, tryIO, throwError)
import qualified Data.Enumerator.List as EL
import Data.Enumerator.Util (tSpanBy, tlSpanBy, reprWord, reprChar, textToStrict)
fold :: Monad m => (b -> Char -> b) -> b
-> Iteratee T.Text m b
fold step = EL.fold (T.foldl' step)
foldM :: Monad m => (b -> Char -> m b) -> b
-> Iteratee T.Text m b
foldM step = EL.foldM (\b txt -> CM.foldM step b (T.unpack txt))
unfold :: Monad m => (s -> Maybe (Char, s)) -> s -> Enumerator T.Text m b
unfold f = checkContinue1 $ \loop s k -> case f s of
Nothing -> continue k
Just (c, s') -> k (Chunks [T.singleton c]) >>== loop s'
unfoldM :: Monad m => (s -> m (Maybe (Char, s))) -> s -> Enumerator T.Text m b
unfoldM f = checkContinue1 $ \loop s k -> do
fs <- lift (f s)
case fs of
Nothing -> continue k
Just (c, s') -> k (Chunks [T.singleton c]) >>== loop s'
map :: Monad m => (Char -> Char) -> Enumeratee T.Text T.Text m b
map f = Data.Enumerator.Text.concatMap (\x -> T.singleton (f x))
mapM :: Monad m => (Char -> m Char) -> Enumeratee T.Text T.Text m b
mapM f = Data.Enumerator.Text.concatMapM (\x -> liftM T.singleton (f x))
mapM_ :: Monad m => (Char -> m ()) -> Iteratee T.Text m ()
mapM_ f = foldM (\_ x -> f x >> return ()) ()
concatMap :: Monad m => (Char -> T.Text) -> Enumeratee T.Text T.Text m b
concatMap f = Data.Enumerator.Text.concatMapM (return . f)
concatMapM :: Monad m => (Char -> m T.Text) -> Enumeratee T.Text T.Text m b
concatMapM f = checkDone (continue . step) where
step k EOF = yield (Continue k) EOF
step k (Chunks xs) = loop k (TL.unpack (TL.fromChunks xs))
loop k [] = continue (step k)
loop k (x:xs) = do
fx <- lift (f x)
k (Chunks [fx]) >>==
checkDoneEx (Chunks [T.pack xs]) (`loop` xs)
concatMapAccum :: Monad m => (s -> Char -> (s, T.Text)) -> s -> Enumeratee T.Text T.Text m b
concatMapAccum f s0 = checkDone (continue . step s0) where
step _ k EOF = yield (Continue k) EOF
step s k (Chunks xs) = loop s k xs
loop s k [] = continue (step s k)
loop s k (x:xs) = case T.uncons x of
Nothing -> loop s k xs
Just (c, x') -> case f s c of
(s', ai) -> k (Chunks [ai]) >>==
checkDoneEx (Chunks (x':xs)) (\k' -> loop s' k' (x':xs))
concatMapAccumM :: Monad m => (s -> Char -> m (s, T.Text)) -> s -> Enumeratee T.Text T.Text m b
concatMapAccumM f s0 = checkDone (continue . step s0) where
step _ k EOF = yield (Continue k) EOF
step s k (Chunks xs) = loop s k xs
loop s k [] = continue (step s k)
loop s k (x:xs) = case T.uncons x of
Nothing -> loop s k xs
Just (c, x') -> do
(s', ai) <- lift (f s c)
k (Chunks [ai]) >>==
checkDoneEx (Chunks (x':xs)) (\k' -> loop s' k' (x':xs))
mapAccum :: Monad m => (s -> Char -> (s, Char)) -> s -> Enumeratee T.Text T.Text m b
mapAccum f = concatMapAccum (\s c -> case f s c of (s', c') -> (s', T.singleton c'))
mapAccumM :: Monad m => (s -> Char -> m (s, Char)) -> s -> Enumeratee T.Text T.Text m b
mapAccumM f = concatMapAccumM (\s c -> do
(s', c') <- f s c
return (s', T.singleton c'))
iterate :: Monad m => (Char -> Char) -> Char -> Enumerator T.Text m b
iterate f = checkContinue1 $ \loop s k -> k (Chunks [T.singleton s]) >>== loop (f s)
iterateM :: Monad m => (Char -> m Char) -> Char -> Enumerator T.Text m b
iterateM f base = worker (return base) where
worker = checkContinue1 $ \loop m_char k -> do
char <- lift m_char
k (Chunks [T.singleton char]) >>== loop (f char)
repeat :: Monad m => Char -> Enumerator T.Text m b
repeat char = EL.repeat (T.singleton char)
repeatM :: Monad m => m Char -> Enumerator T.Text m b
repeatM next = EL.repeatM (liftM T.singleton next)
replicate :: Monad m => Integer -> Char -> Enumerator T.Text m b
replicate n byte = EL.replicate n (T.singleton byte)
replicateM :: Monad m => Integer -> m Char -> Enumerator T.Text m b
replicateM n next = EL.replicateM n (liftM T.singleton next)
generateM :: Monad m => m (Maybe Char) -> Enumerator T.Text m b
generateM next = EL.generateM (liftM (liftM T.singleton) next)
filter :: Monad m => (Char -> Bool) -> Enumeratee T.Text T.Text m b
filter p = Data.Enumerator.Text.concatMap (\x -> T.pack [x | p x])
filterM :: Monad m => (Char -> m Bool) -> Enumeratee T.Text T.Text m b
filterM p = Data.Enumerator.Text.concatMapM (\x -> liftM T.pack (CM.filterM p [x]))
take :: Monad m => Integer -> Iteratee T.Text m TL.Text
take n | n <= 0 = return TL.empty
take n = continue (loop id n) where
loop acc n' (Chunks xs) = iter where
lazy = TL.fromChunks xs
len = toInteger (TL.length lazy)
iter = if len < n'
then continue (loop (acc . TL.append lazy) (n' len))
else let
(xs', extra) = TL.splitAt (fromInteger n') lazy
in yield (acc xs') (toChunks extra)
loop acc _ EOF = yield (acc TL.empty) EOF
takeWhile :: Monad m => (Char -> Bool) -> Iteratee T.Text m TL.Text
takeWhile p = continue (loop id) where
loop acc (Chunks []) = continue (loop acc)
loop acc (Chunks xs) = iter where
lazy = TL.fromChunks xs
(xs', extra) = tlSpanBy p lazy
iter = if TL.null extra
then continue (loop (acc . TL.append lazy))
else yield (acc xs') (toChunks extra)
loop acc EOF = yield (acc TL.empty) EOF
consume :: Monad m => Iteratee T.Text m TL.Text
consume = continue (loop id) where
loop acc (Chunks []) = continue (loop acc)
loop acc (Chunks xs) = iter where
lazy = TL.fromChunks xs
iter = continue (loop (acc . TL.append lazy))
loop acc EOF = yield (acc TL.empty) EOF
zip :: Monad m
=> Iteratee T.Text m b1
-> Iteratee T.Text m b2
-> Iteratee T.Text m (b1, b2)
zip i1 i2 = continue step where
step (Chunks []) = continue step
step stream@(Chunks _) = do
let enumStream s = case s of
Continue k -> k stream
Yield b extra -> yield b (mappend extra stream)
Error err -> throwError err
s1 <- lift (runIteratee (enumStream ==<< i1))
s2 <- lift (runIteratee (enumStream ==<< i2))
case (s1, s2) of
(Continue k1, Continue k2) -> zip (continue k1) (continue k2)
(Yield b1 _, Continue k2) -> zip (yield b1 (Chunks [])) (continue k2)
(Continue k1, Yield b2 _) -> zip (continue k1) (yield b2 (Chunks []))
(Yield b1 ex1, Yield b2 ex2) -> yield (b1, b2) (shorter ex1 ex2)
(Error err, _) -> throwError err
(_, Error err) -> throwError err
step EOF = do
b1 <- enumEOF =<< lift (runIteratee i1)
b2 <- enumEOF =<< lift (runIteratee i2)
return (b1, b2)
shorter c1@(Chunks xs) c2@(Chunks ys) = let
xs' = T.concat xs
ys' = T.concat ys
in if T.length xs' < T.length ys'
then c1
else c2
shorter _ _ = EOF
zip3 :: Monad m
=> Iteratee T.Text m b1
-> Iteratee T.Text m b2
-> Iteratee T.Text m b3
-> Iteratee T.Text m (b1, b2, b3)
zip3 i1 i2 i3 = do
(b1, (b2, b3)) <- zip i1 (zip i2 i3)
return (b1, b2, b3)
zip4 :: Monad m
=> Iteratee T.Text m b1
-> Iteratee T.Text m b2
-> Iteratee T.Text m b3
-> Iteratee T.Text m b4
-> Iteratee T.Text m (b1, b2, b3, b4)
zip4 i1 i2 i3 i4 = do
(b1, (b2, b3, b4)) <- zip i1 (zip3 i2 i3 i4)
return (b1, b2, b3, b4)
zip5 :: Monad m
=> Iteratee T.Text m b1
-> Iteratee T.Text m b2
-> Iteratee T.Text m b3
-> Iteratee T.Text m b4
-> Iteratee T.Text m b5
-> Iteratee T.Text m (b1, b2, b3, b4, b5)
zip5 i1 i2 i3 i4 i5 = do
(b1, (b2, b3, b4, b5)) <- zip i1 (zip4 i2 i3 i4 i5)
return (b1, b2, b3, b4, b5)
zip6 :: Monad m
=> Iteratee T.Text m b1
-> Iteratee T.Text m b2
-> Iteratee T.Text m b3
-> Iteratee T.Text m b4
-> Iteratee T.Text m b5
-> Iteratee T.Text m b6
-> Iteratee T.Text m (b1, b2, b3, b4, b5, b6)
zip6 i1 i2 i3 i4 i5 i6 = do
(b1, (b2, b3, b4, b5, b6)) <- zip i1 (zip5 i2 i3 i4 i5 i6)
return (b1, b2, b3, b4, b5, b6)
zip7 :: Monad m
=> Iteratee T.Text m b1
-> Iteratee T.Text m b2
-> Iteratee T.Text m b3
-> Iteratee T.Text m b4
-> Iteratee T.Text m b5
-> Iteratee T.Text m b6
-> Iteratee T.Text m b7
-> Iteratee T.Text m (b1, b2, b3, b4, b5, b6, b7)
zip7 i1 i2 i3 i4 i5 i6 i7 = do
(b1, (b2, b3, b4, b5, b6, b7)) <- zip i1 (zip6 i2 i3 i4 i5 i6 i7)
return (b1, b2, b3, b4, b5, b6, b7)
zipWith :: Monad m
=> (b1 -> b2 -> c)
-> Iteratee T.Text m b1
-> Iteratee T.Text m b2
-> Iteratee T.Text m c
zipWith f i1 i2 = do
(b1, b2) <- zip i1 i2
return (f b1 b2)
zipWith3 :: Monad m
=> (b1 -> b2 -> b3 -> c)
-> Iteratee T.Text m b1
-> Iteratee T.Text m b2
-> Iteratee T.Text m b3
-> Iteratee T.Text m c
zipWith3 f i1 i2 i3 = do
(b1, b2, b3) <- zip3 i1 i2 i3
return (f b1 b2 b3)
zipWith4 :: Monad m
=> (b1 -> b2 -> b3 -> b4 -> c)
-> Iteratee T.Text m b1
-> Iteratee T.Text m b2
-> Iteratee T.Text m b3
-> Iteratee T.Text m b4
-> Iteratee T.Text m c
zipWith4 f i1 i2 i3 i4 = do
(b1, b2, b3, b4) <- zip4 i1 i2 i3 i4
return (f b1 b2 b3 b4)
zipWith5 :: Monad m
=> (b1 -> b2 -> b3 -> b4 -> b5 -> c)
-> Iteratee T.Text m b1
-> Iteratee T.Text m b2
-> Iteratee T.Text m b3
-> Iteratee T.Text m b4
-> Iteratee T.Text m b5
-> Iteratee T.Text m c
zipWith5 f i1 i2 i3 i4 i5 = do
(b1, b2, b3, b4, b5) <- zip5 i1 i2 i3 i4 i5
return (f b1 b2 b3 b4 b5)
zipWith6 :: Monad m
=> (b1 -> b2 -> b3 -> b4 -> b5 -> b6 -> c)
-> Iteratee T.Text m b1
-> Iteratee T.Text m b2
-> Iteratee T.Text m b3
-> Iteratee T.Text m b4
-> Iteratee T.Text m b5
-> Iteratee T.Text m b6
-> Iteratee T.Text m c
zipWith6 f i1 i2 i3 i4 i5 i6 = do
(b1, b2, b3, b4, b5, b6) <- zip6 i1 i2 i3 i4 i5 i6
return (f b1 b2 b3 b4 b5 b6)
zipWith7 :: Monad m
=> (b1 -> b2 -> b3 -> b4 -> b5 -> b6 -> b7 -> c)
-> Iteratee T.Text m b1
-> Iteratee T.Text m b2
-> Iteratee T.Text m b3
-> Iteratee T.Text m b4
-> Iteratee T.Text m b5
-> Iteratee T.Text m b6
-> Iteratee T.Text m b7
-> Iteratee T.Text m c
zipWith7 f i1 i2 i3 i4 i5 i6 i7 = do
(b1, b2, b3, b4, b5, b6, b7) <- zip7 i1 i2 i3 i4 i5 i6 i7
return (f b1 b2 b3 b4 b5 b6 b7)
head :: Monad m => Iteratee T.Text m (Maybe Char)
head = continue loop where
loop (Chunks xs) = case TL.uncons (TL.fromChunks xs) of
Just (char, extra) -> yield (Just char) (toChunks extra)
Nothing -> head
loop EOF = yield Nothing EOF
head_ :: Monad m => Iteratee T.Text m Char
head_ = head >>= \x -> case x of
Just x' -> return x'
Nothing -> throwError (Exc.ErrorCall "head_: stream has ended")
drop :: Monad m => Integer -> Iteratee T.Text m ()
drop n | n <= 0 = return ()
drop n = continue (loop n) where
loop n' (Chunks xs) = iter where
lazy = TL.fromChunks xs
len = toInteger (TL.length lazy)
iter = if len < n'
then drop (n' len)
else yield () (toChunks (TL.drop (fromInteger n') lazy))
loop _ EOF = yield () EOF
dropWhile :: Monad m => (Char -> Bool) -> Iteratee T.Text m ()
dropWhile p = continue loop where
loop (Chunks xs) = iter where
lazy = TL.dropWhile p (TL.fromChunks xs)
iter = if TL.null lazy
then continue loop
else yield () (toChunks lazy)
loop EOF = yield () EOF
require :: Monad m => Integer -> Iteratee T.Text m ()
require n | n <= 0 = return ()
require n = continue (loop id n) where
loop acc n' (Chunks xs) = iter where
lazy = TL.fromChunks xs
len = toInteger (TL.length lazy)
iter = if len < n'
then continue (loop (acc . TL.append lazy) (n' len))
else yield () (toChunks (acc lazy))
loop _ _ EOF = throwError (Exc.ErrorCall "require: Unexpected EOF")
isolate :: Monad m => Integer -> Enumeratee T.Text T.Text m b
isolate n step | n <= 0 = return step
isolate n (Continue k) = continue loop where
loop (Chunks []) = continue loop
loop (Chunks xs) = iter where
lazy = TL.fromChunks xs
len = toInteger (TL.length lazy)
iter = if len <= n
then k (Chunks xs) >>== isolate (n len)
else let
(s1, s2) = TL.splitAt (fromInteger n) lazy
in k (toChunks s1) >>== (`yield` toChunks s2)
loop EOF = k EOF >>== (`yield` EOF)
isolate n step = drop n >> return step
isolateWhile :: Monad m => (Char -> Bool) -> Enumeratee T.Text T.Text m b
isolateWhile p (Continue k) = continue loop where
loop (Chunks []) = continue loop
loop (Chunks xs) = iter where
lazy = TL.fromChunks xs
(s1, s2) = tlSpanBy p lazy
iter = if TL.null s2
then k (Chunks xs) >>== isolateWhile p
else k (toChunks s1) >>== (`yield` toChunks s2)
loop EOF = k EOF >>== (`yield` EOF)
isolateWhile p step = Data.Enumerator.Text.dropWhile p >> return step
splitWhen :: Monad m => (Char -> Bool) -> Enumeratee T.Text T.Text m b
splitWhen p = loop where
loop = checkDone step
step k = isEOF >>= \eof -> if eof
then yield (Continue k) EOF
else do
lazy <- takeWhile (not . p)
let text = textToStrict lazy
eof <- isEOF
drop 1
if TL.null lazy && eof
then yield (Continue k) EOF
else k (Chunks [text]) >>== loop
lines :: Monad m => Enumeratee T.Text T.Text m b
lines = splitWhen (== '\n')
enumHandle :: MonadIO m => IO.Handle
-> Enumerator T.Text m b
enumHandle h = checkContinue0 $ \loop k -> do
maybeText <- tryIO (textGetLine h)
case maybeText of
Nothing -> continue k
Just text -> k (Chunks [text]) >>== loop
textGetLine :: IO.Handle -> IO (Maybe T.Text)
textGetLine h = loop [] where
#if MIN_VERSION_base(4,2,0)
pack = T.pack
#else
pack = TE.decodeUtf8 . B8.pack
#endif
loop acc = Exc.catch
(do
c <- IO.hGetChar h
if c == '\n'
then return (Just (pack (reverse (c:acc))))
else loop (c:acc))
(\err -> if isEOFError err
then case acc of
[] -> return Nothing
_ -> return (Just (pack (reverse acc)))
else Exc.throwIO err)
enumFile :: FilePath -> Enumerator T.Text IO b
enumFile path step = do
h <- tryIO (IO.openFile path IO.ReadMode)
Iteratee $ Exc.finally
(runIteratee (enumHandle h step))
(IO.hClose h)
iterHandle :: MonadIO m => IO.Handle
-> Iteratee T.Text m ()
iterHandle h = continue step where
step EOF = yield () EOF
step (Chunks []) = continue step
step (Chunks chunks) = do
tryIO (CM.mapM_ (TIO.hPutStr h) chunks)
continue step
data Codec = Codec
{ codecName :: T.Text
, codecEncode
:: T.Text
-> (B.ByteString, Maybe (Exc.SomeException, T.Text))
, codecDecode
:: B.ByteString
-> (T.Text, Either
(Exc.SomeException, B.ByteString)
B.ByteString)
}
instance Show Codec where
showsPrec d c = showParen (d > 10) $
showString "Codec " . shows (codecName c)
encode :: Monad m => Codec
-> Enumeratee T.Text B.ByteString m b
encode codec = checkDone (continue . step) where
step k EOF = yield (Continue k) EOF
step k (Chunks xs) = loop k xs
loop k [] = continue (step k)
loop k (x:xs) = let
(bytes, extra) = codecEncode codec x
extraChunks = Chunks $ case extra of
Nothing -> xs
Just (_, text) -> text:xs
checkError k' = case extra of
Nothing -> loop k' xs
Just (exc, _) -> throwError exc
in if B.null bytes
then checkError k
else k (Chunks [bytes]) >>==
checkDoneEx extraChunks checkError
decode :: Monad m => Codec
-> Enumeratee B.ByteString T.Text m b
decode codec = checkDone (continue . step B.empty) where
step acc k EOF = if B.null acc
then yield (Continue k) EOF
else throwError (Exc.ErrorCall "Unexpected EOF while decoding")
step acc k (Chunks xs) = loop acc k xs
loop acc k [] = continue (step acc k)
loop acc k (x:xs) = let
(text, extra) = codecDecode codec (B.append acc x)
extraChunks = Chunks $ case extra of
Right bytes | B.null bytes -> xs
Right bytes -> bytes:xs
Left (_, bytes) -> bytes:xs
checkError k' = case extra of
Left (exc, _) -> throwError exc
Right bytes -> loop bytes k' xs
in if T.null text
then checkError k
else k (Chunks [text]) >>==
checkDoneEx extraChunks checkError
byteSplits :: B.ByteString
-> [(B.ByteString, B.ByteString)]
byteSplits bytes = loop (B.length bytes) where
loop 0 = [(B.empty, bytes)]
loop n = B.splitAt n bytes : loop (n 1)
splitSlowly :: (B.ByteString -> T.Text)
-> B.ByteString
-> (T.Text, Either
(Exc.SomeException, B.ByteString)
B.ByteString)
splitSlowly dec bytes = valid where
valid = firstValid (Prelude.map decFirst splits)
splits = byteSplits bytes
firstValid = Prelude.head . catMaybes
tryDec = tryEvaluate . dec
decFirst (a, b) = case tryDec a of
Left _ -> Nothing
Right text -> Just (text, case tryDec b of
Left exc -> Left (exc, b)
Right _ -> Right B.empty)
utf8 :: Codec
utf8 = Codec name enc dec where
name = T.pack "UTF-8"
enc text = (TE.encodeUtf8 text, Nothing)
dec bytes = case splitQuickly bytes of
Just (text, extra) -> (text, Right extra)
Nothing -> splitSlowly TE.decodeUtf8 bytes
splitQuickly bytes = loop 0 >>= maybeDecode where
required x0
| x0 .&. 0x80 == 0x00 = 1
| x0 .&. 0xE0 == 0xC0 = 2
| x0 .&. 0xF0 == 0xE0 = 3
| x0 .&. 0xF8 == 0xF0 = 4
| otherwise = 0
maxN = B.length bytes
loop n | n == maxN = Just (TE.decodeUtf8 bytes, B.empty)
loop n = let
req = required (B.index bytes n)
tooLong = first TE.decodeUtf8 (B.splitAt n bytes)
decodeMore = loop $! n + req
in if req == 0
then Nothing
else if n + req > maxN
then Just tooLong
else decodeMore
utf16_le :: Codec
utf16_le = Codec name enc dec where
name = T.pack "UTF-16-LE"
enc text = (TE.encodeUtf16LE text, Nothing)
dec bytes = case splitQuickly bytes of
Just (text, extra) -> (text, Right extra)
Nothing -> splitSlowly TE.decodeUtf16LE bytes
splitQuickly bytes = maybeDecode (loop 0) where
maxN = B.length bytes
loop n | n == maxN = decodeAll
| (n + 1) == maxN = decodeTo n
loop n = let
req = utf16Required
(B.index bytes n)
(B.index bytes (n + 1))
decodeMore = loop $! n + req
in if n + req > maxN
then decodeTo n
else decodeMore
decodeTo n = first TE.decodeUtf16LE (B.splitAt n bytes)
decodeAll = (TE.decodeUtf16LE bytes, B.empty)
utf16_be :: Codec
utf16_be = Codec name enc dec where
name = T.pack "UTF-16-BE"
enc text = (TE.encodeUtf16BE text, Nothing)
dec bytes = case splitQuickly bytes of
Just (text, extra) -> (text, Right extra)
Nothing -> splitSlowly TE.decodeUtf16BE bytes
splitQuickly bytes = maybeDecode (loop 0) where
maxN = B.length bytes
loop n | n == maxN = decodeAll
| (n + 1) == maxN = decodeTo n
loop n = let
req = utf16Required
(B.index bytes (n + 1))
(B.index bytes n)
decodeMore = loop $! n + req
in if n + req > maxN
then decodeTo n
else decodeMore
decodeTo n = first TE.decodeUtf16BE (B.splitAt n bytes)
decodeAll = (TE.decodeUtf16BE bytes, B.empty)
utf16Required :: Word8 -> Word8 -> Int
utf16Required x0 x1 = required where
required = if x >= 0xD800 && x <= 0xDBFF
then 4
else 2
x :: Word16
x = (fromIntegral x1 `shiftL` 8) .|. fromIntegral x0
utf32_le :: Codec
utf32_le = Codec name enc dec where
name = T.pack "UTF-32-LE"
enc text = (TE.encodeUtf32LE text, Nothing)
dec bs = case utf32SplitBytes TE.decodeUtf32LE bs of
Just (text, extra) -> (text, Right extra)
Nothing -> splitSlowly TE.decodeUtf32LE bs
utf32_be :: Codec
utf32_be = Codec name enc dec where
name = T.pack "UTF-32-BE"
enc text = (TE.encodeUtf32BE text, Nothing)
dec bs = case utf32SplitBytes TE.decodeUtf32BE bs of
Just (text, extra) -> (text, Right extra)
Nothing -> splitSlowly TE.decodeUtf32BE bs
utf32SplitBytes :: (B.ByteString -> T.Text)
-> B.ByteString
-> Maybe (T.Text, B.ByteString)
utf32SplitBytes dec bytes = split where
split = maybeDecode (dec toDecode, extra)
len = B.length bytes
lenExtra = mod len 4
lenToDecode = len lenExtra
(toDecode, extra) = if lenExtra == 0
then (bytes, B.empty)
else B.splitAt lenToDecode bytes
ascii :: Codec
ascii = Codec name enc dec where
name = T.pack "ASCII"
enc text = (bytes, extra) where
(safe, unsafe) = tSpanBy (\c -> ord c <= 0x7F) text
bytes = B8.pack (T.unpack safe)
extra = if T.null unsafe
then Nothing
else Just (illegalEnc name (T.head unsafe), unsafe)
dec bytes = (text, extra) where
(safe, unsafe) = B.span (<= 0x7F) bytes
text = T.pack (B8.unpack safe)
extra = if B.null unsafe
then Right B.empty
else Left (illegalDec name (B.head unsafe), unsafe)
iso8859_1 :: Codec
iso8859_1 = Codec name enc dec where
name = T.pack "ISO-8859-1"
enc text = (bytes, extra) where
(safe, unsafe) = tSpanBy (\c -> ord c <= 0xFF) text
bytes = B8.pack (T.unpack safe)
extra = if T.null unsafe
then Nothing
else Just (illegalEnc name (T.head unsafe), unsafe)
dec bytes = (T.pack (B8.unpack bytes), Right B.empty)
illegalEnc :: T.Text -> Char -> Exc.SomeException
illegalEnc name c = Exc.toException . Exc.ErrorCall $
concat [ "Codec "
, show name
, " can't encode character "
, reprChar c
]
illegalDec :: T.Text -> Word8 -> Exc.SomeException
illegalDec name w = Exc.toException . Exc.ErrorCall $
concat [ "Codec "
, show name
, " can't decode byte "
, reprWord w
]
tryEvaluate :: a -> Either Exc.SomeException a
tryEvaluate = unsafePerformIO . Exc.try . Exc.evaluate
maybeDecode:: (a, b) -> Maybe (a, b)
maybeDecode (a, b) = case tryEvaluate a of
Left _ -> Nothing
Right _ -> Just (a, b)
toChunks :: TL.Text -> Stream T.Text
toChunks = Chunks . TL.toChunks