{-# LANGUAGE DeriveDataTypeable, RankNTypes #-}
module Data.Conduit.Text
(
Codec
, encode
, decode
, utf8
, utf16_le
, utf16_be
, utf32_le
, utf32_be
, ascii
, iso8859_1
, lines
, linesBounded
, TextException (..)
, takeWhile
, dropWhile
, take
, drop
, foldLines
, withLine
, CC.decodeUtf8
, CC.decodeUtf8Lenient
, CC.encodeUtf8
, detectUtf
) where
import Prelude hiding (head, drop, takeWhile, lines, zip, zip3, zipWith, zipWith3, take, dropWhile)
import qualified Control.Exception as Exc
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import Data.Char (ord)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Data.Word (Word8)
import Data.Typeable (Typeable)
import Data.Conduit
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Combinators as CC
import Control.Monad.Trans.Class (lift)
import Control.Monad.Trans.Resource (MonadThrow, throwM)
import Control.Monad (unless)
import Data.Streaming.Text
data Codec = Codec
{ Codec -> Text
_codecName :: T.Text
, Codec -> Text -> (ByteString, Maybe (TextException, Text))
codecEncode
:: T.Text
-> (B.ByteString, Maybe (TextException, T.Text))
, Codec
-> ByteString
-> (Text, Either (TextException, ByteString) ByteString)
codecDecode
:: B.ByteString
-> (T.Text, Either
(TextException, B.ByteString)
B.ByteString)
}
| NewCodec T.Text (T.Text -> B.ByteString) (B.ByteString -> DecodeResult)
instance Show Codec where
showsPrec :: Int -> Codec -> ShowS
showsPrec Int
d Codec
c =
let (String
cnst, Text
name) = case Codec
c of
Codec Text
t Text -> (ByteString, Maybe (TextException, Text))
_ ByteString -> (Text, Either (TextException, ByteString) ByteString)
_ -> (String
"Codec ", Text
t)
NewCodec Text
t Text -> ByteString
_ ByteString -> DecodeResult
_ -> (String
"NewCodec ", Text
t)
in Bool -> ShowS -> ShowS
showParen (Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
10) (ShowS -> ShowS) -> ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ String -> ShowS
showString String
cnst ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ShowS
forall a. Show a => a -> ShowS
shows Text
name
lines :: Monad m => ConduitT T.Text T.Text m ()
lines :: ConduitT Text Text m ()
lines =
([Text] -> [Text]) -> ConduitT Text Text m ()
forall (m :: * -> *).
Monad m =>
([Text] -> [Text]) -> ConduitT Text Text m ()
awaitText [Text] -> [Text]
forall a. a -> a
id
where
awaitText :: ([Text] -> [Text]) -> ConduitT Text Text m ()
awaitText [Text] -> [Text]
front = ConduitT Text Text m (Maybe Text)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT Text Text m (Maybe Text)
-> (Maybe Text -> ConduitT Text Text m ())
-> ConduitT Text Text m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT Text Text m ()
-> (Text -> ConduitT Text Text m ())
-> Maybe Text
-> ConduitT Text Text m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (([Text] -> [Text]) -> ConduitT Text Text m ()
forall (m :: * -> *) a i.
Monad m =>
([a] -> [Text]) -> ConduitT i Text m ()
finish [Text] -> [Text]
front) (([Text] -> [Text]) -> Text -> ConduitT Text Text m ()
process [Text] -> [Text]
front)
finish :: ([a] -> [Text]) -> ConduitT i Text m ()
finish [a] -> [Text]
front =
let t :: Text
t = [Text] -> Text
T.concat ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ [a] -> [Text]
front []
in Bool -> ConduitT i Text m () -> ConduitT i Text m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (Text -> Bool
T.null Text
t) (Text -> ConduitT i Text m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
t)
process :: ([Text] -> [Text]) -> Text -> ConduitT Text Text m ()
process [Text] -> [Text]
front Text
text =
let (Text
line, Text
rest) = (Char -> Bool) -> Text -> (Text, Text)
T.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n') Text
text
in case Text -> Maybe (Char, Text)
T.uncons Text
rest of
Just (Char
_, Text
rest') -> do
Text -> ConduitT Text Text m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield ([Text] -> Text
T.concat ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ [Text] -> [Text]
front [Text
line])
([Text] -> [Text]) -> Text -> ConduitT Text Text m ()
process [Text] -> [Text]
forall a. a -> a
id Text
rest'
Maybe (Char, Text)
Nothing -> Bool -> ConduitT Text Text m () -> ConduitT Text Text m ()
forall a. (?callStack::CallStack) => Bool -> a -> a
Exc.assert (Text
line Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
text) (ConduitT Text Text m () -> ConduitT Text Text m ())
-> ConduitT Text Text m () -> ConduitT Text Text m ()
forall a b. (a -> b) -> a -> b
$ ([Text] -> [Text]) -> ConduitT Text Text m ()
awaitText (([Text] -> [Text]) -> ConduitT Text Text m ())
-> ([Text] -> [Text]) -> ConduitT Text Text m ()
forall a b. (a -> b) -> a -> b
$ [Text] -> [Text]
front ([Text] -> [Text]) -> ([Text] -> [Text]) -> [Text] -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text
lineText -> [Text] -> [Text]
forall a. a -> [a] -> [a]
:)
linesBounded :: MonadThrow m => Int -> ConduitT T.Text T.Text m ()
linesBounded :: Int -> ConduitT Text Text m ()
linesBounded Int
maxLineLen =
Int -> Text -> ConduitT Text Text m ()
forall (m :: * -> *).
MonadThrow m =>
Int -> Text -> ConduitT Text Text m ()
awaitText Int
0 Text
T.empty
where
awaitText :: Int -> Text -> ConduitT Text Text m ()
awaitText Int
len Text
buf = ConduitT Text Text m (Maybe Text)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT Text Text m (Maybe Text)
-> (Maybe Text -> ConduitT Text Text m ())
-> ConduitT Text Text m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT Text Text m ()
-> (Text -> ConduitT Text Text m ())
-> Maybe Text
-> ConduitT Text Text m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (Text -> ConduitT Text Text m ()
forall (m :: * -> *) i. Monad m => Text -> ConduitT i Text m ()
finish Text
buf) (Int -> Text -> Text -> ConduitT Text Text m ()
process Int
len Text
buf)
finish :: Text -> ConduitT i Text m ()
finish Text
buf = Bool -> ConduitT i Text m () -> ConduitT i Text m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (Text -> Bool
T.null Text
buf) (Text -> ConduitT i Text m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
buf)
process :: Int -> Text -> Text -> ConduitT Text Text m ()
process Int
len Text
buf Text
text =
let (Text
line, Text
rest) = (Char -> Bool) -> Text -> (Text, Text)
T.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n') Text
text
len' :: Int
len' = Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Text -> Int
T.length Text
line
in if Int
len' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
maxLineLen
then m () -> ConduitT Text Text m ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m () -> ConduitT Text Text m ())
-> m () -> ConduitT Text Text m ()
forall a b. (a -> b) -> a -> b
$ TextException -> m ()
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (Int -> TextException
LengthExceeded Int
maxLineLen)
else case Text -> Maybe (Char, Text)
T.uncons Text
rest of
Just (Char
_, Text
rest') ->
Text -> ConduitT Text Text m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (Text
buf Text -> Text -> Text
`T.append` Text
line) ConduitT Text Text m ()
-> ConduitT Text Text m () -> ConduitT Text Text m ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int -> Text -> Text -> ConduitT Text Text m ()
process Int
0 Text
T.empty Text
rest'
Maybe (Char, Text)
_ ->
Int -> Text -> ConduitT Text Text m ()
awaitText Int
len' (Text -> ConduitT Text Text m ())
-> Text -> ConduitT Text Text m ()
forall a b. (a -> b) -> a -> b
$ Text
buf Text -> Text -> Text
`T.append` Text
text
encode :: MonadThrow m => Codec -> ConduitT T.Text B.ByteString m ()
encode :: Codec -> ConduitT Text ByteString m ()
encode (NewCodec Text
_ Text -> ByteString
enc ByteString -> DecodeResult
_) = (Text -> ByteString) -> ConduitT Text ByteString m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
CL.map Text -> ByteString
enc
encode Codec
codec = (Text -> m ByteString) -> ConduitT Text ByteString m ()
forall (m :: * -> *) a b.
Monad m =>
(a -> m b) -> ConduitT a b m ()
CL.mapM ((Text -> m ByteString) -> ConduitT Text ByteString m ())
-> (Text -> m ByteString) -> ConduitT Text ByteString m ()
forall a b. (a -> b) -> a -> b
$ \Text
t -> do
let (ByteString
bs, Maybe (TextException, Text)
mexc) = Codec -> Text -> (ByteString, Maybe (TextException, Text))
codecEncode Codec
codec Text
t
m ByteString
-> ((TextException, Text) -> m ByteString)
-> Maybe (TextException, Text)
-> m ByteString
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (ByteString -> m ByteString
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
bs) (TextException -> m ByteString
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (TextException -> m ByteString)
-> ((TextException, Text) -> TextException)
-> (TextException, Text)
-> m ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (TextException, Text) -> TextException
forall a b. (a, b) -> a
fst) Maybe (TextException, Text)
mexc
decodeNew
:: Monad m
=> (Int -> B.ByteString -> T.Text -> B.ByteString -> ConduitT B.ByteString T.Text m ())
-> t
-> Int
-> (B.ByteString -> DecodeResult)
-> ConduitT B.ByteString T.Text m ()
decodeNew :: (Int
-> ByteString
-> Text
-> ByteString
-> ConduitT ByteString Text m ())
-> t
-> Int
-> (ByteString -> DecodeResult)
-> ConduitT ByteString Text m ()
decodeNew Int
-> ByteString
-> Text
-> ByteString
-> ConduitT ByteString Text m ()
onFailure t
_name =
Int
-> (ByteString -> DecodeResult) -> ConduitT ByteString Text m ()
loop
where
loop :: Int
-> (ByteString -> DecodeResult) -> ConduitT ByteString Text m ()
loop Int
consumed ByteString -> DecodeResult
dec =
ConduitT ByteString Text m (Maybe ByteString)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT ByteString Text m (Maybe ByteString)
-> (Maybe ByteString -> ConduitT ByteString Text m ())
-> ConduitT ByteString Text m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT ByteString Text m ()
-> (ByteString -> ConduitT ByteString Text m ())
-> Maybe ByteString
-> ConduitT ByteString Text m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe ConduitT ByteString Text m ()
finish ByteString -> ConduitT ByteString Text m ()
go
where
finish :: ConduitT ByteString Text m ()
finish =
case ByteString -> DecodeResult
dec ByteString
B.empty of
DecodeResultSuccess Text
_ ByteString -> DecodeResult
_ -> () -> ConduitT ByteString Text m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
DecodeResultFailure Text
t ByteString
rest -> Int
-> ByteString
-> Text
-> ByteString
-> ConduitT ByteString Text m ()
onFailure Int
consumed ByteString
B.empty Text
t ByteString
rest
{-# INLINE finish #-}
go :: ByteString -> ConduitT ByteString Text m ()
go ByteString
bs | ByteString -> Bool
B.null ByteString
bs = Int
-> (ByteString -> DecodeResult) -> ConduitT ByteString Text m ()
loop Int
consumed ByteString -> DecodeResult
dec
go ByteString
bs =
case ByteString -> DecodeResult
dec ByteString
bs of
DecodeResultSuccess Text
t ByteString -> DecodeResult
dec' -> do
let consumed' :: Int
consumed' = Int
consumed Int -> Int -> Int
forall a. Num a => a -> a -> a
+ ByteString -> Int
B.length ByteString
bs
next :: ConduitT ByteString Text m ()
next = do
Bool
-> ConduitT ByteString Text m () -> ConduitT ByteString Text m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (Text -> Bool
T.null Text
t) (Text -> ConduitT ByteString Text m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
t)
Int
-> (ByteString -> DecodeResult) -> ConduitT ByteString Text m ()
loop Int
consumed' ByteString -> DecodeResult
dec'
in Int
consumed' Int
-> ConduitT ByteString Text m () -> ConduitT ByteString Text m ()
`seq` ConduitT ByteString Text m ()
next
DecodeResultFailure Text
t ByteString
rest -> Int
-> ByteString
-> Text
-> ByteString
-> ConduitT ByteString Text m ()
onFailure Int
consumed ByteString
bs Text
t ByteString
rest
decode :: MonadThrow m => Codec -> ConduitT B.ByteString T.Text m ()
decode :: Codec -> ConduitT ByteString Text m ()
decode (NewCodec Text
name Text -> ByteString
_ ByteString -> DecodeResult
start) =
(Int
-> ByteString
-> Text
-> ByteString
-> ConduitT ByteString Text m ())
-> Text
-> Int
-> (ByteString -> DecodeResult)
-> ConduitT ByteString Text m ()
forall (m :: * -> *) t.
Monad m =>
(Int
-> ByteString
-> Text
-> ByteString
-> ConduitT ByteString Text m ())
-> t
-> Int
-> (ByteString -> DecodeResult)
-> ConduitT ByteString Text m ()
decodeNew Int
-> ByteString
-> Text
-> ByteString
-> ConduitT ByteString Text m ()
forall (m :: * -> *) b.
MonadThrow m =>
Int
-> ByteString -> Text -> ByteString -> ConduitT ByteString Text m b
onFailure Text
name Int
0 ByteString -> DecodeResult
start
where
onFailure :: Int
-> ByteString -> Text -> ByteString -> ConduitT ByteString Text m b
onFailure Int
consumed ByteString
bs Text
t ByteString
rest = do
Bool
-> ConduitT ByteString Text m () -> ConduitT ByteString Text m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (Text -> Bool
T.null Text
t) (Text -> ConduitT ByteString Text m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
t)
ByteString -> ConduitT ByteString Text m ()
forall i o (m :: * -> *). i -> ConduitT i o m ()
leftover ByteString
rest
let consumed' :: Int
consumed' = Int
consumed Int -> Int -> Int
forall a. Num a => a -> a -> a
+ ByteString -> Int
B.length ByteString
bs Int -> Int -> Int
forall a. Num a => a -> a -> a
- ByteString -> Int
B.length ByteString
rest
TextException -> ConduitT ByteString Text m b
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (TextException -> ConduitT ByteString Text m b)
-> TextException -> ConduitT ByteString Text m b
forall a b. (a -> b) -> a -> b
$ Text -> Int -> ByteString -> TextException
NewDecodeException Text
name Int
consumed' (Int -> ByteString -> ByteString
B.take Int
4 ByteString
rest)
{-# INLINE onFailure #-}
decode Codec
codec =
(ByteString -> ByteString) -> ConduitT ByteString Text m ()
forall (m :: * -> *).
MonadThrow m =>
(ByteString -> ByteString) -> ConduitT ByteString Text m ()
loop ByteString -> ByteString
forall a. a -> a
id
where
loop :: (ByteString -> ByteString) -> ConduitT ByteString Text m ()
loop ByteString -> ByteString
front = ConduitT ByteString Text m (Maybe ByteString)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT ByteString Text m (Maybe ByteString)
-> (Maybe ByteString -> ConduitT ByteString Text m ())
-> ConduitT ByteString Text m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT ByteString Text m ()
-> (ByteString -> ConduitT ByteString Text m ())
-> Maybe ByteString
-> ConduitT ByteString Text m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe ((ByteString -> ByteString) -> ConduitT ByteString Text m ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *).
(MonadTrans t, Monad (t m), MonadThrow m) =>
(ByteString -> ByteString) -> t m ()
finish ByteString -> ByteString
front) ((ByteString -> ByteString)
-> ByteString -> ConduitT ByteString Text m ()
go ByteString -> ByteString
front)
finish :: (ByteString -> ByteString) -> t m ()
finish ByteString -> ByteString
front =
case ByteString -> Maybe (Word8, ByteString)
B.uncons (ByteString -> Maybe (Word8, ByteString))
-> ByteString -> Maybe (Word8, ByteString)
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
front ByteString
B.empty of
Maybe (Word8, ByteString)
Nothing -> () -> t m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
Just (Word8
w, ByteString
_) -> m () -> t m ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m () -> t m ()) -> m () -> t m ()
forall a b. (a -> b) -> a -> b
$ TextException -> m ()
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (TextException -> m ()) -> TextException -> m ()
forall a b. (a -> b) -> a -> b
$ Codec -> Word8 -> TextException
DecodeException Codec
codec Word8
w
go :: (ByteString -> ByteString)
-> ByteString -> ConduitT ByteString Text m ()
go ByteString -> ByteString
front ByteString
bs' =
case Either (TextException, ByteString) ByteString
extra of
Left (TextException
exc, ByteString
_) -> m () -> ConduitT ByteString Text m ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m () -> ConduitT ByteString Text m ())
-> m () -> ConduitT ByteString Text m ()
forall a b. (a -> b) -> a -> b
$ TextException -> m ()
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM TextException
exc
Right ByteString
bs'' -> Text -> ConduitT ByteString Text m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
text ConduitT ByteString Text m ()
-> ConduitT ByteString Text m () -> ConduitT ByteString Text m ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (ByteString -> ByteString) -> ConduitT ByteString Text m ()
loop (ByteString -> ByteString -> ByteString
B.append ByteString
bs'')
where
(Text
text, Either (TextException, ByteString) ByteString
extra) = Codec
-> ByteString
-> (Text, Either (TextException, ByteString) ByteString)
codecDecode Codec
codec ByteString
bs
bs :: ByteString
bs = ByteString -> ByteString
front ByteString
bs'
data TextException = DecodeException Codec Word8
| EncodeException Codec Char
| LengthExceeded Int
| TextException Exc.SomeException
| NewDecodeException !T.Text !Int !B.ByteString
deriving Typeable
instance Show TextException where
show :: TextException -> String
show (DecodeException Codec
codec Word8
w) = [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
[ String
"Error decoding legacy Data.Conduit.Text codec "
, Codec -> String
forall a. Show a => a -> String
show Codec
codec
, String
" when parsing byte: "
, Word8 -> String
forall a. Show a => a -> String
show Word8
w
]
show (EncodeException Codec
codec Char
c) = [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
[ String
"Error encoding legacy Data.Conduit.Text codec "
, Codec -> String
forall a. Show a => a -> String
show Codec
codec
, String
" when parsing char: "
, Char -> String
forall a. Show a => a -> String
show Char
c
]
show (LengthExceeded Int
i) = String
"Data.Conduit.Text.linesBounded: line too long: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
i
show (TextException SomeException
se) = String
"Data.Conduit.Text.TextException: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ SomeException -> String
forall a. Show a => a -> String
show SomeException
se
show (NewDecodeException Text
codec Int
consumed ByteString
next) = [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
[ String
"Data.Conduit.Text.decode: Error decoding stream of "
, Text -> String
T.unpack Text
codec
, String
" bytes. Error encountered in stream at offset "
, Int -> String
forall a. Show a => a -> String
show Int
consumed
, String
". Encountered at byte sequence "
, ByteString -> String
forall a. Show a => a -> String
show ByteString
next
]
instance Exc.Exception TextException
utf8 :: Codec
utf8 :: Codec
utf8 = Text
-> (Text -> ByteString) -> (ByteString -> DecodeResult) -> Codec
NewCodec (String -> Text
T.pack String
"UTF-8") Text -> ByteString
TE.encodeUtf8 ByteString -> DecodeResult
Data.Streaming.Text.decodeUtf8
utf16_le :: Codec
utf16_le :: Codec
utf16_le = Text
-> (Text -> ByteString) -> (ByteString -> DecodeResult) -> Codec
NewCodec (String -> Text
T.pack String
"UTF-16-LE") Text -> ByteString
TE.encodeUtf16LE ByteString -> DecodeResult
decodeUtf16LE
utf16_be :: Codec
utf16_be :: Codec
utf16_be = Text
-> (Text -> ByteString) -> (ByteString -> DecodeResult) -> Codec
NewCodec (String -> Text
T.pack String
"UTF-16-BE") Text -> ByteString
TE.encodeUtf16BE ByteString -> DecodeResult
decodeUtf16BE
utf32_le :: Codec
utf32_le :: Codec
utf32_le = Text
-> (Text -> ByteString) -> (ByteString -> DecodeResult) -> Codec
NewCodec (String -> Text
T.pack String
"UTF-32-LE") Text -> ByteString
TE.encodeUtf32LE ByteString -> DecodeResult
decodeUtf32LE
utf32_be :: Codec
utf32_be :: Codec
utf32_be = Text
-> (Text -> ByteString) -> (ByteString -> DecodeResult) -> Codec
NewCodec (String -> Text
T.pack String
"UTF-32-BE") Text -> ByteString
TE.encodeUtf32BE ByteString -> DecodeResult
decodeUtf32BE
ascii :: Codec
ascii :: Codec
ascii = Text
-> (Text -> (ByteString, Maybe (TextException, Text)))
-> (ByteString
-> (Text, Either (TextException, ByteString) ByteString))
-> Codec
Codec Text
name Text -> (ByteString, Maybe (TextException, Text))
enc ByteString -> (Text, Either (TextException, ByteString) ByteString)
dec where
name :: Text
name = String -> Text
T.pack String
"ASCII"
enc :: Text -> (ByteString, Maybe (TextException, Text))
enc Text
text = (ByteString
bytes, Maybe (TextException, Text)
extra) where
(Text
safe, Text
unsafe) = (Char -> Bool) -> Text -> (Text, Text)
T.span (\Char
c -> Char -> Int
ord Char
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x7F) Text
text
bytes :: ByteString
bytes = String -> ByteString
B8.pack (Text -> String
T.unpack Text
safe)
extra :: Maybe (TextException, Text)
extra = if Text -> Bool
T.null Text
unsafe
then Maybe (TextException, Text)
forall a. Maybe a
Nothing
else (TextException, Text) -> Maybe (TextException, Text)
forall a. a -> Maybe a
Just (Codec -> Char -> TextException
EncodeException Codec
ascii (Text -> Char
T.head Text
unsafe), Text
unsafe)
dec :: ByteString -> (Text, Either (TextException, ByteString) ByteString)
dec ByteString
bytes = (Text
text, Either (TextException, ByteString) ByteString
extra) where
(ByteString
safe, ByteString
unsafe) = (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
B.span (Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0x7F) ByteString
bytes
text :: Text
text = String -> Text
T.pack (ByteString -> String
B8.unpack ByteString
safe)
extra :: Either (TextException, ByteString) ByteString
extra = if ByteString -> Bool
B.null ByteString
unsafe
then ByteString -> Either (TextException, ByteString) ByteString
forall a b. b -> Either a b
Right ByteString
B.empty
else (TextException, ByteString)
-> Either (TextException, ByteString) ByteString
forall a b. a -> Either a b
Left (Codec -> Word8 -> TextException
DecodeException Codec
ascii (ByteString -> Word8
B.head ByteString
unsafe), ByteString
unsafe)
iso8859_1 :: Codec
iso8859_1 :: Codec
iso8859_1 = Text
-> (Text -> (ByteString, Maybe (TextException, Text)))
-> (ByteString
-> (Text, Either (TextException, ByteString) ByteString))
-> Codec
Codec Text
name Text -> (ByteString, Maybe (TextException, Text))
enc ByteString -> (Text, Either (TextException, ByteString) ByteString)
forall a. ByteString -> (Text, Either a ByteString)
dec where
name :: Text
name = String -> Text
T.pack String
"ISO-8859-1"
enc :: Text -> (ByteString, Maybe (TextException, Text))
enc Text
text = (ByteString
bytes, Maybe (TextException, Text)
extra) where
(Text
safe, Text
unsafe) = (Char -> Bool) -> Text -> (Text, Text)
T.span (\Char
c -> Char -> Int
ord Char
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xFF) Text
text
bytes :: ByteString
bytes = String -> ByteString
B8.pack (Text -> String
T.unpack Text
safe)
extra :: Maybe (TextException, Text)
extra = if Text -> Bool
T.null Text
unsafe
then Maybe (TextException, Text)
forall a. Maybe a
Nothing
else (TextException, Text) -> Maybe (TextException, Text)
forall a. a -> Maybe a
Just (Codec -> Char -> TextException
EncodeException Codec
iso8859_1 (Text -> Char
T.head Text
unsafe), Text
unsafe)
dec :: ByteString -> (Text, Either a ByteString)
dec ByteString
bytes = (String -> Text
T.pack (ByteString -> String
B8.unpack ByteString
bytes), ByteString -> Either a ByteString
forall a b. b -> Either a b
Right ByteString
B.empty)
takeWhile :: Monad m
=> (Char -> Bool)
-> ConduitT T.Text T.Text m ()
takeWhile :: (Char -> Bool) -> ConduitT Text Text m ()
takeWhile Char -> Bool
p =
ConduitT Text Text m ()
loop
where
loop :: ConduitT Text Text m ()
loop = ConduitT Text Text m (Maybe Text)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT Text Text m (Maybe Text)
-> (Maybe Text -> ConduitT Text Text m ())
-> ConduitT Text Text m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT Text Text m ()
-> (Text -> ConduitT Text Text m ())
-> Maybe Text
-> ConduitT Text Text m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (() -> ConduitT Text Text m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()) Text -> ConduitT Text Text m ()
go
go :: Text -> ConduitT Text Text m ()
go Text
t =
case (Char -> Bool) -> Text -> (Text, Text)
T.span Char -> Bool
p Text
t of
(Text
x, Text
y)
| Text -> Bool
T.null Text
y -> Text -> ConduitT Text Text m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
x ConduitT Text Text m ()
-> ConduitT Text Text m () -> ConduitT Text Text m ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ConduitT Text Text m ()
loop
| Bool
otherwise -> Text -> ConduitT Text Text m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
x ConduitT Text Text m ()
-> ConduitT Text Text m () -> ConduitT Text Text m ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> ConduitT Text Text m ()
forall i o (m :: * -> *). i -> ConduitT i o m ()
leftover Text
y
dropWhile :: Monad m
=> (Char -> Bool)
-> ConduitT T.Text o m ()
dropWhile :: (Char -> Bool) -> ConduitT Text o m ()
dropWhile Char -> Bool
p =
ConduitT Text o m ()
forall o. ConduitT Text o m ()
loop
where
loop :: ConduitT Text o m ()
loop = ConduitT Text o m (Maybe Text)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT Text o m (Maybe Text)
-> (Maybe Text -> ConduitT Text o m ()) -> ConduitT Text o m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT Text o m ()
-> (Text -> ConduitT Text o m ())
-> Maybe Text
-> ConduitT Text o m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (() -> ConduitT Text o m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()) Text -> ConduitT Text o m ()
go
go :: Text -> ConduitT Text o m ()
go Text
t
| Text -> Bool
T.null Text
x = ConduitT Text o m ()
loop
| Bool
otherwise = Text -> ConduitT Text o m ()
forall i o (m :: * -> *). i -> ConduitT i o m ()
leftover Text
x
where
x :: Text
x = (Char -> Bool) -> Text -> Text
T.dropWhile Char -> Bool
p Text
t
take :: Monad m => Int -> ConduitT T.Text T.Text m ()
take :: Int -> ConduitT Text Text m ()
take =
Int -> ConduitT Text Text m ()
forall (m :: * -> *). Monad m => Int -> ConduitT Text Text m ()
loop
where
loop :: Int -> ConduitT Text Text m ()
loop Int
i = ConduitT Text Text m (Maybe Text)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT Text Text m (Maybe Text)
-> (Maybe Text -> ConduitT Text Text m ())
-> ConduitT Text Text m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT Text Text m ()
-> (Text -> ConduitT Text Text m ())
-> Maybe Text
-> ConduitT Text Text m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (() -> ConduitT Text Text m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()) (Int -> Text -> ConduitT Text Text m ()
go Int
i)
go :: Int -> Text -> ConduitT Text Text m ()
go Int
i Text
t
| Int
diff Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Text -> ConduitT Text Text m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
t
| Int
diff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 =
let (Text
x, Text
y) = Int -> Text -> (Text, Text)
T.splitAt Int
i Text
t
in Text -> ConduitT Text Text m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
x ConduitT Text Text m ()
-> ConduitT Text Text m () -> ConduitT Text Text m ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> ConduitT Text Text m ()
forall i o (m :: * -> *). i -> ConduitT i o m ()
leftover Text
y
| Bool
otherwise = Text -> ConduitT Text Text m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
t ConduitT Text Text m ()
-> ConduitT Text Text m () -> ConduitT Text Text m ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int -> ConduitT Text Text m ()
loop Int
diff
where
diff :: Int
diff = Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Text -> Int
T.length Text
t
drop :: Monad m => Int -> ConduitT T.Text o m ()
drop :: Int -> ConduitT Text o m ()
drop =
Int -> ConduitT Text o m ()
forall (m :: * -> *) o. Monad m => Int -> ConduitT Text o m ()
loop
where
loop :: Int -> ConduitT Text o m ()
loop Int
i = ConduitT Text o m (Maybe Text)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT Text o m (Maybe Text)
-> (Maybe Text -> ConduitT Text o m ()) -> ConduitT Text o m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT Text o m ()
-> (Text -> ConduitT Text o m ())
-> Maybe Text
-> ConduitT Text o m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (() -> ConduitT Text o m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()) (Int -> Text -> ConduitT Text o m ()
go Int
i)
go :: Int -> Text -> ConduitT Text o m ()
go Int
i Text
t
| Int
diff Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = () -> ConduitT Text o m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
| Int
diff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 = Text -> ConduitT Text o m ()
forall i o (m :: * -> *). i -> ConduitT i o m ()
leftover (Text -> ConduitT Text o m ()) -> Text -> ConduitT Text o m ()
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
T.drop Int
i Text
t
| Bool
otherwise = Int -> ConduitT Text o m ()
loop Int
diff
where
diff :: Int
diff = Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Text -> Int
T.length Text
t
foldLines :: Monad m
=> (a -> ConduitM T.Text o m a)
-> a
-> ConduitT T.Text o m a
foldLines :: (a -> ConduitM Text o m a) -> a -> ConduitM Text o m a
foldLines a -> ConduitM Text o m a
f =
a -> ConduitM Text o m a
start
where
start :: a -> ConduitM Text o m a
start a
a = ConduitT Text o m (Maybe Text)
forall (m :: * -> *) a o. Monad m => ConduitT a o m (Maybe a)
CL.peek ConduitT Text o m (Maybe Text)
-> (Maybe Text -> ConduitM Text o m a) -> ConduitM Text o m a
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitM Text o m a
-> (Text -> ConduitM Text o m a)
-> Maybe Text
-> ConduitM Text o m a
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (a -> ConduitM Text o m a
forall (m :: * -> *) a. Monad m => a -> m a
return a
a) (ConduitM Text o m a -> Text -> ConduitM Text o m a
forall a b. a -> b -> a
const (ConduitM Text o m a -> Text -> ConduitM Text o m a)
-> ConduitM Text o m a -> Text -> ConduitM Text o m a
forall a b. (a -> b) -> a -> b
$ ConduitM Text o m a -> ConduitM Text o m a
loop (ConduitM Text o m a -> ConduitM Text o m a)
-> ConduitM Text o m a -> ConduitM Text o m a
forall a b. (a -> b) -> a -> b
$ a -> ConduitM Text o m a
f a
a)
loop :: ConduitM Text o m a -> ConduitM Text o m a
loop ConduitM Text o m a
consumer = do
a
a <- (Char -> Bool) -> ConduitT Text Text m ()
forall (m :: * -> *).
Monad m =>
(Char -> Bool) -> ConduitT Text Text m ()
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\n') ConduitT Text Text m ()
-> ConduitM Text o m a -> ConduitM Text o m a
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| do
a
a <- (Text -> Text) -> ConduitT Text Text m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
CL.map ((Char -> Bool) -> Text -> Text
T.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\r')) ConduitT Text Text m ()
-> ConduitM Text o m a -> ConduitM Text o m a
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM Text o m a
consumer
ConduitT Text o m ()
forall (m :: * -> *) i o. Monad m => ConduitT i o m ()
CL.sinkNull
a -> ConduitM Text o m a
forall (m :: * -> *) a. Monad m => a -> m a
return a
a
Int -> ConduitT Text o m ()
forall (m :: * -> *) o. Monad m => Int -> ConduitT Text o m ()
drop Int
1
a -> ConduitM Text o m a
start a
a
withLine :: Monad m
=> ConduitT T.Text Void m a
-> ConduitT T.Text o m (Maybe a)
withLine :: ConduitT Text Void m a -> ConduitT Text o m (Maybe a)
withLine ConduitT Text Void m a
consumer = Sink Text m (Maybe a) -> Consumer Text m (Maybe a)
forall (m :: * -> *) a b. Monad m => Sink a m b -> Consumer a m b
toConsumer (Sink Text m (Maybe a) -> Consumer Text m (Maybe a))
-> Sink Text m (Maybe a) -> Consumer Text m (Maybe a)
forall a b. (a -> b) -> a -> b
$ do
Maybe Text
mx <- ConduitT Text Void m (Maybe Text)
forall (m :: * -> *) a o. Monad m => ConduitT a o m (Maybe a)
CL.peek
case Maybe Text
mx of
Maybe Text
Nothing -> Maybe a -> Sink Text m (Maybe a)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe a
forall a. Maybe a
Nothing
Just Text
_ -> do
a
x <- (Char -> Bool) -> ConduitT Text Text m ()
forall (m :: * -> *).
Monad m =>
(Char -> Bool) -> ConduitT Text Text m ()
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\n') ConduitT Text Text m ()
-> ConduitT Text Void m a -> ConduitT Text Void m a
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| do
a
x <- (Text -> Text) -> ConduitT Text Text m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
CL.map ((Char -> Bool) -> Text -> Text
T.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\r')) ConduitT Text Text m ()
-> ConduitT Text Void m a -> ConduitT Text Void m a
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitT Text Void m a
consumer
ConduitT Text Void m ()
forall (m :: * -> *) i o. Monad m => ConduitT i o m ()
CL.sinkNull
a -> ConduitT Text Void m a
forall (m :: * -> *) a. Monad m => a -> m a
return a
x
Int -> ConduitT Text Void m ()
forall (m :: * -> *) o. Monad m => Int -> ConduitT Text o m ()
drop Int
1
Maybe a -> Sink Text m (Maybe a)
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe a -> Sink Text m (Maybe a))
-> Maybe a -> Sink Text m (Maybe a)
forall a b. (a -> b) -> a -> b
$ a -> Maybe a
forall a. a -> Maybe a
Just a
x
detectUtf :: MonadThrow m => ConduitT B.ByteString T.Text m ()
detectUtf :: ConduitT ByteString Text m ()
detectUtf =
(ByteString -> ByteString) -> ConduitT ByteString Text m ()
forall (m :: * -> *).
MonadThrow m =>
(ByteString -> ByteString) -> ConduitT ByteString Text m ()
go ByteString -> ByteString
forall a. a -> a
id
where
go :: (ByteString -> ByteString) -> ConduitT ByteString Text m ()
go ByteString -> ByteString
front = ConduitT ByteString Text m (Maybe ByteString)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT ByteString Text m (Maybe ByteString)
-> (Maybe ByteString -> ConduitT ByteString Text m ())
-> ConduitT ByteString Text m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT ByteString Text m ()
-> (ByteString -> ConduitT ByteString Text m ())
-> Maybe ByteString
-> ConduitT ByteString Text m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe ((ByteString -> ByteString) -> ConduitT ByteString Text m ()
forall (m :: * -> *).
MonadThrow m =>
(ByteString -> ByteString) -> ConduitT ByteString Text m ()
close ByteString -> ByteString
front) ((ByteString -> ByteString)
-> ByteString -> ConduitT ByteString Text m ()
push ByteString -> ByteString
front)
push :: (ByteString -> ByteString)
-> ByteString -> ConduitT ByteString Text m ()
push ByteString -> ByteString
front ByteString
bs'
| ByteString -> Int
B.length ByteString
bs Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
4 = (ByteString -> ByteString) -> ConduitT ByteString Text m ()
go ((ByteString -> ByteString) -> ConduitT ByteString Text m ())
-> (ByteString -> ByteString) -> ConduitT ByteString Text m ()
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString -> ByteString
B.append ByteString
bs
| Bool
otherwise = ByteString -> ConduitT ByteString Text m ()
forall (m :: * -> *).
MonadThrow m =>
ByteString -> ConduitT ByteString Text m ()
leftDecode ByteString
bs
where bs :: ByteString
bs = ByteString -> ByteString
front ByteString
bs'
close :: (ByteString -> ByteString) -> ConduitT ByteString Text m ()
close ByteString -> ByteString
front = ByteString -> ConduitT ByteString Text m ()
forall (m :: * -> *).
MonadThrow m =>
ByteString -> ConduitT ByteString Text m ()
leftDecode (ByteString -> ConduitT ByteString Text m ())
-> ByteString -> ConduitT ByteString Text m ()
forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString
front ByteString
B.empty
leftDecode :: ByteString -> ConduitT ByteString Text m ()
leftDecode ByteString
bs = ByteString -> ConduitT ByteString Text m ()
forall i o (m :: * -> *). i -> ConduitT i o m ()
leftover ByteString
bsOut ConduitT ByteString Text m ()
-> ConduitT ByteString Text m () -> ConduitT ByteString Text m ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Codec -> ConduitT ByteString Text m ()
forall (m :: * -> *).
MonadThrow m =>
Codec -> ConduitT ByteString Text m ()
decode Codec
codec
where
bsOut :: ByteString
bsOut = ByteString -> ByteString -> ByteString
B.append (Int -> ByteString -> ByteString
B.drop Int
toDrop ByteString
x) ByteString
y
(ByteString
x, ByteString
y) = Int -> ByteString -> (ByteString, ByteString)
B.splitAt Int
4 ByteString
bs
(Int
toDrop, Codec
codec) =
case ByteString -> [Word8]
B.unpack ByteString
x of
[Word8
0x00, Word8
0x00, Word8
0xFE, Word8
0xFF] -> (Int
4, Codec
utf32_be)
[Word8
0xFF, Word8
0xFE, Word8
0x00, Word8
0x00] -> (Int
4, Codec
utf32_le)
Word8
0xFE : Word8
0xFF: [Word8]
_ -> (Int
2, Codec
utf16_be)
Word8
0xFF : Word8
0xFE: [Word8]
_ -> (Int
2, Codec
utf16_le)
Word8
0xEF : Word8
0xBB: Word8
0xBF : [Word8]
_ -> (Int
3, Codec
utf8)
[Word8]
_ -> (Int
0, Codec
utf8)
{-# INLINE detectUtf #-}