{-# LANGUAGE CPP #-}
module Web.Mangrove.Parse.Encoding.Preprocess
( preprocess
, preprocessStep
, Encoding ( .. )
, DecoderState
, initialDecoderState
) where
import qualified Data.Bifunctor as F.B
import qualified Data.ByteString as BS
import qualified Data.ByteString.Short as BS.SH
import qualified Data.Tuple.HT as U.HT
#if MIN_VERSION_base(4,11,0)
#else
import Data.Semigroup ( (<>) )
#endif
import Web.Mangrove.Parse.Common.Error
import Web.Willow.Common.Encoding
import Web.Willow.Common.Encoding.Character
import Web.Willow.Common.Parser.Util
preprocess :: DecoderState -> BS.ByteString -> ([([ParseError], Char)], DecoderState)
preprocess :: DecoderState
-> ByteString -> ([([ParseError], Char)], DecoderState)
preprocess DecoderState
state = ([Either ShortByteString String] -> [([ParseError], Char)])
-> ([Either ShortByteString String], DecoderState)
-> ([([ParseError], Char)], DecoderState)
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
F.B.first ([([ParseError], Char)] -> [([ParseError], Char)]
forall a. Semigroup a => [(a, Char)] -> [(a, Char)]
normalize ([([ParseError], Char)] -> [([ParseError], Char)])
-> ([Either ShortByteString String] -> [([ParseError], Char)])
-> [Either ShortByteString String]
-> [([ParseError], Char)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (([ParseError], Char) -> ([ParseError], Char))
-> [([ParseError], Char)] -> [([ParseError], Char)]
forall a b. (a -> b) -> [a] -> [b]
map ([ParseError], Char) -> ([ParseError], Char)
charError ([([ParseError], Char)] -> [([ParseError], Char)])
-> ([Either ShortByteString String] -> [([ParseError], Char)])
-> [Either ShortByteString String]
-> [([ParseError], Char)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Either ShortByteString String -> [([ParseError], Char)])
-> [Either ShortByteString String] -> [([ParseError], Char)]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Either ShortByteString String -> [([ParseError], Char)]
flatten) (([Either ShortByteString String], DecoderState)
-> ([([ParseError], Char)], DecoderState))
-> (ByteString -> ([Either ShortByteString String], DecoderState))
-> ByteString
-> ([([ParseError], Char)], DecoderState)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DecoderState
-> ByteString -> ([Either ShortByteString String], DecoderState)
decode DecoderState
state
where normalize :: [(a, Char)] -> [(a, Char)]
normalize ((a
err1, Char
'\r'):(a
err2, Char
'\n'):[(a, Char)]
cs) = (a
err1 a -> a -> a
forall a. Semigroup a => a -> a -> a
<> a
err2, Char
'\n') (a, Char) -> [(a, Char)] -> [(a, Char)]
forall a. a -> [a] -> [a]
: [(a, Char)] -> [(a, Char)]
normalize [(a, Char)]
cs
normalize ((a
err, Char
'\r'):[(a, Char)]
cs) = (a
err, Char
'\n') (a, Char) -> [(a, Char)] -> [(a, Char)]
forall a. a -> [a] -> [a]
: [(a, Char)] -> [(a, Char)]
normalize [(a, Char)]
cs
normalize ((a, Char)
c:[(a, Char)]
cs) = (a, Char)
c (a, Char) -> [(a, Char)] -> [(a, Char)]
forall a. a -> [a] -> [a]
: [(a, Char)] -> [(a, Char)]
normalize [(a, Char)]
cs
normalize [] = []
preprocessStep
:: DecoderState
-> BS.ByteString
-> ([([ParseError], Char)], DecoderState, BS.ByteString)
preprocessStep :: DecoderState
-> ByteString -> ([([ParseError], Char)], DecoderState, ByteString)
preprocessStep DecoderState
state ByteString
stream = ([([ParseError], Char)], DecoderState, ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString)
normalize' (([([ParseError], Char)], DecoderState, ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString))
-> ((Maybe (Either ShortByteString String), DecoderState,
ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString))
-> (Maybe (Either ShortByteString String), DecoderState,
ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Maybe (Either ShortByteString String), DecoderState, ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString)
forall b c.
(Maybe (Either ShortByteString String), b, c)
-> ([([ParseError], Char)], b, c)
flatten' ((Maybe (Either ShortByteString String), DecoderState, ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString))
-> (Maybe (Either ShortByteString String), DecoderState,
ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString)
forall a b. (a -> b) -> a -> b
$ DecoderState
-> ByteString
-> (Maybe (Either ShortByteString String), DecoderState,
ByteString)
decodeStep DecoderState
state ByteString
stream
where flatten' :: (Maybe (Either ShortByteString String), b, c)
-> ([([ParseError], Char)], b, c)
flatten' = (Maybe (Either ShortByteString String) -> [([ParseError], Char)])
-> (Maybe (Either ShortByteString String), b, c)
-> ([([ParseError], Char)], b, c)
forall a d b c. (a -> d) -> (a, b, c) -> (d, b, c)
U.HT.mapFst3 ([([ParseError], Char)]
-> (Either ShortByteString String -> [([ParseError], Char)])
-> Maybe (Either ShortByteString String)
-> [([ParseError], Char)]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [] ((Either ShortByteString String -> [([ParseError], Char)])
-> Maybe (Either ShortByteString String) -> [([ParseError], Char)])
-> (Either ShortByteString String -> [([ParseError], Char)])
-> Maybe (Either ShortByteString String)
-> [([ParseError], Char)]
forall a b. (a -> b) -> a -> b
$ (([ParseError], Char) -> ([ParseError], Char))
-> [([ParseError], Char)] -> [([ParseError], Char)]
forall a b. (a -> b) -> [a] -> [b]
map ([ParseError], Char) -> ([ParseError], Char)
charError ([([ParseError], Char)] -> [([ParseError], Char)])
-> (Either ShortByteString String -> [([ParseError], Char)])
-> Either ShortByteString String
-> [([ParseError], Char)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Either ShortByteString String -> [([ParseError], Char)]
flatten)
normalize' :: ([([ParseError], Char)], DecoderState, ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString)
normalize' c' :: ([([ParseError], Char)], DecoderState, ByteString)
c'@([], DecoderState
_, ByteString
_) = ([([ParseError], Char)], DecoderState, ByteString)
c'
normalize' ([([ParseError]
errs, Char
'\r')], DecoderState
state', ByteString
stream') = case (Maybe (Either ShortByteString String), DecoderState, ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString)
forall b c.
(Maybe (Either ShortByteString String), b, c)
-> ([([ParseError], Char)], b, c)
flatten' ((Maybe (Either ShortByteString String), DecoderState, ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString))
-> (Maybe (Either ShortByteString String), DecoderState,
ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString)
forall a b. (a -> b) -> a -> b
$ DecoderState
-> ByteString
-> (Maybe (Either ShortByteString String), DecoderState,
ByteString)
decodeStep DecoderState
state' ByteString
stream' of
(([ParseError]
errs', Char
'\n'):[([ParseError], Char)]
cs, DecoderState
state'', ByteString
stream'') ->
([([ParseError], Char)], DecoderState, ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString)
normalize' (([ParseError]
errs [ParseError] -> [ParseError] -> [ParseError]
forall a. [a] -> [a] -> [a]
++ [ParseError]
errs', Char
'\n') ([ParseError], Char)
-> [([ParseError], Char)] -> [([ParseError], Char)]
forall a. a -> [a] -> [a]
: [([ParseError], Char)]
cs, DecoderState
state'', ByteString
stream'')
([([ParseError], Char)], DecoderState, ByteString)
_ -> ([([ParseError]
errs, Char
'\n')], DecoderState
state', ByteString
stream')
normalize' (([ParseError]
errs, Char
'\r'):([ParseError]
errs', Char
'\n'):[([ParseError], Char)]
cs, DecoderState
state', ByteString
stream') =
([([ParseError], Char)] -> [([ParseError], Char)])
-> ([([ParseError], Char)], DecoderState, ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString)
forall a d b c. (a -> d) -> (a, b, c) -> (d, b, c)
U.HT.mapFst3 (([ParseError]
errs [ParseError] -> [ParseError] -> [ParseError]
forall a. [a] -> [a] -> [a]
++ [ParseError]
errs', Char
'\n') ([ParseError], Char)
-> [([ParseError], Char)] -> [([ParseError], Char)]
forall a. a -> [a] -> [a]
:) (([([ParseError], Char)], DecoderState, ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString))
-> ([([ParseError], Char)], DecoderState, ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString)
forall a b. (a -> b) -> a -> b
$ ([([ParseError], Char)], DecoderState, ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString)
normalize' ([([ParseError], Char)]
cs, DecoderState
state', ByteString
stream')
normalize' (([ParseError]
errs, Char
'\r'):[([ParseError], Char)]
cs, DecoderState
state', ByteString
stream') =
([([ParseError], Char)] -> [([ParseError], Char)])
-> ([([ParseError], Char)], DecoderState, ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString)
forall a d b c. (a -> d) -> (a, b, c) -> (d, b, c)
U.HT.mapFst3 (([ParseError]
errs, Char
'\n') ([ParseError], Char)
-> [([ParseError], Char)] -> [([ParseError], Char)]
forall a. a -> [a] -> [a]
:) (([([ParseError], Char)], DecoderState, ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString))
-> ([([ParseError], Char)], DecoderState, ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString)
forall a b. (a -> b) -> a -> b
$ ([([ParseError], Char)], DecoderState, ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString)
normalize' ([([ParseError], Char)]
cs, DecoderState
state', ByteString
stream')
normalize' (([ParseError], Char)
c:[([ParseError], Char)]
cs, DecoderState
state', ByteString
stream') =
([([ParseError], Char)] -> [([ParseError], Char)])
-> ([([ParseError], Char)], DecoderState, ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString)
forall a d b c. (a -> d) -> (a, b, c) -> (d, b, c)
U.HT.mapFst3 (([ParseError], Char)
c ([ParseError], Char)
-> [([ParseError], Char)] -> [([ParseError], Char)]
forall a. a -> [a] -> [a]
:) (([([ParseError], Char)], DecoderState, ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString))
-> ([([ParseError], Char)], DecoderState, ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString)
forall a b. (a -> b) -> a -> b
$ ([([ParseError], Char)], DecoderState, ByteString)
-> ([([ParseError], Char)], DecoderState, ByteString)
normalize' ([([ParseError], Char)]
cs, DecoderState
state', ByteString
stream')
charError :: ([ParseError], Char) -> ([ParseError], Char)
charError :: ([ParseError], Char) -> ([ParseError], Char)
charError c' :: ([ParseError], Char)
c'@([ParseError]
_, Char
c)
| Char -> Char -> Char -> Bool
forall a. Ord a => a -> a -> a -> Bool
range Char
'\xD800' Char
'\xDFFF' Char
c = ParseError -> ([ParseError], Char)
addErr ParseError
SurrogateInInputStream
| Char -> Char -> Char -> Bool
forall a. Ord a => a -> a -> a -> Bool
range Char
'\xFDD0' Char
'\xFDEF' Char
c = ParseError -> ([ParseError], Char)
addErr ParseError
NoncharacterInInputStream
| Bool
noncharacter = ParseError -> ([ParseError], Char)
addErr ParseError
NoncharacterInInputStream
| Char -> Char -> Char -> Bool
forall a. Ord a => a -> a -> a -> Bool
range Char
'\SOH' Char
'\US' Char
c Bool -> Bool -> Bool
&& Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
notElem Char
c String
"\t\n\f\r" = ParseError -> ([ParseError], Char)
addErr ParseError
ControlCharacterInInputStream
| Char -> Char -> Char -> Bool
forall a. Ord a => a -> a -> a -> Bool
range Char
'\DEL' Char
'\x9F' Char
c = ParseError -> ([ParseError], Char)
addErr ParseError
ControlCharacterInInputStream
| Bool
otherwise = ([ParseError], Char)
c'
where noncharacter :: Bool
noncharacter = case Int -> Int -> Int
forall a. Integral a => a -> a -> a
mod (Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
c) Int
0x10000 of
Int
0xFFFE -> Bool
True
Int
0xFFFF -> Bool
True
Int
_ -> Bool
False
addErr :: ParseError -> ([ParseError], Char)
addErr ParseError
err = ([ParseError] -> [ParseError])
-> ([ParseError], Char) -> ([ParseError], Char)
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
F.B.first (ParseError
err ParseError -> [ParseError] -> [ParseError]
forall a. a -> [a] -> [a]
:) ([ParseError], Char)
c'
flatten :: Either BS.SH.ShortByteString String -> [([ParseError], Char)]
flatten :: Either ShortByteString String -> [([ParseError], Char)]
flatten = (ShortByteString -> [([ParseError], Char)])
-> (String -> [([ParseError], Char)])
-> Either ShortByteString String
-> [([ParseError], Char)]
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (\ShortByteString
err -> [([ShortByteString -> ParseError
InvalidByteSequence ShortByteString
err], Char
replacementChar)]) ((Char -> ([ParseError], Char)) -> String -> [([ParseError], Char)]
forall a b. (a -> b) -> [a] -> [b]
map ((Char -> ([ParseError], Char))
-> String -> [([ParseError], Char)])
-> (Char -> ([ParseError], Char))
-> String
-> [([ParseError], Char)]
forall a b. (a -> b) -> a -> b
$ \Char
c -> ([], Char
c))