module Snap.Internal.Parsing where
import Control.Applicative (Alternative ((<|>)), Applicative (pure, (*>), (<*)), liftA2, (<$>))
import Control.Arrow (first, second)
import Control.Monad (Monad (return), MonadPlus (mzero), liftM, when)
import Data.Attoparsec.ByteString.Char8 (IResult (Done, Fail, Partial), Parser, Result, anyChar, char, choice, decimal, endOfInput, feed, inClass, isDigit, isSpace, letter_ascii, many', match, option, parse, satisfy, skipSpace, skipWhile, string, take, takeTill, takeWhile, sepBy')
import qualified Data.Attoparsec.ByteString.Char8 as AP
import Data.Bits (Bits (unsafeShiftL, (.&.), (.|.)))
import Data.ByteString.Builder (Builder, byteString, char8, toLazyByteString, word8)
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as S
import Data.ByteString.Internal (c2w, w2c)
import qualified Data.ByteString.Lazy.Char8 as L
import Data.CaseInsensitive (CI)
import qualified Data.CaseInsensitive as CI (mk)
import Data.Char (Char, intToDigit, isAlpha, isAlphaNum, isAscii, isControl, isHexDigit, ord)
import Data.Int (Int64)
import Data.List (concat, intercalate, intersperse)
import Data.Map (Map)
import qualified Data.Map as Map (empty, insertWith', toList)
import Data.Maybe (Maybe (..), maybe)
import Data.Monoid (Monoid (mconcat, mempty), (<>))
import Data.Word (Word8)
import GHC.Exts (Int (I#), uncheckedShiftRL#, word2Int#)
import GHC.Word (Word8 (..))
import Prelude (Bool (..), Either (..), Enum (fromEnum, toEnum), Eq (..), Num (..), Ord (..), String, and, any, concatMap, elem, error, filter, flip, foldr, fst, id, map, not, otherwise, show, snd, ($), ($!), (&&), (++), (.), (||))
import Snap.Internal.Http.Types (Cookie (Cookie))
fullyParse :: ByteString -> Parser a -> Either String a
fullyParse = fullyParse' parse feed
(<?>) :: Parser a -> String -> Parser a
(<?>) a !b = (AP.<?>) a b
infix 0 <?>
fullyParse' :: (Parser a -> ByteString -> Result a)
-> (Result a -> ByteString -> Result a)
-> ByteString
-> Parser a
-> Either String a
fullyParse' parseFunc feedFunc s p =
case r' of
(Fail _ context e) -> Left $ concat [ "Parsing "
, intercalate "/" context
, ": "
, e
, "."
]
(Partial _) -> Left "parse failed"
(Done _ x) -> Right x
where
r = parseFunc p s
r' = feedFunc r ""
parseNum :: Parser Int64
parseNum = decimal
untilEOL :: Parser ByteString
untilEOL = takeWhile notend <?> "untilEOL"
where
notend c = not $ c == '\r' || c == '\n'
crlf :: Parser ByteString
crlf = string "\r\n" <?> "crlf"
toTableList :: (Char -> Bool) -> [Char]
toTableList f = l
where
g c = c /= '-' && f c
!l1 = filter g $ map w2c [0..255]
!l0 = if f '-' then ['-'] else []
!l = l0 ++ l1
toTable :: (Char -> Bool) -> (Char -> Bool)
toTable = inClass . toTableList
skipFieldChars :: Parser ()
skipFieldChars = skipWhile isFieldChar
isFieldChar :: Char -> Bool
isFieldChar = toTable f
where
f c = (isDigit c) || (isAlpha c) || c == '-' || c == '_'
pHeaders :: Parser [(ByteString, ByteString)]
pHeaders = many' header <?> "headers"
where
slurp p = fst <$> match p
header =
liftA2 (,)
fieldName
(char ':' *> skipSpace *> contents)
fieldName =
slurp (letter_ascii *> skipFieldChars)
contents =
liftA2 S.append
(untilEOL <* crlf)
(continuation <|> pure S.empty)
isLeadingWS w =
w == ' ' || w == '\t'
leadingWhiteSpace =
skipWhile1 isLeadingWS
continuation =
liftA2 S.cons
(leadingWhiteSpace *> pure ' ')
contents
skipWhile1 f = satisfy f *> skipWhile f
pWord :: Parser ByteString
pWord = pQuotedString <|> (takeWhile (/= ';'))
pQuotedString :: Parser ByteString
pQuotedString = q *> quotedText <* q
where
quotedText = (S.concat . L.toChunks . toLazyByteString) <$> f mempty
f soFar = do
t <- takeWhile qdtext
let soFar' = soFar <> byteString t
choice [ string "\\\"" *> f (soFar' <> char8 '"')
, pure soFar' ]
q = char '"'
qdtext = matchAll [ isRFCText, (/= '"'), (/= '\\') ]
isRFCText :: Char -> Bool
isRFCText = not . isControl
matchAll :: [ Char -> Bool ] -> Char -> Bool
matchAll x c = and $ map ($ c) x
pAvPairs :: Parser [(ByteString, ByteString)]
pAvPairs = do
a <- pAvPair
b <- many' (skipSpace *> char ';' *> skipSpace *> pAvPair)
return $! a:b
pAvPair :: Parser (ByteString, ByteString)
pAvPair = do
key <- pToken <* skipSpace
val <- liftM trim (option "" $ char '=' *> skipSpace *> pWord)
return $! (key, val)
pParameter :: Parser (ByteString, ByteString)
pParameter = parser <?> "pParameter"
where
parser = do
key <- pToken <* skipSpace
val <- liftM trim (char '=' *> skipSpace *> pWord)
return $! (trim key, val)
trim :: ByteString -> ByteString
trim = snd . S.span isSpace . fst . S.spanEnd isSpace
pValueWithParameters :: Parser (ByteString, [(CI ByteString, ByteString)])
pValueWithParameters = parser <?> "pValueWithParameters"
where
parser = do
value <- liftM trim (skipSpace *> takeWhile (/= ';'))
params <- many' pParam
endOfInput
return (value, map (first CI.mk) params)
pParam = skipSpace *> char ';' *> skipSpace *> pParameter
pContentTypeWithParameters :: Parser ( ByteString
, [(CI ByteString, ByteString)] )
pContentTypeWithParameters = parser <?> "pContentTypeWithParameters"
where
parser = do
value <- liftM trim (skipSpace *> takeWhile (not . isSep))
params <- many' (skipSpace *> satisfy isSep *> skipSpace *> pParameter)
endOfInput
return $! (value, map (first CI.mk) params)
isSep c = c == ';' || c == ','
pToken :: Parser ByteString
pToken = takeWhile isToken
isToken :: Char -> Bool
isToken = toTable f
where
f = matchAll [ isAscii
, not . isControl
, not . isSpace
, not . flip elem [ '(', ')', '<', '>', '@', ',', ';'
, ':', '\\', '\"', '/', '[', ']'
, '?', '=', '{', '}' ]
]
pTokens :: Parser [ByteString]
pTokens = (skipSpace *> pToken <* skipSpace) `sepBy'` char ','
parseToCompletion :: Parser a -> ByteString -> Maybe a
parseToCompletion p s = toResult $ finish r
where
r = parse p s
toResult (Done _ c) = Just c
toResult _ = Nothing
type DList a = [a] -> [a]
pUrlEscaped :: Parser ByteString
pUrlEscaped = do
sq <- nextChunk id
return $! S.concat $ sq []
where
nextChunk :: DList ByteString -> Parser (DList ByteString)
nextChunk !s = (endOfInput *> pure s) <|> do
c <- anyChar
case c of
'+' -> plusSpace s
'%' -> percentEncoded s
_ -> unEncoded c s
percentEncoded :: DList ByteString -> Parser (DList ByteString)
percentEncoded !l = do
hx <- take 2
when (S.length hx /= 2 || (not $ S.all isHexDigit hx)) $
mzero
let code = w2c ((unsafeFromHex hx) :: Word8)
nextChunk $ l . ((S.singleton code) :)
unEncoded :: Char -> DList ByteString -> Parser (DList ByteString)
unEncoded !c !l' = do
let l = l' . ((S.singleton c) :)
bs <- takeTill (flip elem ['%', '+'])
if S.null bs
then nextChunk l
else nextChunk $ l . (bs :)
plusSpace :: DList ByteString -> Parser (DList ByteString)
plusSpace l = nextChunk (l . ((S.singleton ' ') :))
urlDecode :: ByteString -> Maybe ByteString
urlDecode = parseToCompletion pUrlEscaped
urlEncode :: ByteString -> ByteString
urlEncode = S.concat . L.toChunks . toLazyByteString . urlEncodeBuilder
urlEncodeBuilder :: ByteString -> Builder
urlEncodeBuilder = go mempty
where
go !b !s = maybe b' esc (S.uncons y)
where
(x,y) = S.span urlEncodeClean s
b' = b <> byteString x
esc (c,r) = let b'' = if c == ' '
then b' <> char8 '+'
else b' <> hexd c
in go b'' r
urlEncodeClean :: Char -> Bool
urlEncodeClean = toTable f
where
f c = any ($ c) [\c' -> isAscii c' && isAlphaNum c'
, flip elem [ '$', '_', '-', '.', '!'
, '*' , '\'', '(', ')', ',' ]]
hexd :: Char -> Builder
hexd c0 = char8 '%' <> word8 hi <> word8 low
where
!c = c2w c0
toDigit = c2w . intToDigit
!low = toDigit $ fromEnum $ c .&. 0xf
!hi = toDigit $ (c .&. 0xf0) `shiftr` 4
shiftr (W8# a#) (I# b#) = I# (word2Int# (uncheckedShiftRL# a# b#))
finish :: Result a -> Result a
finish (Partial f) = flip feed "" $ f ""
finish x = x
parseUrlEncoded :: ByteString -> Map ByteString [ByteString]
parseUrlEncoded s = foldr ins Map.empty decoded
where
ins (!k,v) !m = Map.insertWith' (++) k [v] m
parts :: [(ByteString,ByteString)]
parts = map breakApart $
S.splitWith (\c -> c == '&' || c == ';') s
breakApart = (second (S.drop 1)) . S.break (== '=')
urldecode = parseToCompletion pUrlEscaped
decodeOne (a,b) = do
!a' <- urldecode a
!b' <- urldecode b
return $! (a',b')
decoded = go id parts
where
go !dl [] = dl []
go !dl (x:xs) = maybe (go dl xs)
(\p -> go (dl . (p:)) xs)
(decodeOne x)
buildUrlEncoded :: Map ByteString [ByteString] -> Builder
buildUrlEncoded m = mconcat builders
where
builders = intersperse (char8 '&') $
concatMap encodeVS $ Map.toList m
encodeVS (k,vs) = map (encodeOne k) vs
encodeOne k v = mconcat [ urlEncodeBuilder k
, char8 '='
, urlEncodeBuilder v ]
printUrlEncoded :: Map ByteString [ByteString] -> ByteString
printUrlEncoded = S.concat . L.toChunks . toLazyByteString . buildUrlEncoded
pCookies :: Parser [Cookie]
pCookies = do
kvps <- pAvPairs
return $! map toCookie $ filter (not . S.isPrefixOf "$" . fst) kvps
where
toCookie (nm,val) = Cookie nm val Nothing Nothing Nothing False False
parseCookie :: ByteString -> Maybe [Cookie]
parseCookie = parseToCompletion pCookies
unsafeFromHex :: (Enum a, Num a, Bits a) => ByteString -> a
unsafeFromHex = S.foldl' f 0
where
#if MIN_VERSION_base(4,5,0)
sl = unsafeShiftL
#else
sl = shiftL
#endif
f !cnt !i = sl cnt 4 .|. nybble i
nybble c | c >= '0' && c <= '9' = toEnum $! fromEnum c fromEnum '0'
| c >= 'a' && c <= 'f' = toEnum $! 10 + fromEnum c fromEnum 'a'
| c >= 'A' && c <= 'F' = toEnum $! 10 + fromEnum c fromEnum 'A'
| otherwise = error $ "bad hex digit: " ++ show c
unsafeFromNat :: (Enum a, Num a, Bits a) => ByteString -> a
unsafeFromNat = S.foldl' f 0
where
zero = ord '0'
f !cnt !i = cnt * 10 + toEnum (digitToInt i)
digitToInt c = if d >= 0 && d <= 9
then d
else error $ "bad digit: '" ++ [c] ++ "'"
where
!d = ord c zero