{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
module Data.JsonStream.Parser (
Parser
, ParseOutput(..)
, runParser
, runParser'
, parseByteString
, parseLazyByteString
, decode
, eitherDecode
, decodeStrict
, eitherDecodeStrict
, value
, string
, safeString
, number
, integer
, real
, bool
, jNull
, (.:)
, (.:?)
, (.|)
, (.!)
, objectWithKey
, objectItems
, objectValues
, arrayOf
, arrayWithIndexOf
, indexedArrayOf
, nullable
, filterI
, takeI
, mapWithFailure
, arrayFound
, objectFound
) where
#if !MIN_VERSION_bytestring(0,10,6)
import Data.Monoid (Monoid, mappend, mempty)
#endif
#if MIN_VERSION_base(4,10,0)
import Data.Semigroup (Semigroup(..))
#endif
import Control.Applicative
import qualified Data.Aeson as AE
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.ByteString.Lazy.Internal as BL
import Data.Char (isSpace)
import qualified Data.HashMap.Strict as HMap
import Data.Scientific (Scientific, isInteger,
toBoundedInteger, toRealFloat)
import qualified Data.Text as T
import qualified Data.Vector as Vec
import Foreign.C.Types
import Data.JsonStream.CLexer
import Data.JsonStream.TokenParser
objectKeyStringLimit :: Int
objectKeyStringLimit = 65536
data ParseResult v = MoreData (Parser v, BS.ByteString -> TokenResult)
| Failed String
| Done BS.ByteString TokenResult
| Yield v (ParseResult v)
instance Functor ParseResult where
fmap f (MoreData (np, ntok)) = MoreData (fmap f np, ntok)
fmap _ (Failed err) = Failed err
fmap _ (Done ctx tok) = Done ctx tok
fmap f (Yield v np) = Yield (f v) (fmap f np)
newtype Parser a = Parser {
callParse :: TokenResult -> ParseResult a
}
instance Functor Parser where
fmap f (Parser p) = Parser $ \d -> fmap f (p d)
yieldResults :: [a] -> ParseResult a -> ParseResult a
yieldResults values end = foldr Yield end values
instance Applicative Parser where
pure x = Parser $ \tok -> process (callParse ignoreVal tok)
where
process (Failed err) = Failed err
process (Done ctx tok) = Yield x (Done ctx tok)
process (MoreData (np, ntok)) = MoreData (Parser (process . callParse np), ntok)
process _ = Failed "Internal error in pure, ignoreVal doesn't yield"
(<*>) m1 m2 = Parser $ \tok -> process ([], []) (callParse m1 tok) (callParse m2 tok)
where
process ([], _) (Done ctx ntok) _ = Done ctx ntok
process (lst1, lst2) (Yield v np1) p2 = process (v:lst1, lst2) np1 p2
process (lst1, lst2) p1 (Yield v np2) = process (lst1, v:lst2) p1 np2
process (lst1, lst2) (Done ctx ntok) (Done {}) =
yieldResults [ mx my | mx <- reverse lst1, my <- reverse lst2 ] (Done ctx ntok)
process lsts (MoreData (np1, ntok1)) (MoreData (np2, _)) =
MoreData (Parser (\tok -> process lsts (callParse np1 tok) (callParse np2 tok)), ntok1)
process _ (Failed err) _ = Failed err
process _ _ (Failed err) = Failed err
process _ _ _ = Failed "Unexpected error in parallel processing <*>."
#if MIN_VERSION_base(4,10,0)
instance Monoid (Parser a) where
mempty = ignoreVal
mappend = (<>)
instance Semigroup (Parser a) where
(<>) m1 m2 =
#else
instance Monoid (Parser a) where
mempty = ignoreVal
mappend m1 m2 =
#endif
Parser $ \tok -> process (callParse m1 tok) (callParse m2 tok)
where
process (Yield v np1) p2 = Yield v (process np1 p2)
process p1 (Yield v np2) = Yield v (process p1 np2)
process (Done ctx ntok) Done {} = Done ctx ntok
process (MoreData (np1, ntok)) (MoreData (np2, _)) =
MoreData (Parser $ \tok -> process (callParse np1 tok) (callParse np2 tok), ntok)
process (Failed err) _ = Failed err
process _ (Failed err) = Failed err
process _ _ = Failed "Unexpected error in parallel processing <|>"
instance Alternative Parser where
empty = ignoreVal
m1 <|> m2 = Parser $ \tok -> process [] (callParse m1 tok) (Just $ callParse m2 tok)
where
process _ (Yield v np1) _ = Yield v (process [] np1 Nothing)
process _ (Done ctx ntok) Nothing = Done ctx ntok
process lst (Done ctx ntok) (Just (Done {})) = yieldResults (reverse lst) (Done ctx ntok)
process lst np1 (Just (Yield v np2)) = process (v:lst) np1 (Just np2)
process lst (MoreData (np1, ntok)) Nothing =
MoreData (Parser $ \tok -> process lst (callParse np1 tok) Nothing, ntok)
process lst (MoreData (np1, ntok)) (Just (MoreData (np2, _))) =
MoreData (Parser $ \tok -> process lst (callParse np1 tok) (Just $ callParse np2 tok), ntok)
process _ (Failed err) _ = Failed err
process _ _ (Just (Failed err)) = Failed err
process _ _ _ = Failed "Unexpected error in parallel processing <|>"
some = filterI (not . null) . many
many f = Parser $ \ntok -> loop id (callParse f ntok)
where
loop acc (Done ctx ntp) = Yield (acc []) (Done ctx ntp)
loop acc (MoreData (Parser np, ntok)) = MoreData (Parser (loop acc . np), ntok)
loop acc (Yield v np) = loop (\nxt -> acc (v : nxt)) np
loop _ (Failed err) = Failed err
array' :: (Int -> Parser a) -> Parser a
array' valparse = Parser $ \tp ->
case tp of
(PartialResult ArrayBegin ntp) -> moreData (nextitem 0) ntp
(PartialResult _ _) -> callParse ignoreVal tp
(TokMoreData ntok) -> MoreData (array' valparse, ntok)
(TokFailed) -> Failed "Array - token failed"
where
nextitem !_ _ (ArrayEnd ctx) ntok = Done ctx ntok
nextitem !i tok _ _ = arrcontent i (callParse (valparse i) tok)
arrcontent !i (Done _ ntp) = moreData (nextitem (i+1)) ntp
arrcontent !i (MoreData (Parser np, ntp)) = MoreData (Parser (arrcontent i . np), ntp)
arrcontent !i (Yield v np) = Yield v (arrcontent i np)
arrcontent !_ (Failed err) = Failed err
arrayOf :: Parser a -> Parser a
arrayOf valparse = array' (const valparse)
elemFound :: Element -> a -> a -> Parser a -> Parser a
elemFound elsearch start end parser = Parser $ moreData handle
where
handle tok el _
| el == elsearch = Yield start (parseAndAppend (callParse parser tok))
handle tok _ _ = callParse ignoreVal tok
parseAndAppend (Failed err) = Failed err
parseAndAppend (Yield v np) = Yield v (parseAndAppend np)
parseAndAppend (MoreData (Parser np, ntp)) = MoreData (Parser (parseAndAppend . np), ntp)
parseAndAppend (Done ctx ntp) = Yield end (Done ctx ntp)
objectFound :: a -> a -> Parser a -> Parser a
objectFound = elemFound ObjectBegin
arrayFound :: a -> a -> Parser a -> Parser a
arrayFound = elemFound ArrayBegin
arrayWithIndexOf :: Int -> Parser a -> Parser a
arrayWithIndexOf idx valparse = array' itemFn
where
itemFn aidx
| aidx == idx = valparse
| otherwise = ignoreVal
indexedArrayOf :: Parser a -> Parser (Int, a)
indexedArrayOf valparse = array' (\(!key) -> (key,) <$> valparse)
object' :: Bool -> (T.Text -> Parser a) -> Parser a
object' once valparse = Parser $ \tp ->
case tp of
(PartialResult ObjectBegin ntp) -> moreData (nextitem False) ntp
(PartialResult _ _) -> callParse ignoreVal tp
(TokMoreData ntok) -> MoreData (object' once valparse, ntok)
(TokFailed) -> Failed "Array - token failed"
where
nextitem _ _ (ObjectEnd ctx) ntok = Done ctx ntok
nextitem yielded _ (JValue (AE.String key)) ntok = objcontent yielded (callParse (valparse key) ntok)
nextitem yielded _ (StringContent str) ntok =
objcontent yielded $ moreData (getLongKey [str] (BS.length str)) ntok
nextitem _ _ el _ = Failed $ "Object - unexpected item: " ++ show el
objcontent yielded (Done _ ntp)
| once && yielded = callParse (ignoreVal' 1) ntp
| otherwise = moreData (nextitem yielded) ntp
objcontent yielded (MoreData (Parser np, ntok)) = MoreData (Parser (objcontent yielded. np), ntok)
objcontent _ (Yield v np) = Yield v (objcontent True np)
objcontent _ (Failed err) = Failed err
getLongKey acc !len _ el ntok =
case el of
StringEnd
| Right key <- unescapeText (BS.concat $ reverse acc) ->
callParse (valparse key) ntok
| otherwise -> Failed "Error decoding UTF8"
StringContent str
| len > objectKeyStringLimit -> callParse (ignoreStrRestThen ignoreVal) ntok
| otherwise -> moreData (getLongKey (str:acc) (len + BS.length str)) ntok
_ -> Failed "Object longstr - lexer failed."
moreData :: (TokenResult -> Element -> TokenResult -> ParseResult v) -> TokenResult -> ParseResult v
moreData parser tok =
case tok of
PartialResult el ntok -> parser tok el ntok
TokMoreData ntok -> MoreData (Parser (moreData parser), ntok)
TokFailed -> Failed "More data - lexer failed."
objectItems :: Parser a -> Parser (T.Text, a)
objectItems valparse = object' False $ \(!key) -> (key,) <$> valparse
objectValues :: Parser a -> Parser a
objectValues valparse = object' False (const valparse)
objectWithKey :: T.Text -> Parser a -> Parser a
objectWithKey name valparse = object' True itemFn
where
itemFn key
| key == name = valparse
| otherwise = ignoreVal
aeValue :: Parser AE.Value
aeValue = Parser $ moreData value'
where
value' tok el ntok =
case el of
JValue val -> Yield val (Done "" ntok)
JInteger val -> Yield (AE.Number $ fromIntegral val) (Done "" ntok)
StringContent _ -> callParse (AE.String <$> longString Nothing) tok
ArrayBegin -> AE.Array . Vec.fromList <$> callParse (many (arrayOf aeValue)) tok
ObjectBegin -> AE.Object . HMap.fromList <$> callParse (manyReverse (objectItems aeValue)) tok
_ -> Failed ("aeValue - unexpected token: " ++ show el)
manyReverse :: Parser a -> Parser [a]
manyReverse f = Parser $ \ntok -> loop [] (callParse f ntok)
where
loop acc (Done ctx ntp) = Yield acc (Done ctx ntp)
loop acc (MoreData (Parser np, ntok)) = MoreData (Parser (loop acc . np), ntok)
loop acc (Yield v np) = loop (v : acc) np
loop _ (Failed err) = Failed err
jvalue :: (AE.Value -> Maybe a) -> (CLong -> Maybe a) -> Parser a
jvalue convert cvtint = Parser (moreData value')
where
value' tok el ntok =
case el of
JValue val
| Just convValue <- convert val -> Yield convValue (Done "" ntok)
| otherwise -> Done "" ntok
JInteger val
| Just convValue <- cvtint val -> Yield convValue (Done "" ntok)
| otherwise -> Done "" ntok
_ -> callParse ignoreVal tok
longString :: Maybe Int -> Parser T.Text
longString mbounds = Parser $ moreData (handle (BS.empty :) 0)
where
handle acc !len tok el ntok =
case el of
JValue (AE.String str) -> Yield str (Done "" ntok)
StringContent str
| (Just bounds) <- mbounds, len > bounds
-> callParse (ignoreStrRestThen (Parser $ Done "")) ntok
| otherwise -> moreData (handle (acc . (str:)) (len + BS.length str)) ntok
StringEnd
| Right val <- unescapeText (BS.concat (acc []))
-> Yield val (Done "" ntok)
| otherwise -> Failed "Error decoding UTF8"
_ -> callParse ignoreVal tok
string :: Parser T.Text
string = longString Nothing
safeString :: Int -> Parser T.Text
safeString limit = longString (Just limit)
number :: Parser Scientific
number = jvalue cvt (Just . fromIntegral)
where
cvt (AE.Number num) = Just num
cvt _ = Nothing
integer :: forall i. (Integral i, Bounded i) => Parser i
integer = jvalue cvt clongToBounded
where
clmax = toInteger (maxBound :: CLong)
clmin = toInteger (minBound :: CLong)
imax = toInteger (maxBound :: i)
imin = toInteger (minBound :: i)
clongIsSmaller = clmax <= imax && clmin >= imin
clongIsPartial = clmax < imax || clmin > imin
inBounds num
| clongIsPartial = toInteger num <= imax && toInteger num >= imin
| otherwise = num <= fromIntegral (maxBound :: i) && num >= fromIntegral (minBound :: i)
clongToBounded :: CLong -> Maybe i
clongToBounded num
| clongIsSmaller || inBounds num = Just (fromIntegral num)
| otherwise = Nothing
cvt (AE.Number num)
| isInteger num = toBoundedInteger num
cvt _ = Nothing
real :: RealFloat a => Parser a
real = jvalue cvt (Just . fromIntegral)
where
cvt (AE.Number num) = Just $ toRealFloat num
cvt _ = Nothing
bool :: Parser Bool
bool = jvalue cvt (const Nothing)
where
cvt (AE.Bool b) = Just b
cvt _ = Nothing
jNull :: Parser ()
jNull = jvalue cvt (const Nothing)
where
cvt (AE.Null) = Just ()
cvt _ = Nothing
nullable :: Parser a -> Parser (Maybe a)
nullable valparse = Parser (moreData value')
where
value' _ (JValue AE.Null) ntok = Yield Nothing (Done "" ntok)
value' tok _ _ = callParse (Just <$> valparse) tok
value :: AE.FromJSON a => Parser a
value = Parser $ \ntok -> loop (callParse aeValue ntok)
where
loop (Done ctx ntp) = Done ctx ntp
loop (Failed err) = Failed err
loop (MoreData (Parser np, ntok)) = MoreData (Parser (loop . np), ntok)
loop (Yield v np) =
case AE.fromJSON v of
AE.Error _ -> loop np
AE.Success res -> Yield res (loop np)
takeI :: Int -> Parser a -> Parser a
takeI num valparse = Parser $ \tok -> loop num (callParse valparse tok)
where
loop _ (Done ctx ntp) = Done ctx ntp
loop _ (Failed err) = Failed err
loop n (MoreData (Parser np, ntok)) = MoreData (Parser (loop n . np), ntok)
loop 0 (Yield _ np) = loop 0 np
loop n (Yield v np) = Yield v (loop (n-1) np)
ignoreStrRestThen :: Parser a -> Parser a
ignoreStrRestThen next = Parser $ moreData handle
where
handle _ el ntok =
case el of
StringContent _ -> moreData handle ntok
StringEnd -> callParse next ntok
_ -> Failed "Unexpected result in ignoreStrRestPlusOne"
ignoreVal :: Parser a
ignoreVal = ignoreVal' 0
ignoreVal' :: Int -> Parser a
ignoreVal' stval = Parser $ moreData (handleTok stval)
where
handleLongString level _ (StringContent _) ntok = moreData (handleLongString level) ntok
handleLongString 0 _ StringEnd ntok = Done "" ntok
handleLongString level _ StringEnd ntok = moreData (handleTok level) ntok
handleLongString _ _ el _ = Failed $ "Unexpected element in handleLongStr: " ++ show el
handleTok :: Int -> TokenResult -> Element -> TokenResult -> ParseResult a
handleTok 0 _ (JValue _) ntok = Done "" ntok
handleTok 0 _ (JInteger _) ntok = Done "" ntok
handleTok 0 _ (ArrayEnd _) _ = Failed "ArrayEnd in ignoreval on 0 level"
handleTok 0 _ (ObjectEnd _) _ = Failed "ObjectEnd in ignoreval on 0 level"
handleTok 1 _ (ArrayEnd ctx) ntok = Done ctx ntok
handleTok 1 _ (ObjectEnd ctx) ntok = Done ctx ntok
handleTok level _ el ntok =
case el of
JValue _ -> moreData (handleTok level) ntok
JInteger _ -> moreData (handleTok level) ntok
StringContent _ -> moreData (handleLongString level) ntok
ArrayEnd _ -> moreData (handleTok (level - 1)) ntok
ObjectEnd _ -> moreData (handleTok (level - 1)) ntok
ArrayBegin -> moreData (handleTok (level + 1)) ntok
ObjectBegin -> moreData (handleTok (level + 1)) ntok
StringEnd -> Failed "Internal error - out of order StringEnd"
filterI :: (a -> Bool) -> Parser a -> Parser a
filterI cond valparse = Parser $ \ntok -> loop (callParse valparse ntok)
where
loop (Done ctx ntp) = Done ctx ntp
loop (Failed err) = Failed err
loop (MoreData (Parser np, ntok)) = MoreData (Parser (loop . np), ntok)
loop (Yield v np)
| cond v = Yield v (loop np)
| otherwise = loop np
mapWithFailure :: (a -> Either String b) -> Parser a -> Parser b
mapWithFailure mapping =
updateParser
where
updateParser (Parser run) = Parser $ updateParseResult . run
updateParseResult x = case x of
MoreData (parser, continuation) -> MoreData (updateParser parser, continuation)
Failed message -> Failed message
Done a b -> Done a b
Yield val parseResult -> case mapping val of
Left message -> Failed message
Right val' -> Yield val' (updateParseResult parseResult)
(.:) :: T.Text -> Parser a -> Parser a
(.:) = objectWithKey
infixr 7 .:
(.:?) :: T.Text -> Parser a -> Parser (Maybe a)
key .:? val = optional (key .: val)
infixr 7 .:?
(.|) :: Parser a -> a -> Parser a
p .| defval = p <|> pure defval
infixl 6 .|
(.!) :: Int -> Parser a -> Parser a
(.!) = arrayWithIndexOf
infixr 7 .!
data ParseOutput a = ParseYield a (ParseOutput a)
| ParseNeedData (BS.ByteString -> ParseOutput a)
| ParseFailed String
| ParseDone BS.ByteString
runParser' :: Parser a -> BS.ByteString -> ParseOutput a
runParser' parser startdata = parse $ callParse parser (tokenParser startdata)
where
parse (MoreData (np, ntok)) = ParseNeedData (parse . callParse np .ntok)
parse (Failed err) = ParseFailed err
parse (Yield v np) = ParseYield v (parse np)
parse (Done ctx _) = ParseDone ctx
runParser :: Parser a -> ParseOutput a
runParser parser = runParser' parser BS.empty
parseByteString :: Parser a -> BS.ByteString -> [a]
parseByteString parser startdata = loop (runParser' parser startdata)
where
loop (ParseNeedData _) = error "Not enough data."
loop (ParseDone _) = []
loop (ParseFailed err) = error err
loop (ParseYield v np) = v : loop np
parseLazyByteString :: Parser a -> BL.ByteString -> [a]
parseLazyByteString parser input = loop input (runParser parser)
where
loop BL.Empty (ParseNeedData _) = error "Not enough data."
loop (BL.Chunk dta rest) (ParseNeedData np) = loop rest (np dta)
loop _ (ParseDone _) = []
loop _ (ParseFailed err) = error err
loop rest (ParseYield v np) = v : loop rest np
decode :: AE.FromJSON a => BL.ByteString -> Maybe a
decode bs =
case eitherDecode bs of
Right val -> Just val
Left _ -> Nothing
eitherDecode :: AE.FromJSON a => BL.ByteString -> Either String a
eitherDecode bs = loop bs (runParser value)
where
loop BL.Empty (ParseNeedData _) = Left "Not enough data."
loop (BL.Chunk dta rest) (ParseNeedData np) = loop rest (np dta)
loop _ (ParseDone _) = Left "Nothing parsed."
loop _ (ParseFailed err) = Left err
loop rest (ParseYield v next) = checkExit v next rest
checkExit v (ParseDone srest) rest
| BS.all isSpace srest && BL.all isSpace rest = Right v
| otherwise = Left "Data followed by non-whitespace characters."
checkExit _ (ParseYield _ _) _ = Left "Multiple value parses?"
checkExit _ (ParseFailed err) _ = Left err
checkExit _ (ParseNeedData _) BL.Empty = Left "Incomplete json structure."
checkExit v (ParseNeedData cont) (BL.Chunk dta rest) = checkExit v (cont dta) rest
decodeStrict :: AE.FromJSON a => BS.ByteString -> Maybe a
decodeStrict bs =
case eitherDecodeStrict bs of
Right val -> Just val
Left _ -> Nothing
eitherDecodeStrict :: AE.FromJSON a => BS.ByteString -> Either String a
eitherDecodeStrict bs =
case runParser' value bs of
ParseYield next v -> checkExit v next
ParseNeedData _ -> Left "Incomplete json structure."
ParseFailed err -> Left err
ParseDone _ -> Left "No data found."
where
checkExit (ParseDone rest) v
| BS.all isSpace rest = Right v
checkExit _ _ = Left "Data folowed by non-whitespace characters."