Copyright | (c) 2020 Composewell Technologies |
---|---|
License | BSD-3-Clause |
Maintainer | streamly@composewell.com |
Stability | experimental |
Portability | GHC |
Safe Haskell | Safe-Inferred |
Language | Haskell2010 |
Synopsis
- data Parser a m b = forall s. Parser (s -> a -> m (Step s b)) (m (Initial s b)) (s -> m (Step s b))
- newtype ParseError = ParseError String
- data Step s b
- data Initial s b
- toFold :: Monad m => Parser a m b -> Fold m a b
- fromFold :: Monad m => Fold m a b -> Parser a m b
- fromFoldMaybe :: Monad m => String -> Fold m a (Maybe b) -> Parser a m b
- fromPure :: Monad m => b -> Parser a m b
- fromEffect :: Monad m => m b -> Parser a m b
- die :: Monad m => String -> Parser a m b
- dieM :: Monad m => m String -> Parser a m b
- lmap :: (a -> b) -> Parser b m r -> Parser a m r
- lmapM :: Monad m => (a -> m b) -> Parser b m r -> Parser a m r
- postscan :: Fold m a b -> Parser b m c -> Parser a m c
- filter :: Monad m => (a -> Bool) -> Parser a m b -> Parser a m b
- rmapM :: Monad m => (b -> m c) -> Parser a m b -> Parser a m c
- peek :: Monad m => Parser a m a
- one :: Monad m => Parser a m a
- oneEq :: (Monad m, Eq a) => a -> Parser a m a
- oneNotEq :: (Monad m, Eq a) => a -> Parser a m a
- oneOf :: (Monad m, Eq a, Foldable f) => f a -> Parser a m a
- noneOf :: (Monad m, Eq a, Foldable f) => f a -> Parser a m a
- eof :: Monad m => Parser a m ()
- satisfy :: Monad m => (a -> Bool) -> Parser a m a
- maybe :: Monad m => (a -> Maybe b) -> Parser a m b
- either :: Monad m => (a -> Either String b) -> Parser a m b
- lookAhead :: Monad m => Parser a m b -> Parser a m b
- takeBetween :: Monad m => Int -> Int -> Fold m a b -> Parser a m b
- takeEQ :: Monad m => Int -> Fold m a b -> Parser a m b
- takeGE :: Monad m => Int -> Fold m a b -> Parser a m b
- takeP :: Monad m => Int -> Parser a m b -> Parser a m b
- listEq :: (Monad m, Eq a) => [a] -> Parser a m [a]
- listEqBy :: Monad m => (a -> a -> Bool) -> [a] -> Parser a m [a]
- streamEqBy :: Monad m => (a -> a -> Bool) -> Stream m a -> Parser a m ()
- subsequenceBy :: (a -> a -> Bool) -> Stream m a -> Parser a m ()
- takeWhile :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b
- takeWhileP :: Monad m => (a -> Bool) -> Parser a m b -> Parser a m b
- takeWhile1 :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b
- dropWhile :: Monad m => (a -> Bool) -> Parser a m ()
- takeEndBy :: Monad m => (a -> Bool) -> Parser a m b -> Parser a m b
- takeEndBy_ :: (a -> Bool) -> Parser a m b -> Parser a m b
- takeEndByEsc :: Monad m => (a -> Bool) -> (a -> Bool) -> Parser a m b -> Parser a m b
- takeStartBy :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b
- takeStartBy_ :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b
- takeEitherSepBy :: (a -> Bool) -> Fold m (Either a b) c -> Parser a m c
- wordBy :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b
- groupBy :: Monad m => (a -> a -> Bool) -> Fold m a b -> Parser a m b
- groupByRolling :: Monad m => (a -> a -> Bool) -> Fold m a b -> Parser a m b
- groupByRollingEither :: Monad m => (a -> a -> Bool) -> Fold m a b -> Fold m a c -> Parser a m (Either b c)
- wordFramedBy :: Monad m => (a -> Bool) -> (a -> Bool) -> (a -> Bool) -> (a -> Bool) -> Fold m a b -> Parser a m b
- wordWithQuotes :: (Monad m, Eq a) => Bool -> (a -> a -> Maybe a) -> a -> (a -> Maybe a) -> (a -> Bool) -> Fold m a b -> Parser a m b
- wordKeepQuotes :: (Monad m, Eq a) => a -> (a -> Maybe a) -> (a -> Bool) -> Fold m a b -> Parser a m b
- wordProcessQuotes :: (Monad m, Eq a) => a -> (a -> Maybe a) -> (a -> Bool) -> Fold m a b -> Parser a m b
- takeFramedBy_ :: Monad m => (a -> Bool) -> (a -> Bool) -> Fold m a b -> Parser a m b
- takeFramedByEsc_ :: Monad m => (a -> Bool) -> (a -> Bool) -> (a -> Bool) -> Fold m a b -> Parser a m b
- takeFramedByGeneric :: Monad m => Maybe (a -> Bool) -> Maybe (a -> Bool) -> Maybe (a -> Bool) -> Fold m a b -> Parser a m b
- blockWithQuotes :: (Monad m, Eq a) => (a -> Bool) -> (a -> Bool) -> a -> a -> Fold m a b -> Parser a m b
- span :: Monad m => (a -> Bool) -> Fold m a b -> Fold m a c -> Parser a m (b, c)
- spanBy :: Monad m => (a -> a -> Bool) -> Fold m a b -> Fold m a c -> Parser a m (b, c)
- spanByRolling :: Monad m => (a -> a -> Bool) -> Fold m a b -> Fold m a c -> Parser a m (b, c)
- splitWith :: Monad m => (a -> b -> c) -> Parser x m a -> Parser x m b -> Parser x m c
- split_ :: Monad m => Parser x m a -> Parser x m b -> Parser x m b
- alt :: Monad m => Parser x m a -> Parser x m a -> Parser x m a
- sequence :: Monad m => Stream m (Parser a m b) -> Fold m b c -> Parser a m c
- concatMap :: Monad m => (b -> Parser a m c) -> Parser a m b -> Parser a m c
- count :: Int -> Parser a m b -> Fold m b c -> Parser a m c
- countBetween :: Int -> Int -> Parser a m b -> Fold m b c -> Parser a m c
- manyP :: Parser a m b -> Parser b m c -> Parser a m c
- many :: Monad m => Parser a m b -> Fold m b c -> Parser a m c
- some :: Monad m => Parser a m b -> Fold m b c -> Parser a m c
- deintercalate :: Monad m => Parser a m x -> Parser a m y -> Fold m (Either x y) z -> Parser a m z
- deintercalate1 :: Monad m => Parser a m x -> Parser a m y -> Fold m (Either x y) z -> Parser a m z
- deintercalateAll :: Monad m => Parser a m x -> Parser a m y -> Fold m (Either x y) z -> Parser a m z
- sepBy1 :: Monad m => Parser a m b -> Parser a m x -> Fold m b c -> Parser a m c
- sepBy :: Monad m => Parser a m b -> Parser a m x -> Fold m b c -> Parser a m c
- sepByAll :: Monad m => Parser a m b -> Parser a m x -> Fold m b c -> Parser a m c
- manyTillP :: Parser a m b -> Parser a m x -> Parser b m c -> Parser a m c
- manyTill :: Monad m => Parser a m b -> Parser a m x -> Fold m b c -> Parser a m c
- manyThen :: Parser a m b -> Parser a m x -> Fold m b c -> Parser a m c
- roundRobin :: t (Parser a m b) -> Fold m b c -> Parser a m c
- retryMaxTotal :: Int -> Parser a m b -> Fold m b c -> Parser a m c
- retryMaxSuccessive :: Int -> Parser a m b -> Fold m b c -> Parser a m c
- retry :: Parser a m b -> Parser a m b
- zipWithM :: Monad m => (a -> b -> m c) -> Stream m a -> Fold m c x -> Parser b m x
- zip :: Monad m => Stream m a -> Fold m (a, b) x -> Parser b m x
- indexed :: forall m a b. Monad m => Fold m (Int, a) b -> Parser a m b
- makeIndexFilter :: (Fold m (s, a) b -> Parser a m b) -> (((s, a) -> Bool) -> Fold m (s, a) b -> Fold m (s, a) b) -> ((s, a) -> Bool) -> Fold m a b -> Parser a m b
- sampleFromthen :: Monad m => Int -> Int -> Fold m a b -> Parser a m b
- next :: Monad m => Parser a m (Maybe a)
Setup
>>>
:m
>>>
import Control.Applicative ((<|>))
>>>
import Data.Bifunctor (second)
>>>
import Data.Char (isSpace)
>>>
import qualified Data.Foldable as Foldable
>>>
import qualified Data.Maybe as Maybe
>>>
import Streamly.Data.Fold (Fold)
>>>
import Streamly.Data.Parser (Parser)
>>>
import qualified Streamly.Data.Fold as Fold
>>>
import qualified Streamly.Data.Parser as Parser
>>>
import qualified Streamly.Data.Stream as Stream
For APIs that have not been released yet.
>>>
import qualified Streamly.Internal.Data.Fold as Fold
>>>
import qualified Streamly.Internal.Data.Parser as Parser
Types
A parser is a fold that can fail and is represented as Parser step
initial extract
. Before we drive a parser we call the initial
action to
retrieve the initial state of the fold. The parser driver invokes step
with the state returned by the previous step and the next input element. It
results into a new state and a command to the driver represented by Step
type. The driver keeps invoking the step function until it stops or fails.
At any point of time the driver can call extract
to inspect the result of
the fold. If the parser hits the end of input extract
is called.
It may result in an error or an output value.
Pre-release
Instances
Monad m => MonadFail (Parser a m) Source # | |
Defined in Streamly.Internal.Data.Parser.ParserD.Type | |
(Monad m, MonadIO m) => MonadIO (Parser a m) Source # | |
Defined in Streamly.Internal.Data.Parser.ParserD.Type | |
Monad m => Alternative (Parser a m) Source # | Sequential alternative. The input is first passed to the first parser, and if it succeeds, the result is returned. However, if the first parser fails, the parser driver backtracks and tries the same input on the second parser, returning the result if it succeeds. Note: The implementation of
WARNING! this is not suitable for large scale use. As a thumb rule stream
fusion works well for less than 8 compositions of this operation, otherwise
consider using |
Monad m => Applicative (Parser a m) Source # |
|
Defined in Streamly.Internal.Data.Parser.ParserD.Type | |
Functor m => Functor (Parser a m) Source # | |
Monad m => Monad (Parser a m) Source # | See documentation of Although this implementation allows stream fusion, it has quadratic
complexity, making it suitable only for a small number of compositions. As a
thumb rule use it for less than 8 compositions, use |
newtype ParseError Source #
This exception is used when a parser ultimately fails, the user of the parser is intimated via this exception.
Pre-release
Instances
Exception ParseError Source # | |
Defined in Streamly.Internal.Data.Parser.ParserD.Type toException :: ParseError -> SomeException # fromException :: SomeException -> Maybe ParseError # displayException :: ParseError -> String # | |
Show ParseError Source # | |
Defined in Streamly.Internal.Data.Parser.ParserD.Type showsPrec :: Int -> ParseError -> ShowS # show :: ParseError -> String # showList :: [ParseError] -> ShowS # |
The return type of a Parser
step.
The parse operation feeds the input stream to the parser one element at a
time, representing a parse Step
. The parser may or may not consume the
item and returns a result. If the result is Partial
we can either extract
the result or feed more input to the parser. If the result is Continue
, we
must feed more input in order to get a result. If the parser returns Done
then the parser can no longer take any more input.
If the result is Continue
, the parse operation retains the input in a
backtracking buffer, in case the parser may ask to backtrack in future.
Whenever a 'Partial n' result is returned we first backtrack by n
elements
in the input and then release any remaining backtracking buffer. Similarly,
'Continue n' backtracks to n
elements before the current position and
starts feeding the input from that point for future invocations of the
parser.
If parser is not yet done, we can use the extract
operation on the state
of the parser to extract a result. If the parser has not yet yielded a
result, the operation fails with a ParseError
exception. If the parser
yielded a Partial
result in the past the last partial result is returned.
Therefore, if a parser yields a partial result once it cannot fail later on.
The parser can never backtrack beyond the position where the last partial result left it at. The parser must ensure that the backtrack position is always after that.
Pre-release
Partial !Int !s |
|
Continue !Int !s |
|
Done !Int !b | Done with leftover input count and result.
|
Error !String | Parser failed without generating any output. The parsing operation may backtrack to the beginning and try another alternative. |
The type of a Parser'
s initial action.
Internal
IPartial !s | Wait for step function to be called with state |
IDone !b | Return a result right away without an input. |
IError !String | Return an error right away without an input. |
Downgrade to Fold
Accumulators
fromFoldMaybe :: Monad m => String -> Fold m a (Maybe b) -> Parser a m b Source #
Convert a Maybe returning fold to an error returning parser. The first argument is the error message that the parser would return when the fold returns Nothing.
Pre-release
fromPure :: Monad m => b -> Parser a m b Source #
A parser that always yields a pure value without consuming any input.
fromEffect :: Monad m => m b -> Parser a m b Source #
A parser that always yields the result of an effectful action without consuming any input.
die :: Monad m => String -> Parser a m b Source #
A parser that always fails with an error message without consuming any input.
dieM :: Monad m => m String -> Parser a m b Source #
A parser that always fails with an effectful error message and without consuming any input.
Pre-release
Map on input
lmap :: (a -> b) -> Parser b m r -> Parser a m r Source #
lmap f parser
maps the function f
on the input of the parser.
>>>
Stream.parse (Parser.lmap (\x -> x * x) (Parser.fromFold Fold.sum)) (Stream.enumerateFromTo 1 100)
Right 338350
lmap = Parser.lmapM return
lmapM :: Monad m => (a -> m b) -> Parser b m r -> Parser a m r Source #
lmapM f parser
maps the monadic function f
on the input of the parser.
postscan :: Fold m a b -> Parser b m c -> Parser a m c Source #
Stateful scan on the input of a parser using a Fold.
Unimplemented
filter :: Monad m => (a -> Bool) -> Parser a m b -> Parser a m b Source #
Include only those elements that pass a predicate.
>>>
Stream.parse (Parser.filter (> 5) (Parser.fromFold Fold.sum)) $ Stream.fromList [1..10]
Right 40
Map on output
rmapM :: Monad m => (b -> m c) -> Parser a m b -> Parser a m c Source #
rmapM f parser
maps the monadic function f
on the output of the parser.
>>>
rmap = fmap
Element parsers
peek :: Monad m => Parser a m a Source #
Peek the head element of a stream, without consuming it. Fails if it encounters end of input.
>>>
Stream.parse ((,) <$> Parser.peek <*> Parser.satisfy (> 0)) $ Stream.fromList [1]
Right (1,1)
peek = lookAhead (satisfy True)
one :: Monad m => Parser a m a Source #
Consume one element from the head of the stream. Fails if it encounters end of input.
>>>
one = Parser.satisfy $ const True
oneEq :: (Monad m, Eq a) => a -> Parser a m a Source #
Match a specific element.
>>>
oneEq x = Parser.satisfy (== x)
oneNotEq :: (Monad m, Eq a) => a -> Parser a m a Source #
Match anything other than the supplied element.
>>>
oneNotEq x = Parser.satisfy (/= x)
oneOf :: (Monad m, Eq a, Foldable f) => f a -> Parser a m a Source #
Match any one of the elements in the supplied list.
>>>
oneOf xs = Parser.satisfy (`Foldable.elem` xs)
When performance matters a pattern matching predicate could be more
efficient than a Foldable
datatype:
let p x = case x ofa
-> Truee
-> True _ -> False in satisfy p
GHC may use a binary search instead of linear search in the list. Alternatively, you can also use an array instead of list for storage and search.
noneOf :: (Monad m, Eq a, Foldable f) => f a -> Parser a m a Source #
See performance notes in oneOf
.
>>>
noneOf xs = Parser.satisfy (`Foldable.notElem` xs)
eof :: Monad m => Parser a m () Source #
Succeeds if we are at the end of input, fails otherwise.
>>>
Stream.parse ((,) <$> Parser.satisfy (> 0) <*> Parser.eof) $ Stream.fromList [1]
Right (1,())
satisfy :: Monad m => (a -> Bool) -> Parser a m a Source #
Returns the next element if it passes the predicate, fails otherwise.
>>>
Stream.parse (Parser.satisfy (== 1)) $ Stream.fromList [1,0,1]
Right 1
>>>
toMaybe f x = if f x then Just x else Nothing
>>>
satisfy f = Parser.maybe (toMaybe f)
maybe :: Monad m => (a -> Maybe b) -> Parser a m b Source #
Map a Maybe
returning function on the next element in the stream. The
parser fails if the function returns Nothing
otherwise returns the Just
value.
>>>
toEither = Maybe.maybe (Left "maybe: predicate failed") Right
>>>
maybe f = Parser.either (toEither . f)
>>>
maybe f = Parser.fromFoldMaybe "maybe: predicate failed" (Fold.maybe f)
Pre-release
Sequence parsers (tokenizers)
Parsers chained in series, if one parser terminates the composition terminates. Currently we are using folds to collect the output of the parsers but we can use Parsers instead of folds to make the composition more powerful. For example, we can do:
takeEndByOrMax cond n p = takeEndBy cond (take n p) takeEndByBetween cond m n p = takeEndBy cond (takeBetween m n p) takeWhileBetween cond m n p = takeWhile cond (takeBetween m n p)
lookAhead :: Monad m => Parser a m b -> Parser a m b Source #
Run a parser without consuming the input.
By length
Grab a sequence of input elements without inspecting them
takeBetween :: Monad m => Int -> Int -> Fold m a b -> Parser a m b Source #
takeBetween m n
takes a minimum of m
and a maximum of n
input
elements and folds them using the supplied fold.
Stops after n
elements.
Fails if the stream ends before m
elements could be taken.
Examples: -
>>> :{ takeBetween' low high ls = Stream.parse prsr (Stream.fromList ls) where prsr = Parser.takeBetween low high Fold.toList :}
>>>
takeBetween' 2 4 [1, 2, 3, 4, 5]
Right [1,2,3,4]
>>>
takeBetween' 2 4 [1, 2]
Right [1,2]
>>>
takeBetween' 2 4 [1]
Left (ParseError "takeBetween: Expecting alteast 2 elements, got 1")
>>>
takeBetween' 0 0 [1, 2]
Right []
>>>
takeBetween' 0 1 []
Right []
takeBetween
is the most general take operation, other take operations can
be defined in terms of takeBetween. For example:
>>>
take n = Parser.takeBetween 0 n
>>>
takeEQ n = Parser.takeBetween n n
>>>
takeGE n = Parser.takeBetween n maxBound
Pre-release
takeEQ :: Monad m => Int -> Fold m a b -> Parser a m b Source #
Stops after taking exactly n
input elements.
- Stops - after consuming
n
elements. - Fails - if the stream or the collecting fold ends before it can collect
exactly
n
elements.
>>>
Stream.parse (Parser.takeEQ 2 Fold.toList) $ Stream.fromList [1,0,1]
Right [1,0]
>>>
Stream.parse (Parser.takeEQ 4 Fold.toList) $ Stream.fromList [1,0,1]
Left (ParseError "takeEQ: Expecting exactly 4 elements, input terminated on 3")
takeGE :: Monad m => Int -> Fold m a b -> Parser a m b Source #
Take at least n
input elements, but can collect more.
- Stops - when the collecting fold stops.
- Fails - if the stream or the collecting fold ends before producing
n
elements.
>>>
Stream.parse (Parser.takeGE 4 Fold.toList) $ Stream.fromList [1,0,1]
Left (ParseError "takeGE: Expecting at least 4 elements, input terminated on 3")
>>>
Stream.parse (Parser.takeGE 4 Fold.toList) $ Stream.fromList [1,0,1,0,1]
Right [1,0,1,0,1]
Pre-release
takeP :: Monad m => Int -> Parser a m b -> Parser a m b Source #
Takes at-most n
input elements.
- Stops - when the collecting parser stops.
- Fails - when the collecting parser fails.
>>>
Stream.parse (Parser.takeP 4 (Parser.takeEQ 2 Fold.toList)) $ Stream.fromList [1, 2, 3, 4, 5]
Right [1,2]
>>>
Stream.parse (Parser.takeP 4 (Parser.takeEQ 5 Fold.toList)) $ Stream.fromList [1, 2, 3, 4, 5]
Left (ParseError "takeEQ: Expecting exactly 5 elements, input terminated on 4")
Internal
Exact match
listEq :: (Monad m, Eq a) => [a] -> Parser a m [a] Source #
Match the input sequence with the supplied list and return it if successful.
>>>
listEq = Parser.listEqBy (==)
listEqBy :: Monad m => (a -> a -> Bool) -> [a] -> Parser a m [a] Source #
Match the given sequence of elements using the given comparison function. Returns the original sequence if successful.
Definition:
>>>
listEqBy cmp xs = Parser.streamEqBy cmp (Stream.fromList xs) *> Parser.fromPure xs
Examples:
>>>
Stream.parse (Parser.listEqBy (==) "string") $ Stream.fromList "string"
Right "string"
>>>
Stream.parse (Parser.listEqBy (==) "mismatch") $ Stream.fromList "match"
Left (ParseError "streamEqBy: mismtach occurred")
streamEqBy :: Monad m => (a -> a -> Bool) -> Stream m a -> Parser a m () Source #
Like listEqBy
but uses a stream instead of a list and does not return
the stream.
subsequenceBy :: (a -> a -> Bool) -> Stream m a -> Parser a m () Source #
Match if the input stream is a subsequence of the argument stream i.e. all the elements of the input stream occur, in order, in the argument stream. The elements do not have to occur consecutively. A sequence is considered a subsequence of itself.
By predicate
takeWhile :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b Source #
Collect stream elements until an element fails the predicate. The element on which the predicate fails is returned back to the input stream.
- Stops - when the predicate fails or the collecting fold stops.
- Fails - never.
>>>
Stream.parse (Parser.takeWhile (== 0) Fold.toList) $ Stream.fromList [0,0,1,0,1]
Right [0,0]
>>>
takeWhile cond f = Parser.takeWhileP cond (Parser.fromFold f)
We can implement a breakOn
using takeWhile
:
breakOn p = takeWhile (not p)
takeWhileP :: Monad m => (a -> Bool) -> Parser a m b -> Parser a m b Source #
Like takeWhile
but uses a Parser
instead of a Fold
to collect the
input. The combinator stops when the condition fails or if the collecting
parser stops.
Other interesting parsers can be implemented in terms of this parser:
>>>
takeWhile1 cond p = Parser.takeWhileP cond (Parser.takeBetween 1 maxBound p)
>>>
takeWhileBetween cond m n p = Parser.takeWhileP cond (Parser.takeBetween m n p)
Stops: when the condition fails or the collecting parser stops. Fails: when the collecting parser fails.
Pre-release
takeWhile1 :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b Source #
Like takeWhile
but takes at least one element otherwise fails.
>>>
takeWhile1 cond p = Parser.takeWhileP cond (Parser.takeBetween 1 maxBound p)
dropWhile :: Monad m => (a -> Bool) -> Parser a m () Source #
Drain the input as long as the predicate succeeds, running the effects and discarding the results.
This is also called skipWhile
in some parsing libraries.
>>>
dropWhile p = Parser.takeWhile p Fold.drain
Separated by elements
Separator could be in prefix postion (takeStartBy
), or suffix
position (takeEndBy
). See deintercalate
, sepBy
etc for infix
separator parsing, also see intersperseQuotedBy
fold.
takeEndBy :: Monad m => (a -> Bool) -> Parser a m b -> Parser a m b Source #
takeEndBy cond parser
parses a token that ends by a separator chosen by
the supplied predicate. The separator is also taken with the token.
This can be combined with other parsers to implement other interesting parsers as follows:
>>>
takeEndByLE cond n p = Parser.takeEndBy cond (Parser.fromFold $ Fold.take n p)
>>>
takeEndByBetween cond m n p = Parser.takeEndBy cond (Parser.takeBetween m n p)
>>>
takeEndBy = Parser.takeEndByEsc (const False)
See also "Streamly.Data.Fold.takeEndBy". Unlike the fold, the collecting parser in the takeEndBy parser can decide whether to fail or not if the stream does not end with separator.
Pre-release
takeEndByEsc :: Monad m => (a -> Bool) -> (a -> Bool) -> Parser a m b -> Parser a m b Source #
Like takeEndBy
but the separator elements can be escaped using an
escape char determined by the first predicate. The escape characters are
removed.
pre-release
takeStartBy :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b Source #
Parse a token that starts with an element chosen by the predicate. The parser fails if the input does not start with the selected element.
- Stops - when the predicate succeeds in non-leading position.
- Fails - when the predicate fails in the leading position.
>>>
splitWithPrefix p f = Stream.parseMany (Parser.takeStartBy p f)
Examples: -
>>>
p = Parser.takeStartBy (== ',') Fold.toList
>>>
leadingComma = Stream.parse p . Stream.fromList
>>>
leadingComma "a,b"
Left (ParseError "takeStartBy: missing frame start") ...>>>
leadingComma ",,"
Right ",">>>
leadingComma ",a,b"
Right ",a">>>
leadingComma ""
Right ""
Pre-release
takeStartBy_ :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b Source #
Like takeStartBy
but drops the separator.
>>>
takeStartBy_ isBegin = Parser.takeFramedByGeneric Nothing (Just isBegin) Nothing
takeEitherSepBy :: (a -> Bool) -> Fold m (Either a b) c -> Parser a m c Source #
Take either the separator or the token. Separator is a Left value and token is Right value.
Unimplemented
wordBy :: Monad m => (a -> Bool) -> Fold m a b -> Parser a m b Source #
Like splitOn
but strips leading, trailing, and repeated separators.
Therefore, ".a..b."
having .
as the separator would be parsed as
["a","b"]
. In other words, its like parsing words from whitespace
separated text.
- Stops - when it finds a word separator after a non-word element
- Fails - never.
>>>
wordBy = Parser.wordFramedBy (const False) (const False) (const False)
S.wordsBy pred f = S.parseMany (PR.wordBy pred f)
Grouped by element comparison
groupBy :: Monad m => (a -> a -> Bool) -> Fold m a b -> Parser a m b Source #
Given an input stream [a,b,c,...]
and a comparison function cmp
, the
parser assigns the element a
to the first group, then if a `cmp` b
is
True
b
is also assigned to the same group. If a `cmp` c
is True
then c
is also assigned to the same group and so on. When the comparison
fails the parser is terminated. Each group is folded using the Fold
f
and
the result of the fold is the result of the parser.
- Stops - when the comparison fails.
- Fails - never.
>>>
:{
runGroupsBy eq = Stream.fold Fold.toList . Stream.parseMany (Parser.groupBy eq Fold.toList) . Stream.fromList :}
>>>
runGroupsBy (<) []
[]
>>>
runGroupsBy (<) [1]
[Right [1]]
>>>
runGroupsBy (<) [3, 5, 4, 1, 2, 0]
[Right [3,5,4],Right [1,2],Right [0]]
groupByRolling :: Monad m => (a -> a -> Bool) -> Fold m a b -> Parser a m b Source #
Unlike groupBy
this combinator performs a rolling comparison of two
successive elements in the input stream. Assuming the input stream
is [a,b,c,...]
and the comparison function is cmp
, the parser
first assigns the element a
to the first group, then if a `cmp` b
is
True
b
is also assigned to the same group. If b `cmp` c
is True
then c
is also assigned to the same group and so on. When the comparison
fails the parser is terminated. Each group is folded using the Fold
f
and
the result of the fold is the result of the parser.
- Stops - when the comparison fails.
- Fails - never.
>>>
:{
runGroupsByRolling eq = Stream.fold Fold.toList . Stream.parseMany (Parser.groupByRolling eq Fold.toList) . Stream.fromList :}
>>>
runGroupsByRolling (<) []
[]
>>>
runGroupsByRolling (<) [1]
[Right [1]]
>>>
runGroupsByRolling (<) [3, 5, 4, 1, 2, 0]
[Right [3,5],Right [4],Right [1,2],Right [0]]
Pre-release
groupByRollingEither :: Monad m => (a -> a -> Bool) -> Fold m a b -> Fold m a c -> Parser a m (Either b c) Source #
Like groupByRolling
, but if the predicate is True
then collects using
the first fold as long as the predicate holds True
, if the predicate is
False
collects using the second fold as long as it remains False
.
Returns Left
for the first case and Right
for the second case.
For example, if we want to detect sorted sequences in a stream, both ascending and descending cases we can use 'groupByRollingEither (<=) Fold.toList Fold.toList'.
Pre-release
Framed by elements
Also see intersperseQuotedBy
fold.
Framed by a one or more ocurrences of a separator around a word like
spaces or quotes. No nesting.
:: Monad m | |
=> (a -> Bool) | Matches escape elem? |
-> (a -> Bool) | Matches left quote? |
-> (a -> Bool) | matches right quote? |
-> (a -> Bool) | matches word separator? |
-> Fold m a b | |
-> Parser a m b |
Like wordBy
but treats anything inside a pair of quotes as a single
word, the quotes can be escaped by an escape character. Recursive quotes
are possible if quote begin and end characters are different, quotes must be
balanced. Outermost quotes are stripped.
>>>
braces = Parser.wordFramedBy (== '\\') (== '{') (== '}') isSpace Fold.toList
>>>
Stream.parse braces $ Stream.fromList "{ab} cd"
Right "ab">>>
Stream.parse braces $ Stream.fromList "{ab}{cd}"
Right "abcd">>>
Stream.parse braces $ Stream.fromList "a{b} cd"
Right "ab">>>
Stream.parse braces $ Stream.fromList "a{{b}} cd"
Right "a{b}"
>>>
quotes = Parser.wordFramedBy (== '\\') (== '"') (== '"') isSpace Fold.toList
>>>
Stream.parse quotes $ Stream.fromList "\"a\"\"b\""
Right "ab"
:: (Monad m, Eq a) | |
=> Bool | Retain the quotes and escape chars in the output |
-> (a -> a -> Maybe a) | quote char -> escaped char -> translated char |
-> a | Matches an escape elem? |
-> (a -> Maybe a) | If left quote, return right quote, else Nothing. |
-> (a -> Bool) | Matches a word separator? |
-> Fold m a b | |
-> Parser a m b |
Quote and bracket aware word splitting with escaping. Like wordBy
but
word separators within specified quotes or brackets are ignored. Quotes and
escape characters can be processed. If the end quote is different from the
start quote it is called a bracket. The following quoting rules apply:
- In an unquoted string a character may be preceded by an escape character. The escape character is removed and the character following it is treated literally with no special meaning e.g. e.g. h e l l o is a single word, n is same as n.
- Any part of the word can be placed within quotes. Inside quotes all characters are treated literally with no special meaning. Quoting character itself cannot be used within quotes unless escape processing within quotes is applied to allow it.
- Optionally escape processing for quoted part can be specified. Escape character has no special meaning inside quotes unless it is followed by a character that has a escape translation specified, in that case the escape character is removed, and the specified translation is applied to the character following it. This can be used to escape the quoting character itself within quotes.
- There can be multiple quoting characters, when a quote starts, all other quoting characters within that quote lose any special meaning until the quote is closed.
- A starting quote char without an ending char generates a parse error. An ending bracket char without a corresponding bracket begin is ignored.
- Brackets can be nested.
We should note that unquoted and quoted escape processing are different. In unquoted part escape character is always removed. In quoted part it is removed only if followed by a special meaning character. This is consistent with how shell performs escape processing.
:: (Monad m, Eq a) | |
=> a | Escape char |
-> (a -> Maybe a) | If left quote, return right quote, else Nothing. |
-> (a -> Bool) | Matches a word separator? |
-> Fold m a b | |
-> Parser a m b |
wordWithQuotes
without processing the quotes and escape function
supplied to escape the quote char within a quote. Can be used to parse words
keeping the quotes and escapes intact.
>>>
wordKeepQuotes = Parser.wordWithQuotes True (\_ _ -> Nothing)
:: (Monad m, Eq a) | |
=> a | Escape char |
-> (a -> Maybe a) | If left quote, return right quote, else Nothing. |
-> (a -> Bool) | Matches a word separator? |
-> Fold m a b | |
-> Parser a m b |
wordWithQuotes
with quote processing applied and escape function
supplied to escape the quote char within a quote. Can be ysed to parse words
and processing the quoting and escaping at the same time.
>>>
wordProcessQuotes = Parser.wordWithQuotes False (\_ _ -> Nothing)
takeFramedBy_ :: Monad m => (a -> Bool) -> (a -> Bool) -> Fold m a b -> Parser a m b Source #
takeFramedBy_ isBegin isEnd fold
parses a token framed by a begin and an
end predicate.
>>>
takeFramedBy_ = Parser.takeFramedByEsc_ (const False)
takeFramedByEsc_ :: Monad m => (a -> Bool) -> (a -> Bool) -> (a -> Bool) -> Fold m a b -> Parser a m b Source #
takeFramedByEsc_ isEsc isBegin isEnd fold
parses a token framed using a
begin and end predicate, and an escape character. The frame begin and end
characters lose their special meaning if preceded by the escape character.
Nested frames are allowed if begin and end markers are different, nested frames must be balanced unless escaped, nested frame markers are emitted as it is.
For example,
>>>
p = Parser.takeFramedByEsc_ (== '\\') (== '{') (== '}') Fold.toList
>>>
Stream.parse p $ Stream.fromList "{hello}"
Right "hello">>>
Stream.parse p $ Stream.fromList "{hello {world}}"
Right "hello {world}">>>
Stream.parse p $ Stream.fromList "{hello \\{world}"
Right "hello {world">>>
Stream.parse p $ Stream.fromList "{hello {world}"
Left (ParseError "takeFramedByEsc_: missing frame end")
Pre-release
takeFramedByGeneric :: Monad m => Maybe (a -> Bool) -> Maybe (a -> Bool) -> Maybe (a -> Bool) -> Fold m a b -> Parser a m b Source #
:: (Monad m, Eq a) | |
=> (a -> Bool) | escape char |
-> (a -> Bool) | quote char, to quote inside brackets |
-> a | Block opening bracket |
-> a | Block closing bracket |
-> Fold m a b | |
-> Parser a m b |
Parse a block enclosed within open, close brackets. Block contents may be quoted, brackets inside quotes are ignored. Quoting characters can be used within quotes if escaped. A block can have a nested block inside it.
Quote begin and end chars are the same. Block brackets and quote chars must not overlap. Block start and end brackets must be different for nesting blocks within blocks.
>>>
p = Parser.blockWithQuotes (== '\\') (== '"') '{' '}' Fold.toList
>>>
Stream.parse p $ Stream.fromList "{msg: \"hello world\"}"
Right "msg: \"hello world\""
Spanning
span :: Monad m => (a -> Bool) -> Fold m a b -> Fold m a c -> Parser a m (b, c) Source #
span p f1 f2
composes folds f1
and f2
such that f1
consumes the
input as long as the predicate p
is True
. f2
consumes the rest of the
input.
> let span_ p xs = Stream.parse (Parser.span p Fold.toList Fold.toList) $ Stream.fromList xs > span_ (< 1) 1,2,3 > span_ (< 2) 1,2,3 > span_ (< 4) 1,2,3
Pre-release
spanBy :: Monad m => (a -> a -> Bool) -> Fold m a b -> Fold m a c -> Parser a m (b, c) Source #
Break the input stream into two groups, the first group takes the input as
long as the predicate applied to the first element of the stream and next
input element holds True
, the second group takes the rest of the input.
Pre-release
spanByRolling :: Monad m => (a -> a -> Bool) -> Fold m a b -> Fold m a c -> Parser a m (b, c) Source #
Like spanBy
but applies the predicate in a rolling fashion i.e.
predicate is applied to the previous and the next input elements.
Pre-release
Binary Combinators
Sequential Applicative
splitWith :: Monad m => (a -> b -> c) -> Parser x m a -> Parser x m b -> Parser x m c Source #
Sequential parser application.
Apply two parsers sequentially to an input stream. The first parser runs and processes the input, the remaining input is then passed to the second parser. If both parsers succeed, their outputs are combined using the supplied function. If either parser fails, the operation fails.
This implementation is strict in the second argument, therefore, the following will fail:
>>>
Stream.parse (Parser.splitWith const (Parser.satisfy (> 0)) undefined) $ Stream.fromList [1]
*** Exception: Prelude.undefined ...
Although this implementation allows stream fusion, it has quadratic complexity, making it suitable only for a small number of compositions. As a thumb rule use it for less than 8 compositions, use ParserK otherwise.
Below are some common idioms that can be expressed using splitWith
and
other parser primitives:
>>>
span p f1 f2 = Parser.splitWith (,) (Parser.takeWhile p f1) (Parser.fromFold f2)
>>>
spanBy eq f1 f2 = Parser.splitWith (,) (Parser.groupBy eq f1) (Parser.fromFold f2)
Pre-release
split_ :: Monad m => Parser x m a -> Parser x m b -> Parser x m b Source #
Sequential parser application ignoring the output of the first parser. Apply two parsers sequentially to an input stream. The input is provided to the first parser, when it is done the remaining input is provided to the second parser. The output of the parser is the output of the second parser. The operation fails if any of the parsers fail.
This implementation is strict in the second argument, therefore, the following will fail:
>>>
Stream.parse (Parser.split_ (Parser.satisfy (> 0)) undefined) $ Stream.fromList [1]
*** Exception: Prelude.undefined ...
Although this implementation allows stream fusion, it has quadratic complexity, making it suitable only for a small number of compositions. As a thumb rule use it for less than 8 compositions, use ParserK otherwise.
Pre-release
Sequential Alternative
alt :: Monad m => Parser x m a -> Parser x m a -> Parser x m a Source #
Sequential alternative. The input is first passed to the first parser, and if it succeeds, the result is returned. However, if the first parser fails, the parser driver backtracks and tries the same input on the second parser, returning the result if it succeeds.
Note: This implementation is not lazy in the second argument. The following will fail:
> Stream.parse (Parser.satisfy (> 0) `Parser.alt` undefined) $ Stream.fromList [1..10]
- ** Exception: Prelude.undefined
Although this implementation allows stream fusion, it has quadratic complexity, making it suitable only for a small number of compositions. As a thumb rule use it for less than 8 compositions, use ParserK otherwise.
Time Complexity: O(n^2) where n is the number of compositions.
Pre-release
N-ary Combinators
Sequential Collection
sequence :: Monad m => Stream m (Parser a m b) -> Fold m b c -> Parser a m c Source #
sequence f p
collects sequential parses of parsers in a
serial stream p
using the fold f
. Fails if the input ends or any
of the parsers fail.
Pre-release
Sequential Repetition
count :: Int -> Parser a m b -> Fold m b c -> Parser a m c Source #
count n f p
collects exactly n
sequential parses of parser p
using
the fold f
. Fails if the input ends or the parser fails before n
results are collected.
>>>
count n = Parser.countBetween n n
>>>
count n p f = Parser.manyP p (Parser.takeEQ n f)
Unimplemented
countBetween :: Int -> Int -> Parser a m b -> Fold m b c -> Parser a m c Source #
countBetween m n f p
collects between m
and n
sequential parses of
parser p
using the fold f
. Stop after collecting n
results. Fails if
the input ends or the parser fails before m
results are collected.
>>>
countBetween m n p f = Parser.manyP p (Parser.takeBetween m n f)
Unimplemented
many :: Monad m => Parser a m b -> Fold m b c -> Parser a m c Source #
Collect zero or more parses. Apply the supplied parser repeatedly on the input stream and push the parse results to a downstream fold.
Stops: when the downstream fold stops or the parser fails. Fails: never, produces zero or more results.
>>>
many = Parser.countBetween 0 maxBound
Compare with many
.
some :: Monad m => Parser a m b -> Fold m b c -> Parser a m c Source #
Collect one or more parses. Apply the supplied parser repeatedly on the input stream and push the parse results to a downstream fold.
Stops: when the downstream fold stops or the parser fails. Fails: if it stops without producing a single result.
>>>
some p f = Parser.manyP p (Parser.takeGE 1 f)
>>>
some = Parser.countBetween 1 maxBound
Compare with some
.
Interleaved Repetition
deintercalate :: Monad m => Parser a m x -> Parser a m y -> Fold m (Either x y) z -> Parser a m z Source #
Apply two parsers alternately to an input stream. The input stream is considered an interleaving of two patterns. The two parsers represent the two patterns. Parsing starts at the first parser and stops at the first parser. It can be used to parse a infix style pattern e.g. p1 p2 p1 . Empty input or single parse of the first parser is accepted.
>>>
p1 = Parser.takeWhile1 (not . (== '+')) Fold.toList
>>>
p2 = Parser.satisfy (== '+')
>>>
p = Parser.deintercalate p1 p2 Fold.toList
>>>
Stream.parse p $ Stream.fromList ""
Right []>>>
Stream.parse p $ Stream.fromList "1"
Right [Left "1"]>>>
Stream.parse p $ Stream.fromList "1+"
Right [Left "1"]>>>
Stream.parse p $ Stream.fromList "1+2+3"
Right [Left "1",Right '+',Left "2",Right '+',Left "3"]
deintercalate1 :: Monad m => Parser a m x -> Parser a m y -> Fold m (Either x y) z -> Parser a m z Source #
Apply two parsers alternately to an input stream. The input stream is considered an interleaving of two patterns. The two parsers represent the two patterns. Parsing starts at the first parser and stops at the first parser. It can be used to parse a infix style pattern e.g. p1 p2 p1 . Empty input or single parse of the first parser is accepted.
>>>
p1 = Parser.takeWhile1 (not . (== '+')) Fold.toList
>>>
p2 = Parser.satisfy (== '+')
>>>
p = Parser.deintercalate1 p1 p2 Fold.toList
>>>
Stream.parse p $ Stream.fromList ""
Left (ParseError "takeWhile1: end of input")>>>
Stream.parse p $ Stream.fromList "1"
Right [Left "1"]>>>
Stream.parse p $ Stream.fromList "1+"
Right [Left "1"]>>>
Stream.parse p $ Stream.fromList "1+2+3"
Right [Left "1",Right '+',Left "2",Right '+',Left "3"]
deintercalateAll :: Monad m => Parser a m x -> Parser a m y -> Fold m (Either x y) z -> Parser a m z Source #
Like deintercalate
but the entire input must satisfy the pattern
otherwise the parser fails. This is many times faster than deintercalate.
>>>
p1 = Parser.takeWhile1 (not . (== '+')) Fold.toList
>>>
p2 = Parser.satisfy (== '+')
>>>
p = Parser.deintercalateAll p1 p2 Fold.toList
>>>
Stream.parse p $ Stream.fromList ""
Right []>>>
Stream.parse p $ Stream.fromList "1"
Right [Left "1"]>>>
Stream.parse p $ Stream.fromList "1+"
Left (ParseError "takeWhile1: end of input")>>>
Stream.parse p $ Stream.fromList "1+2+3"
Right [Left "1",Right '+',Left "2",Right '+',Left "3"]
Special cases
TODO: traditional implmentations of these may be of limited use. For
example, consider parsing lines separated by \r\n
. The main parser
will have to detect and exclude the sequence \r\n
anyway so that we
can apply the "sep" parser.
We can instead implement these as special cases of deintercalate.
, endBy , sepEndBy , beginBy , sepBeginBy , sepAroundBy
sepBy1 :: Monad m => Parser a m b -> Parser a m x -> Fold m b c -> Parser a m c Source #
Like sepBy
but requires at least one successful parse.
Definition:
>>>
sepBy1 p1 p2 f = Parser.deintercalate1 p1 p2 (Fold.catLefts f)
Examples:
>>>
p1 = Parser.takeWhile1 (not . (== '+')) Fold.toList
>>>
p2 = Parser.satisfy (== '+')
>>>
p = Parser.sepBy1 p1 p2 Fold.toList
>>>
Stream.parse p $ Stream.fromList ""
Left (ParseError "takeWhile1: end of input")>>>
Stream.parse p $ Stream.fromList "1"
Right ["1"]>>>
Stream.parse p $ Stream.fromList "1+"
Right ["1"]>>>
Stream.parse p $ Stream.fromList "1+2+3"
Right ["1","2","3"]
sepBy :: Monad m => Parser a m b -> Parser a m x -> Fold m b c -> Parser a m c Source #
Apply two parsers alternately to an input stream. Parsing starts at the first parser and stops at the first parser. The output of the first parser is emiited and the output of the second parser is discarded. It can be used to parse a infix style pattern e.g. p1 p2 p1 . Empty input or single parse of the first parser is accepted.
Definitions:
>>>
sepBy p1 p2 f = Parser.deintercalate p1 p2 (Fold.catLefts f)
>>>
sepBy p1 p2 f = Parser.sepBy1 p1 p2 f <|> Parser.fromEffect (Fold.extractM f)
Examples:
>>>
p1 = Parser.takeWhile1 (not . (== '+')) Fold.toList
>>>
p2 = Parser.satisfy (== '+')
>>>
p = Parser.sepBy p1 p2 Fold.toList
>>>
Stream.parse p $ Stream.fromList ""
Right []>>>
Stream.parse p $ Stream.fromList "1"
Right ["1"]>>>
Stream.parse p $ Stream.fromList "1+"
Right ["1"]>>>
Stream.parse p $ Stream.fromList "1+2+3"
Right ["1","2","3"]
sepByAll :: Monad m => Parser a m b -> Parser a m x -> Fold m b c -> Parser a m c Source #
Non-backtracking version of sepBy. Several times faster.
manyTill :: Monad m => Parser a m b -> Parser a m x -> Fold m b c -> Parser a m c Source #
manyTill chunking test f
tries the parser test
on the input, if test
fails it backtracks and tries chunking
, after chunking
succeeds test
is
tried again and so on. The parser stops when test
succeeds. The output of
test
is discarded and the output of chunking
is accumulated by the
supplied fold. The parser fails if chunking
fails.
Stops when the fold f
stops.
manyThen :: Parser a m b -> Parser a m x -> Fold m b c -> Parser a m c Source #
manyThen f collect recover
repeats the parser collect
on the input and
collects the output in the supplied fold. If the the parser collect
fails,
parser recover
is run until it stops and then we start repeating the
parser collect
again. The parser fails if the recovery parser fails.
For example, this can be used to find a key frame in a video stream after an error.
Unimplemented
Interleaved collection
- Round robin
- Priority based
roundRobin :: t (Parser a m b) -> Fold m b c -> Parser a m c Source #
Apply a collection of parsers to an input stream in a round robin fashion. Each parser is applied until it stops and then we repeat starting with the the first parser again.
Unimplemented
Collection of Alternatives
Unimplemented
, shortestN , longestN , fastestN -- first N successful in time , choiceN -- first N successful in position
, choice -- first successful in position
Repeated Alternatives
retryMaxTotal :: Int -> Parser a m b -> Fold m b c -> Parser a m c Source #
Keep trying a parser up to a maximum of n
failures. When the parser
fails the input consumed till now is dropped and the new instance is tried
on the fresh input.
Unimplemented
retryMaxSuccessive :: Int -> Parser a m b -> Fold m b c -> Parser a m c Source #
Like retryMaxTotal
but aborts after n
successive failures.
Unimplemented
retry :: Parser a m b -> Parser a m b Source #
Keep trying a parser until it succeeds. When the parser fails the input consumed till now is dropped and the new instance is tried on the fresh input.
Unimplemented
Zipping Input
zip :: Monad m => Stream m a -> Fold m (a, b) x -> Parser b m x Source #
Zip the input of a fold with a stream.
Pre-release
indexed :: forall m a b. Monad m => Fold m (Int, a) b -> Parser a m b Source #
Pair each element of a fold input with its index, starting from index 0.
Pre-release
makeIndexFilter :: (Fold m (s, a) b -> Parser a m b) -> (((s, a) -> Bool) -> Fold m (s, a) b -> Fold m (s, a) b) -> ((s, a) -> Bool) -> Fold m a b -> Parser a m b Source #
makeIndexFilter indexer filter predicate
generates a fold filtering
function using a fold indexing function that attaches an index to each input
element and a filtering function that filters using @(index, element) ->
Bool) as predicate.
For example:
filterWithIndex = makeIndexFilter indexed filter filterWithAbsTime = makeIndexFilter timestamped filter filterWithRelTime = makeIndexFilter timeIndexed filter
Pre-release
sampleFromthen :: Monad m => Int -> Int -> Fold m a b -> Parser a m b Source #
sampleFromthen offset stride
samples the element at offset
index and
then every element at strides of stride
.
Pre-release