Copyright | © 2015–2017 Megaparsec contributors © 2007 Paolo Martini © 1999–2001 Daan Leijen |
---|---|
License | FreeBSD |
Maintainer | Mark Karpov <markkarpov92@gmail.com> |
Stability | experimental |
Portability | portable |
Safe Haskell | None |
Language | Haskell2010 |
This module includes everything you need to get started writing a parser. If you are new to Megaparsec and don't know where to begin, take a look at the tutorials https://markkarpov.com/learn-haskell.html#megaparsec-tutorials.
By default this module is set up to parse character data. If you'd like to parse the result of your own tokenizer you should start with the following imports:
import Text.Megaparsec.Prim
import Text.Megaparsec.Combinator
Then you can implement your own version of satisfy
on top of the
token
primitive, etc.
The typical import section looks like this:
import Text.Megaparsec
import Text.Megaparsec.String
-- import Text.Megaparsec.ByteString
-- import Text.Megaparsec.ByteString.Lazy
-- import Text.Megaparsec.Text
-- import Text.Megaparsec.Text.Lazy
As you can see the second import depends on the data type you want to use
as input stream. It just defines the useful type-synonym Parser
.
Megaparsec 5 uses some type-level machinery to provide flexibility
without compromising on type safety. Thus type signatures are sometimes
necessary to avoid ambiguous types. If you're seeing an error message that
reads like “Ambiguous type variable e0
arising from … prevents the
constraint (ErrorComponent e0)
from being resolved”, you need to give
an explicit signature to your parser to resolve the ambiguity. It's a
good idea to provide type signatures for all top-level definitions.
Megaparsec is capable of a lot. Apart from this standard functionality you can parse permutation phrases with Text.Megaparsec.Perm, expressions with Text.Megaparsec.Expr, and even entire languages with Text.Megaparsec.Lexer. These modules should be imported explicitly along with the two modules mentioned above.
- type Parsec e s = ParsecT e s Identity
- data ParsecT e s m a
- parse :: Parsec e s a -> String -> s -> Either (ParseError (Token s) e) a
- parseMaybe :: (ErrorComponent e, Stream s) => Parsec e s a -> s -> Maybe a
- parseTest :: (ShowErrorComponent e, Ord (Token s), ShowToken (Token s), Show a) => Parsec e s a -> s -> IO ()
- runParser :: Parsec e s a -> String -> s -> Either (ParseError (Token s) e) a
- runParser' :: Parsec e s a -> State s -> (State s, Either (ParseError (Token s) e) a)
- runParserT :: Monad m => ParsecT e s m a -> String -> s -> m (Either (ParseError (Token s) e) a)
- runParserT' :: Monad m => ParsecT e s m a -> State s -> m (State s, Either (ParseError (Token s) e) a)
- (<|>) :: Alternative f => forall a. f a -> f a -> f a
- many :: Alternative f => forall a. f a -> f [a]
- some :: Alternative f => forall a. f a -> f [a]
- optional :: Alternative f => f a -> f (Maybe a)
- unexpected :: MonadParsec e s m => ErrorItem (Token s) -> m a
- match :: MonadParsec e s m => m a -> m ([Token s], a)
- region :: MonadParsec e s m => (ParseError (Token s) e -> ParseError (Token s) e) -> m a -> m a
- failure :: MonadParsec e s m => Set (ErrorItem (Token s)) -> Set (ErrorItem (Token s)) -> Set e -> m a
- (<?>) :: MonadParsec e s m => m a -> String -> m a
- label :: MonadParsec e s m => String -> m a -> m a
- hidden :: MonadParsec e s m => m a -> m a
- try :: MonadParsec e s m => m a -> m a
- lookAhead :: MonadParsec e s m => m a -> m a
- notFollowedBy :: MonadParsec e s m => m a -> m ()
- withRecovery :: MonadParsec e s m => (ParseError (Token s) e -> m a) -> m a -> m a
- observing :: MonadParsec e s m => m a -> m (Either (ParseError (Token s) e) a)
- eof :: MonadParsec e s m => m ()
- token :: MonadParsec e s m => (Token s -> Either (Set (ErrorItem (Token s)), Set (ErrorItem (Token s)), Set e) a) -> Maybe (Token s) -> m a
- tokens :: MonadParsec e s m => (Token s -> Token s -> Bool) -> [Token s] -> m [Token s]
- between :: Applicative m => m open -> m close -> m a -> m a
- choice :: (Foldable f, Alternative m) => f (m a) -> m a
- count :: Applicative m => Int -> m a -> m [a]
- count' :: Alternative m => Int -> Int -> m a -> m [a]
- eitherP :: Alternative m => m a -> m b -> m (Either a b)
- endBy :: Alternative m => m a -> m sep -> m [a]
- endBy1 :: Alternative m => m a -> m sep -> m [a]
- manyTill :: Alternative m => m a -> m end -> m [a]
- someTill :: Alternative m => m a -> m end -> m [a]
- option :: Alternative m => a -> m a -> m a
- sepBy :: Alternative m => m a -> m sep -> m [a]
- sepBy1 :: Alternative m => m a -> m sep -> m [a]
- sepEndBy :: Alternative m => m a -> m sep -> m [a]
- sepEndBy1 :: Alternative m => m a -> m sep -> m [a]
- skipMany :: Alternative m => m a -> m ()
- skipSome :: Alternative m => m a -> m ()
- newline :: (MonadParsec e s m, Token s ~ Char) => m Char
- crlf :: (MonadParsec e s m, Token s ~ Char) => m String
- eol :: (MonadParsec e s m, Token s ~ Char) => m String
- tab :: (MonadParsec e s m, Token s ~ Char) => m Char
- space :: (MonadParsec e s m, Token s ~ Char) => m ()
- controlChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- spaceChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- upperChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- lowerChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- letterChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- alphaNumChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- printChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- digitChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- octDigitChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- hexDigitChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- markChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- numberChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- punctuationChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- symbolChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- separatorChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- asciiChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- latin1Char :: (MonadParsec e s m, Token s ~ Char) => m Char
- charCategory :: (MonadParsec e s m, Token s ~ Char) => GeneralCategory -> m Char
- char :: (MonadParsec e s m, Token s ~ Char) => Char -> m Char
- char' :: (MonadParsec e s m, Token s ~ Char) => Char -> m Char
- anyChar :: (MonadParsec e s m, Token s ~ Char) => m Char
- oneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char
- oneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char
- noneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char
- noneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char
- satisfy :: (MonadParsec e s m, Token s ~ Char) => (Char -> Bool) -> m Char
- string :: (MonadParsec e s m, Token s ~ Char) => String -> m String
- string' :: (MonadParsec e s m, Token s ~ Char) => String -> m String
- data Pos
- mkPos :: (Integral a, MonadThrow m) => a -> m Pos
- unPos :: Pos -> Word
- unsafePos :: Word -> Pos
- data InvalidPosException = InvalidPosException
- data SourcePos = SourcePos {
- sourceName :: FilePath
- sourceLine :: !Pos
- sourceColumn :: !Pos
- initialPos :: String -> SourcePos
- sourcePosPretty :: SourcePos -> String
- data ErrorItem t
- class Ord e => ErrorComponent e where
- data Dec
- data ParseError t e = ParseError {
- errorPos :: NonEmpty SourcePos
- errorUnexpected :: Set (ErrorItem t)
- errorExpected :: Set (ErrorItem t)
- errorCustom :: Set e
- class ShowToken a where
- class Ord a => ShowErrorComponent a where
- parseErrorPretty :: (Ord t, ShowToken t, ShowErrorComponent e) => ParseError t e -> String
- dbg :: forall e s m a. (Stream s, ShowToken (Token s), ShowErrorComponent e, Show a) => String -> ParsecT e s m a -> ParsecT e s m a
- class Ord (Token s) => Stream s where
- data State s = State {
- stateInput :: s
- statePos :: NonEmpty SourcePos
- stateTokensProcessed :: !Word
- stateTabWidth :: Pos
- getInput :: MonadParsec e s m => m s
- setInput :: MonadParsec e s m => s -> m ()
- getPosition :: MonadParsec e s m => m SourcePos
- getNextTokenPosition :: forall e s m. MonadParsec e s m => m (Maybe SourcePos)
- setPosition :: MonadParsec e s m => SourcePos -> m ()
- pushPosition :: MonadParsec e s m => SourcePos -> m ()
- popPosition :: MonadParsec e s m => m ()
- getTokensProcessed :: MonadParsec e s m => m Word
- setTokensProcessed :: MonadParsec e s m => Word -> m ()
- getTabWidth :: MonadParsec e s m => m Pos
- setTabWidth :: MonadParsec e s m => Pos -> m ()
- getParserState :: MonadParsec e s m => m (State s)
- setParserState :: MonadParsec e s m => State s -> m ()
- updateParserState :: MonadParsec e s m => (State s -> State s) -> m ()
Running parser
type Parsec e s = ParsecT e s Identity Source #
Parsec
is a non-transformer variant of the more general ParsecT
monad transformer.
ParsecT e s m a
is a parser with custom data component of error e
,
stream type s
, underlying monad m
and return type a
.
:: Parsec e s a | Parser to run |
-> String | Name of source file |
-> s | Input for parser |
-> Either (ParseError (Token s) e) a |
parse p file input
runs parser p
over Identity
(see runParserT
if you're using the ParsecT
monad transformer; parse
itself is just a
synonym for runParser
). It returns either a ParseError
(Left
) or a
value of type a
(Right
). parseErrorPretty
can be used to turn
ParseError
into the string representation of the error message. See
Text.Megaparsec.Error if you need to do more advanced error analysis.
main = case (parse numbers "" "11,2,43") of
  Left err -> putStr (parseErrorPretty err)
  Right xs -> print (sum xs)

numbers = integer `sepBy` char ','
parseMaybe :: (ErrorComponent e, Stream s) => Parsec e s a -> s -> Maybe a Source #
parseMaybe p input
runs the parser p
on input
and returns the
result inside Just
on success and Nothing
on failure. This function
also parses eof
, so if the parser doesn't consume all of its input, it
will fail.
The function is supposed to be useful for lightweight parsing, where error messages (and thus file name) are not important and entire input should be parsed. For example it can be used when parsing of a single number according to specification of its format is desired.
:: (ShowErrorComponent e, Ord (Token s), ShowToken (Token s), Show a) | |
=> Parsec e s a | Parser to run |
-> s | Input for parser |
-> IO () |
The expression parseTest p input
applies the parser p
against input
input
and prints the result to stdout. Useful for testing.
:: Parsec e s a | Parser to run |
-> String | Name of source file |
-> s | Input for parser |
-> Either (ParseError (Token s) e) a |
runParser p file input
runs parser p
on the input stream of tokens
input
, obtained from source file
. The file
is only used in error
messages and may be the empty string. Returns either a ParseError
(Left
) or a value of type a
(Right
).
parseFromFile p file = runParser p file <$> readFile file
:: Monad m | |
=> ParsecT e s m a | Parser to run |
-> String | Name of source file |
-> s | Input for parser |
-> m (Either (ParseError (Token s) e) a) |
runParserT p file input
runs parser p
on the input list of tokens
input
, obtained from source file
. The file
is only used in error
messages and may be the empty string. Returns a computation in the
underlying monad m
that returns either a ParseError
(Left
) or a
value of type a
(Right
).
:: Monad m | |
=> ParsecT e s m a | Parser to run |
-> State s | Initial state |
-> m (State s, Either (ParseError (Token s) e) a) |
This function is similar to runParserT
, but like runParser'
it
accepts and returns parser state. This is thus the most general way to
run a parser.
Since: 4.2.0
Combinators
(<|>) :: Alternative f => forall a. f a -> f a -> f a #
An associative binary operation
This combinator implements choice. The parser p <|> q
first applies
p
. If it succeeds, the value of p
is returned. If p
fails
without consuming any input, parser q
is tried.
The parser is called predictive since q
is only tried when parser p
didn't consume any input (i.e. the look ahead is 1). This
non-backtracking behaviour allows for both an efficient implementation of
the parser combinators and the generation of good error messages.
many :: Alternative f => forall a. f a -> f [a] #
Zero or more.
many p
applies the parser p
zero or more times and returns a list
of the returned values of p
. Note that if the p
parser fails
consuming input, then the entire many p
parser fails with the error
message p
produced instead of just stopping iterating. In these cases
wrapping p
with try
may be desirable.
identifier = (:) <$> letter <*> many (alphaNumChar <|> char '_')
some :: Alternative f => forall a. f a -> f [a] #
One or more.
some p
applies the parser p
one or more times and returns a list of
the returned values of p
. The note about behavior of the combinator in
the case when p
fails consuming input (see many
) applies to some
as well.
word = some letter
optional :: Alternative f => f a -> f (Maybe a) #
One or none.
optional p
tries to apply the parser p
. It will parse p
or nothing.
It only fails if p
fails after consuming input. On success result of
p
is returned inside of Just
, on failure Nothing
is returned.
unexpected :: MonadParsec e s m => ErrorItem (Token s) -> m a Source #
The parser unexpected item
fails with an error message telling about
unexpected item item
without consuming any input.
match :: MonadParsec e s m => m a -> m ([Token s], a) Source #
Return both the result of a parse and the list of tokens that were
consumed during parsing. This relies on the change of the
stateTokensProcessed
value to evaluate how many tokens were consumed.
Since: 5.3.0
:: MonadParsec e s m | |
=> (ParseError (Token s) e -> ParseError (Token s) e) | How to process |
-> m a | The “region” that processing applies to |
-> m a |
Specify how to process ParseError
s that happen inside of this
wrapper. As a side effect of the current implementation, changing
errorPos
with this combinator will also change the final statePos
in
the parser state.
Since: 5.3.0
failure :: MonadParsec e s m => Set (ErrorItem (Token s)) -> Set (ErrorItem (Token s)) -> Set e -> m a Source #
The most general way to stop parsing and report a ParseError
.
unexpected
is defined in terms of this function:
unexpected item = failure (Set.singleton item) Set.empty Set.empty
Since: 4.2.0
(<?>) :: MonadParsec e s m => m a -> String -> m a infix 0 Source #
A synonym for label
in the form of an operator.
label :: MonadParsec e s m => String -> m a -> m a Source #
The parser label name p
behaves as parser p
, but whenever the
parser p
fails without consuming any input, it replaces names of
“expected” tokens with the name name
.
hidden :: MonadParsec e s m => m a -> m a Source #
hidden p
behaves just like parser p
, but it doesn't show any
“expected” tokens in error message when p
fails.
try :: MonadParsec e s m => m a -> m a Source #
The parser try p
behaves like parser p
, except that it backtracks
the parser state when p
fails (either consuming input or not).
This combinator is used whenever arbitrary look ahead is needed. Since
it pretends that it hasn't consumed any input when p
fails, the
(<|>
) combinator will try its second alternative even when the
first parser failed while consuming input.
For example, here is a parser that is supposed to parse the word “let” or the word “lexical”:
>>>
parseTest (string "let" <|> string "lexical") "lexical"
1:1: unexpected "lex" expecting "let"
What happens here? The first parser consumes “le” and fails (because it
doesn't see a “t”). The second parser, however, isn't tried, since the
first parser has already consumed some input! try
fixes this behavior
and allows backtracking to work:
>>>
parseTest (try (string "let") <|> string "lexical") "lexical"
"lexical"
try
also improves error messages in case of overlapping alternatives,
because Megaparsec's hint system can be used:
>>>
parseTest (try (string "let") <|> string "lexical") "le"
1:1: unexpected "le" expecting "let" or "lexical"
Please note that as of Megaparsec 4.4.0, string
backtracks
automatically (see tokens
), so it does not need try
. However, the
examples above demonstrate the idea behind try
so well that it was
decided to keep them. You still need to use try
when your
alternatives are complex, composite parsers.
lookAhead :: MonadParsec e s m => m a -> m a Source #
If p
in lookAhead p
succeeds (either consuming input or not) the
whole parser behaves like p
succeeded without consuming anything
(parser state is not updated as well). If p
fails, lookAhead
has no
effect, i.e. it will fail consuming input if p
fails consuming input.
Combine with try
if this is undesirable.
notFollowedBy :: MonadParsec e s m => m a -> m () Source #
notFollowedBy p
only succeeds when the parser p
fails. This
parser never consumes any input and never modifies parser state. It
can be used to implement the “longest match” rule.
withRecovery :: MonadParsec e s m => (ParseError (Token s) e -> m a) -> m a -> m a Source #
withRecovery r p
allows parsing to continue even if parser p
fails.
In this case r
is called with the actual ParseError
as its
argument. Typical usage is to return a value signifying failure to
parse this particular object and to consume some part of the input up
to the point where the next object starts.
Note that if r
fails, original error message is reported as if
without withRecovery
. The recovering parser r
can in no way influence
error messages.
Since: 4.4.0
observing :: MonadParsec e s m => m a -> m (Either (ParseError (Token s) e) a) Source #
observing p
allows to “observe” failure of the p
parser, should
it happen, without actually ending parsing, but instead getting the
ParseError
in Left
. On success parsed value is returned in Right
as usual. Note that this primitive just allows you to observe parse
errors as they happen, it does not backtrack or change how the p
parser works in any way.
Since: 5.1.0
eof :: MonadParsec e s m => m () Source #
This parser only succeeds at the end of the input.
token :: MonadParsec e s m => (Token s -> Either (Set (ErrorItem (Token s)), Set (ErrorItem (Token s)), Set e) a) -> Maybe (Token s) -> m a Source #
The parser token test mrep
accepts a token t
with result x
when
the function test t
returns Right x
. mrep
may provide
representation of the token to report in error messages when the input
stream is empty.
This is the most primitive combinator for accepting tokens. For
example, the satisfy
parser is implemented as:
satisfy f = token testChar Nothing
  where
    testChar x =
      if f x
        then Right x
        else Left (Set.singleton (Tokens (x:|[])), Set.empty, Set.empty)
tokens :: MonadParsec e s m => (Token s -> Token s -> Bool) -> [Token s] -> m [Token s] Source #
The parser tokens test
parses a list of tokens and returns it.
Supplied predicate test
is used to check equality of given and parsed
tokens.
This can be used for example to write string
:
string = tokens (==)
Note that beginning from Megaparsec 4.4.0, this is an auto-backtracking
primitive, which means that if it fails, it never consumes any input.
This is done to make its consumption model match how error messages for
this primitive are reported (which becomes an important thing as user
gets more control with primitives like withRecovery
):
>>>
parseTest (string "abc") "abd"
1:1: unexpected "abd" expecting "abc"
This means, in particular, that it's no longer necessary to use try
with tokens
-based parsers, such as string
and
string'
. This feature does not affect
performance in any way.
between :: Applicative m => m open -> m close -> m a -> m a Source #
between open close p
parses open
, followed by p
and close
.
Returns the value returned by p
.
braces = between (symbol "{") (symbol "}")
choice :: (Foldable f, Alternative m) => f (m a) -> m a Source #
choice ps
tries to apply the parsers in the list ps
in order, until
one of them succeeds. Returns the value of the succeeding parser.
count :: Applicative m => Int -> m a -> m [a] Source #
count n p
parses n
occurrences of p
. If n
is smaller than or equal
to zero, the parser is equivalent to return []
. Returns a list of n
values.
count' :: Alternative m => Int -> Int -> m a -> m [a] Source #
count' m n p
parses from m
to n
occurrences of p
. If n
is not
positive or m > n
, the parser is equivalent to return []
. Returns a list of
parsed values.
Please note that m
may be negative, in this case effect is the same
as if it were equal to zero.
eitherP :: Alternative m => m a -> m b -> m (Either a b) Source #
Combine two alternatives.
Since: 4.4.0
endBy :: Alternative m => m a -> m sep -> m [a] Source #
endBy p sep
parses zero or more occurrences of p
, separated and
ended by sep
. Returns a list of values returned by p
.
cStatements = cStatement `endBy` semicolon
endBy1 :: Alternative m => m a -> m sep -> m [a] Source #
endBy1 p sep
parses one or more occurrences of p
, separated and
ended by sep
. Returns a list of values returned by p
.
manyTill :: Alternative m => m a -> m end -> m [a] Source #
manyTill p end
applies parser p
zero or more times until parser
end
succeeds. Returns the list of values returned by p
. This parser
can be used to scan comments:
simpleComment = string "<!--" >> manyTill anyChar (string "-->")
someTill :: Alternative m => m a -> m end -> m [a] Source #
someTill p end
works similarly to manyTill p end
, but p
should
succeed at least once.
option :: Alternative m => a -> m a -> m a Source #
option x p
tries to apply the parser p
. If p
fails without
consuming input, it returns the value x
, otherwise the value returned
by p
.
priority = option 0 (digitToInt <$> digitChar)
sepBy :: Alternative m => m a -> m sep -> m [a] Source #
sepBy p sep
parses zero or more occurrences of p
, separated by
sep
. Returns a list of values returned by p
.
commaSep p = p `sepBy` comma
sepBy1 :: Alternative m => m a -> m sep -> m [a] Source #
sepBy1 p sep
parses one or more occurrences of p
, separated by
sep
. Returns a list of values returned by p
.
sepEndBy :: Alternative m => m a -> m sep -> m [a] Source #
sepEndBy p sep
parses zero or more occurrences of p
, separated
and optionally ended by sep
. Returns a list of values returned by p
.
sepEndBy1 :: Alternative m => m a -> m sep -> m [a] Source #
sepEndBy1 p sep
parses one or more occurrences of p
, separated
and optionally ended by sep
. Returns a list of values returned by p
.
skipMany :: Alternative m => m a -> m () Source #
skipMany p
applies the parser p
zero or more times, skipping its
result.
space = skipMany spaceChar
skipSome :: Alternative m => m a -> m () Source #
skipSome p
applies the parser p
one or more times, skipping its
result.
Character parsing
crlf :: (MonadParsec e s m, Token s ~ Char) => m String Source #
Parse a carriage return character followed by a newline character. Return the sequence of characters parsed.
controlChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parse a control character (a non-printing character of the Latin-1 subset of Unicode).
spaceChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parse a Unicode space character, and the control characters: tab, newline, carriage return, form feed, and vertical tab.
upperChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parse an upper-case or title-case alphabetic Unicode character. Title case is used by a small number of letter ligatures like the single-character form of Lj.
lowerChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parse a lower-case alphabetic Unicode character.
letterChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parse an alphabetic Unicode character: lower-case, upper-case, or title-case letter, or a letter of case-less scripts/modifier letter.
alphaNumChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parse an alphabetic or numeric Unicode character.
Note that the numeric digits outside the ASCII range are parsed by this
parser but not by digitChar
. Such digits may be part of identifiers but
are not used by the printer and reader to represent numbers.
printChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parse a printable Unicode character: letter, number, mark, punctuation, symbol or space.
digitChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parse an ASCII digit, i.e. between “0” and “9”.
octDigitChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parse an octal digit, i.e. between “0” and “7”.
hexDigitChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parse a hexadecimal digit, i.e. between “0” and “9”, or “a” and “f”, or “A” and “F”.
markChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parse a Unicode mark character (accents and the like), which combines with preceding characters.
numberChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parse a Unicode numeric character, including digits from various scripts, Roman numerals, etc.
punctuationChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parse a Unicode punctuation character, including various kinds of connectors, brackets and quotes.
symbolChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parse a Unicode symbol character, including mathematical and currency symbols.
separatorChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parse a Unicode space or separator character.
asciiChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parse a character from the first 128 characters of the Unicode character set, corresponding to the ASCII character set.
latin1Char :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
Parse a character from the first 256 characters of the Unicode character set, corresponding to the ISO 8859-1 (Latin-1) character set.
charCategory :: (MonadParsec e s m, Token s ~ Char) => GeneralCategory -> m Char Source #
charCategory cat
parses character in Unicode General Category cat
,
see GeneralCategory
.
char :: (MonadParsec e s m, Token s ~ Char) => Char -> m Char Source #
char c
parses a single character c
.
semicolon = char ';'
char' :: (MonadParsec e s m, Token s ~ Char) => Char -> m Char Source #
The same as char
but case-insensitive. This parser returns the
actually parsed character preserving its case.
>>>
parseTest (char' 'e') "E"
'E'
>>>
parseTest (char' 'e') "G"
1:1: unexpected 'G' expecting 'E' or 'e'
anyChar :: (MonadParsec e s m, Token s ~ Char) => m Char Source #
This parser succeeds for any character. Returns the parsed character.
oneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char Source #
oneOf cs
succeeds if the current character is in the supplied
collection of characters cs
. Returns the parsed character. Note that
this parser cannot automatically generate the “expected” component of
error message, so usually you should label it manually with label
or
(<?>
).
See also: satisfy
.
digit = oneOf ['0'..'9'] <?> "digit"
oneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char Source #
The same as oneOf
, but case-insensitive. Returns the parsed character
preserving its case.
vowel = oneOf' "aeiou" <?> "vowel"
noneOf :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char Source #
As the dual of oneOf
, noneOf cs
succeeds if the current character
is not in the supplied list of characters cs
. Returns the parsed
character.
noneOf' :: (Foldable f, MonadParsec e s m, Token s ~ Char) => f Char -> m Char Source #
The same as noneOf
, but case-insensitive.
consonant = noneOf' "aeiou" <?> "consonant"
satisfy :: (MonadParsec e s m, Token s ~ Char) => (Char -> Bool) -> m Char Source #
The parser satisfy f
succeeds for any character for which the
supplied function f
returns True
. Returns the character that is
actually parsed.
digitChar = satisfy isDigit <?> "digit"
oneOf cs  = satisfy (`elem` cs)
string :: (MonadParsec e s m, Token s ~ Char) => String -> m String Source #
string s
parses a sequence of characters given by s
. Returns the
parsed string (i.e. s
).
divOrMod = string "div" <|> string "mod"
string' :: (MonadParsec e s m, Token s ~ Char) => String -> m String Source #
The same as string
, but case-insensitive. On success returns string
cased as actually parsed input.
>>>
parseTest (string' "foobar") "foObAr"
"foObAr"
Textual source position
mkPos :: (Integral a, MonadThrow m) => a -> m Pos Source #
Construction of Pos
from an instance of Integral
. The function
throws InvalidPosException
when given non-positive argument. Note that
the function is polymorphic with respect to MonadThrow
m
, so you can
get result inside of Maybe
, for example.
Since: 5.0.0
unsafePos :: Word -> Pos Source #
Dangerous construction of Pos
. Use when you know for sure that
argument is positive.
Since: 5.0.0
data InvalidPosException Source #
The exception is thrown by mkPos
when its argument is not a positive
number.
Since: 5.0.0
The data type SourcePos
represents source positions. It contains the
name of the source file, a line number, and a column number. Source line
and column positions change intensively during parsing, so we need to
make them strict to avoid memory leaks.
SourcePos | |
|
initialPos :: String -> SourcePos Source #
Construct initial position (line 1, column 1) given name of source file.
Error messages
Data type that is used to represent “unexpected/expected” items in
ParseError
. The data type is parametrized over the token type t
.
Since: 5.0.0
Tokens (NonEmpty t) | Non-empty stream of tokens |
Label (NonEmpty Char) | Label (cannot be empty) |
EndOfInput | End of input |
Eq t => Eq (ErrorItem t) Source # | |
Data t => Data (ErrorItem t) Source # | |
Ord t => Ord (ErrorItem t) Source # | |
Read t => Read (ErrorItem t) Source # | |
Show t => Show (ErrorItem t) Source # | |
Generic (ErrorItem t) Source # | |
Arbitrary t => Arbitrary (ErrorItem t) Source # | |
NFData t => NFData (ErrorItem t) Source # | |
(Ord t, ShowToken t) => ShowErrorComponent (ErrorItem t) Source # | |
type Rep (ErrorItem t) Source # | |
class Ord e => ErrorComponent e where Source #
The type class defines how to represent information about various
exceptional situations. Data types that are used as custom data component
in ParseError
must be instances of this type class.
Since: 5.0.0
“Default error component”. This is our instance of ErrorComponent
provided out-of-box.
Since: 5.0.0
data ParseError t e Source #
ParseError
represents… parse errors. It provides the stack of source
positions, a set of expected and unexpected tokens as well as a set of
custom associated data. The data type is parametrized over the token type
t
and the custom data e
.
Note that the stack of source positions contains current position as its head, and the rest of positions allows to track full sequence of include files with topmost source file at the end of the list.
Semigroup
(and Monoid
) instance of the data type allows to merge
parse errors from different branches of parsing. When merging two
ParseError
s, the longest match is preferred; if positions are the same,
custom data sets and collections of message items are combined.
ParseError | |
|
(Eq e, Eq t) => Eq (ParseError t e) Source # | |
(Ord e, Ord t, Data e, Data t) => Data (ParseError t e) Source # | |
(Ord e, Ord t, Read e, Read t) => Read (ParseError t e) Source # | |
(Show e, Show t) => Show (ParseError t e) Source # | |
Generic (ParseError t e) Source # | |
(Ord t, Ord e) => Semigroup (ParseError t e) Source # | |
(Ord t, Ord e) => Monoid (ParseError t e) Source # | |
(Arbitrary t, Ord t, Arbitrary e, Ord e) => Arbitrary (ParseError t e) Source # | |
(Show t, Typeable * t, Ord t, ShowToken t, Show e, Typeable * e, ShowErrorComponent e) => Exception (ParseError t e) Source # | |
(NFData t, NFData e) => NFData (ParseError t e) Source # | |
type Rep (ParseError t e) Source # | |
class ShowToken a where Source #
Type class ShowToken
includes methods that allow to pretty-print
single token as well as stream of tokens. This is used for rendering of
error messages.
showTokens :: NonEmpty a -> String Source #
Pretty-print non-empty stream of tokens. This function is also used to print single tokens (represented as singleton lists).
Since: 5.0.0
class Ord a => ShowErrorComponent a where Source #
The type class defines how to print the custom data component of
ParseError
.
Since: 5.0.0
showErrorComponent :: a -> String Source #
Pretty-print the custom data component of ParseError
.
ShowErrorComponent Dec Source # | |
(Ord t, ShowToken t) => ShowErrorComponent (ErrorItem t) Source # | |
:: (Ord t, ShowToken t, ShowErrorComponent e) | |
=> ParseError t e | Parse error to render |
-> String | Result of rendering |
Pretty-print a ParseError
. The rendered String
always ends with a
newline.
The function is defined as:
parseErrorPretty e = sourcePosStackPretty (errorPos e) ++ ":\n" ++ parseErrorTextPretty e
Since: 5.0.0
Debugging
:: (Stream s, ShowToken (Token s), ShowErrorComponent e, Show a) | |
=> String | Debugging label |
-> ParsecT e s m a | Parser to debug |
-> ParsecT e s m a | Parser that prints debugging messages |
dbg label p
parser works exactly like p
, but when it's evaluated it
also prints information useful for debugging. The label
is only used to
refer to this parser in the debugging output. This combinator uses the
trace
function from Debug.Trace under the hood.
Typical usage is to wrap every sub-parser in a misbehaving parser with
dbg
assigning meaningful labels. Then give it a shot and go through the
print-out. As of the current version, this combinator prints all available
information except for hints, which are probably only interesting to
the maintainer of Megaparsec itself and may be quite verbose to output in
general. Let me know if you would like to be able to see hints in the
debugging output.
The output itself is pretty self-explanatory, although the following abbreviations should be clarified (they are derived from the low-level source code):
- COK — “consumed OK”. The parser consumed input and succeeded.
- CERR — “consumed error”. The parser consumed input and failed.
- EOK — “empty OK”. The parser succeeded without consuming input.
- EERR — “empty error”. The parser failed without consuming input.
Finally, it's not possible to lift this function into some monad
transformers without introducing surprising behavior (e.g. unexpected
state backtracking) or adding otherwise redundant constraints (e.g.
Show
instance for state), so this helper is only available for
ParsecT
monad, not MonadParsec
in general.
Since: 5.1.0
Low-level operations
class Ord (Token s) => Stream s where Source #
An instance of Stream s
has stream type s
. Token type is determined
by the stream and can be found via Token
type function.
uncons :: s -> Maybe (Token s, s) Source #
Get next token from the stream. If the stream is empty, return
Nothing
.
updatePos :: Proxy s -> Pos -> SourcePos -> Token s -> (SourcePos, SourcePos) Source #
Update position in stream given tab width, current position, and
current token. The result is a tuple where the first element will be
used to report parse errors for current token, while the second element
is the incremented position that will be stored in the parser's state.
The stored (incremented) position is used whenever position can't
be/shouldn't be updated by consuming a token. For example, when using
failure
, we don't grab a new token (we need to fail right where we are
now), so error position will be taken from parser's state.
When you work with streams where elements do not contain information
about their position in input, the result usually consists of the
third argument unchanged and incremented position calculated with
respect to current token. This is how default instances of Stream
work (they use defaultUpdatePos
, which may be a good starting point
for your own position-advancing function).
When you wish to deal with a stream of tokens where every token “knows” its start and end position in input (for example, you have produced the stream with Happy/Alex), then the best strategy is to use the start position as the actual element position and provide the end position of the token as the incremented one.
Since: 5.0.0
This is Megaparsec's state; it is parametrized over stream type s
.
State | |
|
getInput :: MonadParsec e s m => m s Source #
Return the current input.
setInput :: MonadParsec e s m => s -> m () Source #
getPosition :: MonadParsec e s m => m SourcePos Source #
Return the current source position.
See also: setPosition
, pushPosition
, popPosition
, and SourcePos
.
getNextTokenPosition :: forall e s m. MonadParsec e s m => m (Maybe SourcePos) Source #
Get the position where the next token in the stream begins. If the
stream is empty, return Nothing
.
Since: 5.3.0
setPosition :: MonadParsec e s m => SourcePos -> m () Source #
setPosition pos
sets the current source position to pos
.
See also: getPosition
, pushPosition
, popPosition
, and SourcePos
.
pushPosition :: MonadParsec e s m => SourcePos -> m () Source #
Push a position onto the stack of positions and continue parsing, working with this position. Useful for working with include files and the like.
See also: getPosition
, setPosition
, popPosition
, and SourcePos
.
Since: 5.0.0
popPosition :: MonadParsec e s m => m () Source #
Pop a position from the stack of positions unless it only contains one
element (in that case the stack of positions remains the same). This is
how to return to the previous source file after pushPosition
.
See also: getPosition
, setPosition
, pushPosition
, and SourcePos
.
Since: 5.0.0
getTokensProcessed :: MonadParsec e s m => m Word Source #
Get the number of tokens processed so far.
Since: 5.2.0
setTokensProcessed :: MonadParsec e s m => Word -> m () Source #
Set the number of tokens processed so far.
Since: 5.2.0
getTabWidth :: MonadParsec e s m => m Pos Source #
Return the tab width. The default tab width is equal to
defaultTabWidth
. You can set a different tab width with the help of
setTabWidth
.
setTabWidth :: MonadParsec e s m => Pos -> m () Source #
Set tab width. If the argument of the function is not a positive
number, defaultTabWidth
will be used.
getParserState :: MonadParsec e s m => m (State s) Source #
Return the full parser state as a State
record.
setParserState :: MonadParsec e s m => State s -> m () Source #
setParserState st
sets the parser state to st
.
updateParserState :: MonadParsec e s m => (State s -> State s) -> m () Source #
updateParserState f
applies the function f
to the parser state.