Safe Haskell | None |
---|---|
Language | Haskell2010 |
High-level XML parsers, built on top of Data.XML.Parser.Mid:
- entity references are expanded
- CDATAs are abstracted away
- comments are ignored
- whitespace between tokens is ignored
- duplicate attributes are ignored
All documentation examples assume the following setup:
:set -XOverloadedStrings import Data.Attoparsec.ByteString
Synopsis
- module Data.XML.Parser.High.AttrParser
- module Data.XML.Parser.High.NameParser
- data Prolog = Prolog {}
- data Token
- data TokenParser m a
- data ContentParser m a
- noContent :: Applicative m => ContentParser m ()
- withContent :: TokenParser m a -> ContentParser m a
- anyContent :: CharParsing m => Monad m => ContentParser m ()
- runTokenParser :: TokenParser m a -> m a
- prolog :: CharParsing m => Monad m => TokenParser m Prolog
- instruction :: CharParsing m => Monad m => TokenParser m Instruction
- textContent :: CharParsing m => Monad m => EntityDecoder -> TokenParser m Text
- textContent' :: CharParsing m => Monad m => TokenParser m Text
- tag :: CharParsing m => Monad m => EntityDecoder -> NameParser a -> (a -> AttrParser b) -> (b -> ContentParser m c) -> TokenParser m c
- tag' :: CharParsing m => Monad m => NameParser a -> AttrParser b -> ContentParser m c -> TokenParser m c
- anyTag :: CharParsing m => Monad m => TokenParser m ()
- anyToken :: CharParsing m => Monad m => EntityDecoder -> TokenParser m Token
- anyToken' :: CharParsing m => Monad m => TokenParser m Token
Documentation
XML document prolog.
TokenProlog Prolog | |
TokenInstruction Instruction | |
TokenTag QName (Map QName Text) [Token] | |
TokenTextContent Text |
data TokenParser m a Source #
A parser that consumes whole Token
s.
Instances
data ContentParser m a Source #
How to parse tag content.
Instances
Functor m => Functor (ContentParser m) Source # | |
Defined in Data.XML.Parser.High fmap :: (a -> b) -> ContentParser m a -> ContentParser m b # (<$) :: a -> ContentParser m b -> ContentParser m a # |
noContent :: Applicative m => ContentParser m () Source #
Assert that content is empty.
>>>
parseOnly (runTokenParser $ tag' anyName anyAttr noContent) "<tag key='value'>body</tag>"
Left ...>>>
parseOnly (runTokenParser $ tag' anyName anyAttr noContent) "<tag key='value'></tag>"
Right ()>>>
parseOnly (runTokenParser $ tag' anyName anyAttr noContent) "<tag key='value'/>"
Right ()
withContent :: TokenParser m a -> ContentParser m a Source #
Assert that content is not empty, and parse it using given token parser.
>>>
parseOnly (runTokenParser $ tag' anyName anyAttr (withContent textContent')) "<tag key='value'>body</tag>"
Right "body">>>
parseOnly (runTokenParser $ tag' anyName anyAttr (withContent textContent')) "<tag key='value'></tag>"
Left ...>>>
parseOnly (runTokenParser $ tag' anyName anyAttr (withContent textContent')) "<tag key='value'/>"
Left ...>>>
parseOnly (runTokenParser $ tag' anyName anyAttr (withContent $ pure ())) "<tag key='value'/>"
Left ...
anyContent :: CharParsing m => Monad m => ContentParser m () Source #
Accept any content, including empty content.
>>>
parseOnly (runTokenParser $ tag' anyName anyAttr anyContent) "<tag key='value'>body</tag>"
Right ()>>>
parseOnly (runTokenParser $ tag' anyName anyAttr anyContent) "<tag key='value'></tag>"
Right ()>>>
parseOnly (runTokenParser $ tag' anyName anyAttr anyContent) "<tag key='value'/>"
Right ()
runTokenParser :: TokenParser m a -> m a Source #
prolog :: CharParsing m => Monad m => TokenParser m Prolog Source #
https://www.w3.org/TR/REC-xml/#NT-prolog
>>>
parseOnly (runTokenParser prolog) "<?xml version='1.0'?><!DOCTYPE greeting>"
Right (Prolog {prologXmlDeclaration = Just (XMLDeclaration "1.0" ...), prologInstructions = [], prologDoctype = Just (Doctype "greeting" ...)})>>>
parseOnly (runTokenParser prolog) "<?xml version='1.0'?> <!-- comments and whitespace are ignored --><!DOCTYPE greeting>"
Right (Prolog {prologXmlDeclaration = Just (XMLDeclaration "1.0" ...), prologInstructions = [], prologDoctype = Just (Doctype "greeting" ...)})
instruction :: CharParsing m => Monad m => TokenParser m Instruction Source #
Parse a processing instruction.
https://www.w3.org/TR/REC-xml/#dt-pi
>>>
parseOnly (runTokenParser instruction) "<?php echo 'Hello World!'; ?>"
Right (Instruction "php" "echo 'Hello World!'; ")>>>
parseOnly (runTokenParser instruction) "<!-- comments and whitespace are ignored --> <?php echo 'Hello World!'; ?>"
Right (Instruction "php" "echo 'Hello World!'; ")
textContent :: CharParsing m => Monad m => EntityDecoder -> TokenParser m Text Source #
Parse textual content of a tag, including CDATA.
textContent' :: CharParsing m => Monad m => TokenParser m Text Source #
Same as textContent (decodePredefinedEntities <> decodeHtmlEntities)
, provided for convenience.
>>>
parseOnly (runTokenParser $ tag' anyName anyAttr (withContent textContent')) "<tag>body<!-- Ignored comment --><![CDATA[<innertag>innerbody</innertag>]]></tag>"
Right "body<innertag>innerbody</innertag>"
:: CharParsing m | |
=> Monad m | |
=> EntityDecoder | How to expand entity references |
-> NameParser a | How to parse tag name |
-> (a -> AttrParser b) | How to parse tag attributes |
-> (b -> ContentParser m c) | How to parse tag content |
-> TokenParser m c |
Generic tag parser.
:: CharParsing m | |
=> Monad m | |
=> NameParser a | How to parse tag name |
-> AttrParser b | How to parse tag attributes |
-> ContentParser m c | How to parse tag content |
-> TokenParser m c |
Simplified version of tag
:
- no state forwarding between name, attributes and content parsers
- uses
decodePredefinedEntities <> decodeHtmlEntities
to expand entity references
anyTag :: CharParsing m => Monad m => TokenParser m () Source #
Parse a tag with any name, any attributes and any content.
>>>
parseOnly (runTokenParser anyTag) "<tag key='value'>body</tag>"
Right ()>>>
parseOnly (runTokenParser anyTag) "<tag key='value'/>"
Right ()>>>
parseOnly (runTokenParser anyTag) "<!-- ignored comment --><tag key='value'/>"
Right ()>>>
parseOnly (runTokenParser anyTag) "<tag key='value'>body<!-- ignored comment --></tag>"
Right ()
anyToken :: CharParsing m => Monad m => EntityDecoder -> TokenParser m Token Source #
Parse any Token
.
anyToken' :: CharParsing m => Monad m => TokenParser m Token Source #
Same as anyToken (decodePredefinedEntities <> decodeHtmlEntities)
, provided for convenience.