Safe Haskell | Safe-Inferred |
---|---|
Language | GHC2021 |
A Markup parser and printer for strict ByteStrings, focused on optimising performance. Markup is a representation of data such as HTML, SVG or XML, but the parsing does not always conform to the relevant standards.
Synopsis
- newtype Markup = Markup {}
- data Standard
- markup :: Standard -> ByteString -> Warn Markup
- markup_ :: Standard -> ByteString -> Markup
- data RenderStyle
- markdown :: RenderStyle -> Standard -> Markup -> Warn ByteString
- markdown_ :: RenderStyle -> Standard -> Markup -> ByteString
- normalize :: Markup -> Markup
- normContent :: Markup -> Markup
- wellFormed :: Standard -> Markup -> [MarkupWarning]
- isWellFormed :: Standard -> Markup -> Bool
- data MarkupWarning
- type Warn a = These [MarkupWarning] a
- warnError :: Warn a -> a
- warnEither :: Warn a -> Either [MarkupWarning] a
- warnMaybe :: Warn a -> Maybe a
- type Element = Tree Token
- element :: NameTag -> [Attr] -> Markup -> Markup
- element_ :: NameTag -> [Attr] -> Markup
- emptyElem :: NameTag -> [Attr] -> Markup
- elementc :: NameTag -> [Attr] -> ByteString -> Markup
- content :: ByteString -> Markup
- contentRaw :: ByteString -> Markup
- type NameTag = ByteString
- selfClosers :: [NameTag]
- doctypeHtml :: Markup
- doctypeXml :: Markup
- type AttrName = ByteString
- type AttrValue = ByteString
- data Attr = Attr {}
- addAttrs :: [Attr] -> Token -> Maybe Token
- attrsP :: Standard -> Parser a [Attr]
- nameP :: Standard -> Parser e ByteString
- data OpenTagType
- data Token
- = OpenTag !OpenTagType !NameTag ![Attr]
- | EndTag !NameTag
- | Content !ByteString
- | Comment !ByteString
- | Decl !ByteString ![Attr]
- | Doctype !ByteString
- tokenize :: Standard -> ByteString -> Warn [Token]
- tokenize_ :: Standard -> ByteString -> [Token]
- tokenP :: Standard -> Parser e Token
- detokenize :: Standard -> Token -> ByteString
- gather :: Standard -> [Token] -> Warn Markup
- gather_ :: Standard -> [Token] -> Markup
- degather :: Standard -> Markup -> Warn [Token]
- degather_ :: Standard -> Markup -> [Token]
- xmlVersionInfoP :: Parser e ByteString
- xmlEncodingDeclP :: Parser e ByteString
- xmlStandaloneP :: Parser e ByteString
- xmlVersionNumP :: Parser e ByteString
- xmlEncNameP :: Parser e ByteString
- xmlYesNoP :: Parser e ByteString
- utf8ToStr :: ByteString -> String
- strToUtf8 :: String -> ByteString
- escapeChar :: Char -> ByteString
- escape :: ByteString -> ByteString
- data Tree a = Node {}
Usage
import MarkupParse
import Data.ByteString qualified as B

bs <- B.readFile "other/line.svg"
m = markup_ Xml bs
markup_ . markdown_ is an isomorphic round trip from Markup to ByteString to Markup:

- This is subject to the Markup being wellFormed.
- The reverse round trip, markdown_ . markup_ (from ByteString to Markup to ByteString), is not isomorphic, as parsing forgets whitespace within tags, comments and declarations.
- The underscored versions of the main functions throw an exception on any warnings encountered along the way.
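As a concrete check of this property, a sketch (assuming an Eq instance for Markup and that the m parsed in the usage block above is well-formed; the results are not verified doctest output):

isWellFormed Xml m
-- True
markup_ Xml (markdown_ Compact Xml m) == m
-- True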
At a lower level, a round trip pipeline might look something like:
tokenize Html >=>
gather Html >=>
(normalize >>> pure) >=>
degather Html >=>
fmap (detokenize Html) >>> pure

tokenize converts a ByteString to a Token list, gather arranges the tokens into a Markup tree, normalize concatenates content and normalizes attributes, degather turns the markup tree back into a token list and, finally, detokenize turns each token back into a bytestring.
Along the way, the Kleisli fishies (>=>) and forward composition (>>>) accumulate any warnings via the These monad instance, which is wrapped into the type synonym Warn.
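Assembled into a single function, a minimal sketch of such a pipeline might look like the following (the roundTrip name and the final mconcat step are illustrative, not part of the library):

import Control.Arrow ((>>>))
import Control.Monad ((>=>))
import Data.ByteString (ByteString)
import MarkupParse

-- ByteString -> Warn ByteString, accumulating warnings at every stage.
roundTrip :: Standard -> ByteString -> Warn ByteString
roundTrip std =
  tokenize std                           -- ByteString   -> Warn [Token]
    >=> gather std                       -- [Token]      -> Warn Markup
    >=> (normalize >>> pure)             -- Markup       -> Warn Markup
    >=> degather std                     -- Markup       -> Warn [Token]
    >=> (fmap (detokenize std) >>> pure) -- [Token]      -> Warn [ByteString]
    >=> (mconcat >>> pure)               -- [ByteString] -> Warn ByteString

warnError (roundTrip Html bs) then either returns the rendered bytestring or throws on any accumulated warnings.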
Markup
newtype Markup Source #
A list of Elements, or Tree Tokens.
>>>
markup Html "<foo class=\"bar\">baz</foo>"
That (Markup {elements = [Node {rootLabel = OpenTag StartTag "foo" [Attr {attrName = "class", attrValue = "bar"}], subForest = [Node {rootLabel = Content "baz", subForest = []}]}]})
data Standard Source #
From a parsing point of view, Html and Xml (and Svg) are close enough that they share a lot of parsing logic, so that parsing and printing need only minor tweaks between standards.
The Xml parsing logic is based on the XML productions found in https://www.w3.org/TR/xml/.
The Html parsing is based on a reading of html-parse, but ignores the various '\x00' to '\xfffd' and eof directives that form part of the HTML standards.
markup :: Standard -> ByteString -> Warn Markup Source #
Convert bytestrings to Markup
>>>
markup Html "<foo><br></foo><baz"
These [MarkupParser (ParserLeftover "<baz")] (Markup {elements = [Node {rootLabel = OpenTag StartTag "foo" [], subForest = [Node {rootLabel = OpenTag StartTag "br" [], subForest = []}]}]})
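Because the result is a These value, a caller can keep whatever was parsed while reporting the warnings separately. A minimal sketch, using a hypothetical helper name markupLenient (not part of the library):

import Data.ByteString (ByteString)
import Data.These (These (..))
import MarkupParse
import System.IO (hPrint, stderr)

-- Hypothetical helper: report warnings on stderr and keep whatever was
-- parsed, falling back to empty Markup if nothing was.
markupLenient :: Standard -> ByteString -> IO Markup
markupLenient std bs = case markup std bs of
  This ws    -> mapM_ (hPrint stderr) ws >> pure (Markup [])
  That m     -> pure m
  These ws m -> mapM_ (hPrint stderr) ws >> pure m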
data RenderStyle Source #
Indented 0 puts newlines in between the tags.
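For comparison, a sketch of the other styles (outputs inferred from the constructor descriptions rather than copied from doctests):

markdown_ Compact Html (markup_ Html "<foo><br></foo>")
-- "<foo><br></foo>"        -- Compact: everything on one line
markdown_ (Indented 0) Html (markup_ Html "<foo><br></foo>")
-- "<foo>\n<br>\n</foo>"    -- newlines between tags, zero indentation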
Instances
Generic RenderStyle Source # | |
Defined in MarkupParse type Rep RenderStyle :: Type -> Type # from :: RenderStyle -> Rep RenderStyle x # to :: Rep RenderStyle x -> RenderStyle # | |
Show RenderStyle Source # | |
Defined in MarkupParse showsPrec :: Int -> RenderStyle -> ShowS # show :: RenderStyle -> String # showList :: [RenderStyle] -> ShowS # | |
Eq RenderStyle Source # | |
Defined in MarkupParse (==) :: RenderStyle -> RenderStyle -> Bool # (/=) :: RenderStyle -> RenderStyle -> Bool # | |
type Rep RenderStyle Source # | |
Defined in MarkupParse type Rep RenderStyle = D1 ('MetaData "RenderStyle" "MarkupParse" "markup-parse-0.1.1-BdYhv7KNpuBDovL7j9hJZ4" 'False) (C1 ('MetaCons "Compact" 'PrefixI 'False) (U1 :: Type -> Type) :+: C1 ('MetaCons "Indented" 'PrefixI 'False) (S1 ('MetaSel ('Nothing :: Maybe Symbol) 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Int))) |
markdown :: RenderStyle -> Standard -> Markup -> Warn ByteString Source #
Convert Markup
to bytestrings
>>>
markdown (Indented 4) Html (markup_ Html [i|<foo><br></foo>|])
That "<foo>\n    <br>\n</foo>"
markdown_ :: RenderStyle -> Standard -> Markup -> ByteString Source #
Convert Markup
to ByteString
and error on warnings.
>>>
B.putStr $ markdown_ (Indented 4) Html (markup_ Html [i|<foo><br></foo>|])
<foo>
    <br>
</foo>
normalize :: Markup -> Markup Source #
Concatenate sequential content and normalize attributes: class values are merged into a single space-separated value, and duplicate attributes are removed (the last value wins).
>>>
B.putStr $ warnError $ markdown Compact Xml $ normalize (markup_ Xml [i|<foo class="a" class="b" bar="first" bar="last"/>|])
<foo bar="last" class="a b"/>
normContent :: Markup -> Markup Source #
Normalise Content in Markup, concatenating adjacent Content, and removing mempty Content.
>>>
normContent $ content "a" <> content "" <> content "b"
Markup {elements = [Node {rootLabel = Content "ab", subForest = []}]}
wellFormed :: Standard -> Markup -> [MarkupWarning] Source #
Check for well-formedness and return warnings encountered.
>>>
wellFormed Html $ Markup [Node (Comment "") [], Node (EndTag "foo") [], Node (OpenTag EmptyElemTag "foo" []) [Node (Content "bar") []], Node (OpenTag EmptyElemTag "foo" []) []]
[EmptyContent,EndTagInTree,LeafWithChildren,BadEmptyElemTag]
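isWellFormed (see the synopsis) is the Bool form of the same check; a sketch:

isWellFormed Html (markup_ Html "<foo>bar</foo>")
-- True
isWellFormed Html (Markup [Node (EndTag "foo") []])
-- False    -- an EndTag should never appear in a tree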
Warnings
data MarkupWarning Source #
markup-parse generally tries to continue on parse errors, returning whatever could still be parsed together with any warnings.
BadEmptyElemTag | A tag ending with "/>" that is not an element of selfClosers. |
SelfCloserWithChildren | A tag ending with "/>" that has children. Cannot happen in the parsing phase. |
LeafWithChildren | Only a StartTag can have children. |
TagMismatch NameTag NameTag | An EndTag with a different name to the currently open StartTag. |
UnmatchedEndTag | An EndTag with no corresponding StartTag. |
UnclosedTag | A StartTag with no corresponding EndTag. |
EndTagInTree | An EndTag should never appear in Markup. |
EmptyContent | Empty Content, Comment, Decl or Doctype |
BadDecl | Badly formed declaration |
MarkupParser ParserWarning |
Instances
type Warn a = These [MarkupWarning] a Source #
A type synonym for the common return type of many functions. A common computation pipeline takes advantage of the These Monad instance, e.g.
markup s bs = bs & (tokenize s >=> gather s)
warnError :: Warn a -> a Source #
Convert any warnings to an error
>>>
warnError $ (tokenize Html) "<foo"
*** Exception: MarkupParser (ParserLeftover "<foo") ...
warnEither :: Warn a -> Either [MarkupWarning] a Source #
Returns Left on any warnings
>>>
warnEither $ (tokenize Html) "<foo><baz"
Left [MarkupParser (ParserLeftover "<baz")]
warnMaybe :: Warn a -> Maybe a Source #
Returns results, if any, ignoring warnings.
>>>
warnMaybe $ (tokenize Html) "<foo><baz"
Just [OpenTag StartTag "foo" []]
Element
element :: NameTag -> [Attr] -> Markup -> Markup Source #
Create Markup
from a name tag and attributes that wraps some other markup.
>>>
element "div" [] (element_ "br" [])
Markup {elements = [Node {rootLabel = OpenTag StartTag "div" [], subForest = [Node {rootLabel = OpenTag StartTag "br" [], subForest = []}]}]}
element_ :: NameTag -> [Attr] -> Markup Source #
Create Markup
from a name tag and attributes that doesn't wrap some other markup. The OpenTagType
used is StartTag
. Use emptyElem
if you want to create EmptyElemTag
based markup.
>>>
(element_ "br" [])
Markup {elements = [Node {rootLabel = OpenTag StartTag "br" [], subForest = []}]}
emptyElem :: NameTag -> [Attr] -> Markup Source #
Create Markup
from a name tag and attributes using EmptyElemTag
, that doesn't wrap some other markup. No checks are made on whether this creates well-formed markup.
>>>
emptyElem "br" []
Markup {elements = [Node {rootLabel = OpenTag EmptyElemTag "br" [], subForest = []}]}
content :: ByteString -> Markup Source #
Create Markup content from a bytestring, escaping the usual characters.
contentRaw :: ByteString -> Markup Source #
Create a Markup element from a bytestring, not escaping the usual characters.
>>>
contentRaw "<content>"
Markup {elements = [Node {rootLabel = Content "<content>", subForest = []}]}
>>>
markup_ Html $ markdown_ Compact Html $ contentRaw "<content>"
*** Exception: UnclosedTag ...
Token components
type NameTag = ByteString Source #
Name of token
selfClosers :: [NameTag] Source #
Html tags that self-close
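For example (a sketch), the usual HTML void elements are members:

"br" `elem` selfClosers
-- True
"div" `elem` selfClosers
-- False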
doctypeHtml :: Markup Source #
Standard Html Doctype
>>>
markdown_ Compact Html doctypeHtml
"<!DOCTYPE html>"
doctypeXml :: Markup Source #
Standard Xml Doctype
>>>
markdown_ Compact Xml doctypeXml
"<?xml version=\"1.0\" encoding=\"utf-8\"?><!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">"
type AttrName = ByteString Source #
Name of an attribute.
type AttrValue = ByteString Source #
Value of an attribute. "" is equivalent to true with respect to boolean attributes.
data Attr Source #
An attribute of a tag.
When parsing, boolean attributes, which are not required to have a value in HTML, are given the value "", which is fine, but the empty value will then be rendered explicitly:
>>>
detokenize Html <$> tokenize_ Html [i|<input checked>|]
["<input checked=\"\">"]
Instances
Generic Attr Source # | |
Show Attr Source # | |
NFData Attr Source # | |
Defined in MarkupParse | |
Eq Attr Source # | |
Ord Attr Source # | |
ToExpr Attr Source # | |
Defined in MarkupParse | |
type Rep Attr Source # | |
Defined in MarkupParse type Rep Attr = D1 ('MetaData "Attr" "MarkupParse" "markup-parse-0.1.1-BdYhv7KNpuBDovL7j9hJZ4" 'False) (C1 ('MetaCons "Attr" 'PrefixI 'True) (S1 ('MetaSel ('Just "attrName") 'NoSourceUnpackedness 'SourceStrict 'DecidedStrict) (Rec0 AttrName) :*: S1 ('MetaSel ('Just "attrValue") 'NoSourceUnpackedness 'SourceStrict 'DecidedStrict) (Rec0 AttrValue))) |
addAttrs :: [Attr] -> Token -> Maybe Token Source #
Append attributes to an existing Token attribute list. Returns Nothing for tokens that do not have attributes.
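A sketch of both cases (results shown as comments, following the documented behaviour):

addAttrs [Attr "id" "a"] (OpenTag StartTag "foo" [])
-- Just (OpenTag StartTag "foo" [Attr {attrName = "id", attrValue = "a"}])
addAttrs [Attr "id" "a"] (Content "bar")
-- Nothing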
nameP :: Standard -> Parser e ByteString Source #
Parse a tag name. Each standard is slightly different.
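A sketch of running it directly with flatparse's runParser (as in the Token examples below); the exact leftover bytes are an assumption:

runParser (nameP Html) "div class=\"a\">"
-- OK "div" " class=\"a\">"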
Tokens
data OpenTagType Source #
Whether an opening tag is a start tag or an empty element tag.
Instances
data Token Source #
A Markup token. The term is borrowed from the HTML standards but is used across Html and Xml in this library.
Note that the Token type is used in two slightly different contexts:

- As an intermediary representation of markup between ByteString and Markup.
- As the primitives of Markup Elements.

Specifically, an EndTag will occur in a list of tokens, but not as a primitive in Markup. It may turn out to be better to have two different types for these two uses, and future iterations of this library may head in this direction.
>>>
runParser_ (many (tokenP Html)) [i|<foo>content</foo>|]
[OpenTag StartTag "foo" [],Content "content",EndTag "foo"]
>>>
runParser_ (tokenP Xml) [i|<foo/>|]
OpenTag EmptyElemTag "foo" []
>>>
runParser_ (tokenP Html) "<!-- Comment -->"
Comment " Comment "
>>>
runParser_ (tokenP Xml) [i|<?xml version="1.0" encoding="UTF-8"?>|]
Decl "xml" [Attr {attrName = "version", attrValue = " version=\"1.0\""},Attr {attrName = "encoding", attrValue = "UTF-8"}]
>>>
runParser_ (tokenP Html) "<!DOCTYPE html>"
Doctype "DOCTYPE html"
>>>
runParser_ (tokenP Xml) "<!DOCTYPE foo [ declarations ]>"
Doctype "DOCTYPE foo [ declarations ]"
>>>
runParser (tokenP Html) [i|<foo a="a" b="b" c=c check>|]
OK (OpenTag StartTag "foo" [Attr {attrName = "a", attrValue = "a"},Attr {attrName = "b", attrValue = "b"},Attr {attrName = "c", attrValue = "c"},Attr {attrName = "check", attrValue = ""}]) ""
>>>
runParser (tokenP Xml) [i|<foo a="a" b="b" c=c check>|]
Fail
OpenTag !OpenTagType !NameTag ![Attr] | A tag. https://developer.mozilla.org/en-US/docs/Glossary/Tag |
EndTag !NameTag | A closing tag. |
Content !ByteString | The content between tags. |
Comment !ByteString | Contents of a comment. |
Decl !ByteString ![Attr] | Contents of a declaration |
Doctype !ByteString | Contents of a doctype declaration. |
Instances
tokenize :: Standard -> ByteString -> Warn [Token] Source #
Parse a bytestring into tokens
>>>
tokenize Html [i|<foo>content</foo>|]
That [OpenTag StartTag "foo" [],Content "content",EndTag "foo"]
tokenP :: Standard -> Parser e Token Source #
A flatparse Token
parser.
>>>
runParser (tokenP Html) "<foo>content</foo>"
OK (OpenTag StartTag "foo" []) "content</foo>"
detokenize :: Standard -> Token -> ByteString Source #
Bytestring representation of a Token.
>>>
detokenize Html (OpenTag StartTag "foo" [])
"<foo>"
gather :: Standard -> [Token] -> Warn Markup Source #
Gather together token trees from a token list, placing child elements in nodes and removing EndTags.
>>>
gather Html =<< tokenize Html "<foo class=\"bar\">baz</foo>"
That (Markup {elements = [Node {rootLabel = OpenTag StartTag "foo" [Attr {attrName = "class", attrValue = "bar"}], subForest = [Node {rootLabel = Content "baz", subForest = []}]}]})
degather :: Standard -> Markup -> Warn [Token] Source #
Convert a markup into a token list, adding end tags.
>>>
degather Html =<< markup Html "<foo class=\"bar\">baz</foo>"
That [OpenTag StartTag "foo" [Attr {attrName = "class", attrValue = "bar"}],Content "baz",EndTag "foo"]
XML specific Parsers
xmlVersionInfoP :: Parser e ByteString Source #
xml production [24]
xmlEncodingDeclP :: Parser e ByteString Source #
xml production [80]
xmlStandaloneP :: Parser e ByteString Source #
xml production [32]
xmlVersionNumP :: Parser e ByteString Source #
xml production [26]
xmlEncNameP :: Parser e ByteString Source #
xml production [81]
xmlYesNoP :: Parser e ByteString Source #
xml yes/no
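A sketch of running one of these parsers directly; whether the leading space and quotes are part of the captured bytes is an assumption based on the Decl example above:

runParser xmlVersionInfoP " version=\"1.0\""
-- OK " version=\"1.0\"" ""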
bytestring support
utf8ToStr :: ByteString -> String #
Convert a UTF-8 encoded ByteString to a String.
strToUtf8 :: String -> ByteString #
Convert a String to a UTF-8 encoded ByteString.
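For example, a sketch of the byte-level behaviour:

strToUtf8 "café"
-- "caf\195\169"    -- 'é' becomes the two UTF-8 bytes 0xC3 0xA9
utf8ToStr (strToUtf8 "café")
-- "caf\233"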
escapeChar :: Char -> ByteString Source #
Escape a single character.
escape :: ByteString -> ByteString Source #
Escape the following predefined character entity references:

escapeChar '<' = "&lt;"
escapeChar '>' = "&gt;"
escapeChar '&' = "&amp;"
escapeChar '\'' = "&apos;"
escapeChar '"' = "&quot;"
No attempt is made to meet the HTML Standards
>>>
escape [i|<foo class="a" bar='b'>|]
"&lt;foo class=&quot;a&quot; bar=&apos;b&apos;&gt;"
Tree support
Non-empty, possibly infinite, multi-way trees; also known as rose trees.
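Since an Element is just a Tree Token, markup can also be assembled directly from Nodes (as in the wellFormed example above); a sketch:

Markup [Node (OpenTag StartTag "p" []) [Node (Content "hello") []]]
-- the same markup as: element "p" [] (content "hello")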
Instances
MonadFix Tree | Since: containers-0.5.11 |
MonadZip Tree | |
Foldable Tree | |
Defined in Data.Tree fold :: Monoid m => Tree m -> m # foldMap :: Monoid m => (a -> m) -> Tree a -> m # foldMap' :: Monoid m => (a -> m) -> Tree a -> m # foldr :: (a -> b -> b) -> b -> Tree a -> b # foldr' :: (a -> b -> b) -> b -> Tree a -> b # foldl :: (b -> a -> b) -> b -> Tree a -> b # foldl' :: (b -> a -> b) -> b -> Tree a -> b # foldr1 :: (a -> a -> a) -> Tree a -> a # foldl1 :: (a -> a -> a) -> Tree a -> a # elem :: Eq a => a -> Tree a -> Bool # maximum :: Ord a => Tree a -> a # | |
Eq1 Tree | Since: containers-0.5.9 |
Ord1 Tree | Since: containers-0.5.9 |
Read1 Tree | Since: containers-0.5.9 |
Show1 Tree | Since: containers-0.5.9 |
Traversable Tree | |
Applicative Tree | |
Functor Tree | |
Monad Tree | |
Hashable1 Tree | Since: hashable-1.3.4.0 |
Defined in Data.Hashable.Class | |
Generic1 Tree | |
Data a => Data (Tree a) | |
Defined in Data.Tree gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b) -> (forall g. g -> c g) -> Tree a -> c (Tree a) # gunfold :: (forall b r. Data b => c (b -> r) -> c r) -> (forall r. r -> c r) -> Constr -> c (Tree a) # toConstr :: Tree a -> Constr # dataTypeOf :: Tree a -> DataType # dataCast1 :: Typeable t => (forall d. Data d => c (t d)) -> Maybe (c (Tree a)) # dataCast2 :: Typeable t => (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c (Tree a)) # gmapT :: (forall b. Data b => b -> b) -> Tree a -> Tree a # gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> Tree a -> r # gmapQr :: forall r r'. (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> Tree a -> r # gmapQ :: (forall d. Data d => d -> u) -> Tree a -> [u] # gmapQi :: Int -> (forall d. Data d => d -> u) -> Tree a -> u # gmapM :: Monad m => (forall d. Data d => d -> m d) -> Tree a -> m (Tree a) # gmapMp :: MonadPlus m => (forall d. Data d => d -> m d) -> Tree a -> m (Tree a) # gmapMo :: MonadPlus m => (forall d. Data d => d -> m d) -> Tree a -> m (Tree a) # | |
Generic (Tree a) | |
Read a => Read (Tree a) | |
Show a => Show (Tree a) | |
NFData a => NFData (Tree a) | |
Eq a => Eq (Tree a) | |
Ord a => Ord (Tree a) | Since: containers-0.6.5 |
Hashable v => Hashable (Tree v) | Since: hashable-1.3.4.0 |
Defined in Data.Hashable.Class | |
ToExpr a => ToExpr (Tree a) | |
Defined in Data.TreeDiff.Class | |
type Rep1 Tree | Since: containers-0.5.8 |
Defined in Data.Tree type Rep1 Tree = D1 ('MetaData "Tree" "Data.Tree" "containers-0.6.5.1" 'False) (C1 ('MetaCons "Node" 'PrefixI 'True) (S1 ('MetaSel ('Just "rootLabel") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) Par1 :*: S1 ('MetaSel ('Just "subForest") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) ([] :.: Rec1 Tree))) | |
type Rep (Tree a) | Since: containers-0.5.8 |
Defined in Data.Tree type Rep (Tree a) = D1 ('MetaData "Tree" "Data.Tree" "containers-0.6.5.1" 'False) (C1 ('MetaCons "Node" 'PrefixI 'True) (S1 ('MetaSel ('Just "rootLabel") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 a) :*: S1 ('MetaSel ('Just "subForest") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 [Tree a]))) |