{-# LANGUAGE DeriveFunctor              #-}
{-# LANGUAGE FlexibleInstances          #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE RankNTypes                 #-}
{-# LANGUAGE StandaloneDeriving         #-}
{-# LANGUAGE TupleSections              #-}
{-# LANGUAGE TypeFamilies               #-}
-- | High-level XML parsers, built on top of "Data.XML.Parser.Mid":
--
-- - entity references are expanded
-- - CDATAs are abstracted away
-- - comments are ignored
-- - whitespace between tokens is ignored
-- - duplicate attributes are ignored
--
-- All documentation examples assume the following setup:
--
-- > :set -XOverloadedStrings
-- > import Data.Attoparsec.ByteString
module Data.XML.Parser.High
  ( module Data.XML.Parser.High.AttrParser
  , module Data.XML.Parser.High.NameParser
  , Prolog(..)
  , Token(..)
  , TokenParser()
  , ContentParser()
  , noContent
  , withContent
  , anyContent
  , runTokenParser
  , prolog
  , instruction
  , textContent
  , textContent'
  , tag
  , tag'
  , anyTag
  , anyToken
  , anyToken'
  ) where

import           Control.Applicative
import           Control.Arrow
import           Control.Monad.Compat
import           Control.Monad.Fail.Compat
import           Data.Function
import           Data.Map                        (Map)
import qualified Data.Map                        as Map
import           Data.Maybe
import           Data.String
import           Data.Text                       (Text)
import qualified Data.Text                       as Text
import           Data.XML.Parser.High.AttrParser
import           Data.XML.Parser.High.NameParser
import           Data.XML.Parser.Low
import qualified Data.XML.Parser.Mid             as Mid
import           Data.XML.Parser.Mid.Attribute
import           Prelude                         ()
import           Prelude.Compat
import           Text.Parser.Char
import           Text.Parser.Combinators
import           Text.ParserCombinators.ReadP    (readP_to_S)


-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Data.Attoparsec.ByteString

-- | XML document prolog.
data Prolog = Prolog
  { Prolog -> Maybe XMLDeclaration
prologXmlDeclaration :: Maybe Mid.XMLDeclaration
  , Prolog -> [Instruction]
prologInstructions   :: [Mid.Instruction]
  , Prolog -> Maybe Doctype
prologDoctype        :: Maybe Mid.Doctype
  } deriving (Prolog -> Prolog -> Bool
(Prolog -> Prolog -> Bool)
-> (Prolog -> Prolog -> Bool) -> Eq Prolog
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Prolog -> Prolog -> Bool
$c/= :: Prolog -> Prolog -> Bool
== :: Prolog -> Prolog -> Bool
$c== :: Prolog -> Prolog -> Bool
Eq, Eq Prolog
Eq Prolog
-> (Prolog -> Prolog -> Ordering)
-> (Prolog -> Prolog -> Bool)
-> (Prolog -> Prolog -> Bool)
-> (Prolog -> Prolog -> Bool)
-> (Prolog -> Prolog -> Bool)
-> (Prolog -> Prolog -> Prolog)
-> (Prolog -> Prolog -> Prolog)
-> Ord Prolog
Prolog -> Prolog -> Bool
Prolog -> Prolog -> Ordering
Prolog -> Prolog -> Prolog
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Prolog -> Prolog -> Prolog
$cmin :: Prolog -> Prolog -> Prolog
max :: Prolog -> Prolog -> Prolog
$cmax :: Prolog -> Prolog -> Prolog
>= :: Prolog -> Prolog -> Bool
$c>= :: Prolog -> Prolog -> Bool
> :: Prolog -> Prolog -> Bool
$c> :: Prolog -> Prolog -> Bool
<= :: Prolog -> Prolog -> Bool
$c<= :: Prolog -> Prolog -> Bool
< :: Prolog -> Prolog -> Bool
$c< :: Prolog -> Prolog -> Bool
compare :: Prolog -> Prolog -> Ordering
$ccompare :: Prolog -> Prolog -> Ordering
$cp1Ord :: Eq Prolog
Ord, ReadPrec [Prolog]
ReadPrec Prolog
Int -> ReadS Prolog
ReadS [Prolog]
(Int -> ReadS Prolog)
-> ReadS [Prolog]
-> ReadPrec Prolog
-> ReadPrec [Prolog]
-> Read Prolog
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Prolog]
$creadListPrec :: ReadPrec [Prolog]
readPrec :: ReadPrec Prolog
$creadPrec :: ReadPrec Prolog
readList :: ReadS [Prolog]
$creadList :: ReadS [Prolog]
readsPrec :: Int -> ReadS Prolog
$creadsPrec :: Int -> ReadS Prolog
Read, Int -> Prolog -> ShowS
[Prolog] -> ShowS
Prolog -> String
(Int -> Prolog -> ShowS)
-> (Prolog -> String) -> ([Prolog] -> ShowS) -> Show Prolog
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Prolog] -> ShowS
$cshowList :: [Prolog] -> ShowS
show :: Prolog -> String
$cshow :: Prolog -> String
showsPrec :: Int -> Prolog -> ShowS
$cshowsPrec :: Int -> Prolog -> ShowS
Show)

data Token
  = TokenProlog Prolog
  | TokenInstruction Mid.Instruction
  | TokenTag QName (Map QName Text) [Token]
  | TokenTextContent Text
  deriving (Token -> Token -> Bool
(Token -> Token -> Bool) -> (Token -> Token -> Bool) -> Eq Token
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Token -> Token -> Bool
$c/= :: Token -> Token -> Bool
== :: Token -> Token -> Bool
$c== :: Token -> Token -> Bool
Eq, Eq Token
Eq Token
-> (Token -> Token -> Ordering)
-> (Token -> Token -> Bool)
-> (Token -> Token -> Bool)
-> (Token -> Token -> Bool)
-> (Token -> Token -> Bool)
-> (Token -> Token -> Token)
-> (Token -> Token -> Token)
-> Ord Token
Token -> Token -> Bool
Token -> Token -> Ordering
Token -> Token -> Token
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Token -> Token -> Token
$cmin :: Token -> Token -> Token
max :: Token -> Token -> Token
$cmax :: Token -> Token -> Token
>= :: Token -> Token -> Bool
$c>= :: Token -> Token -> Bool
> :: Token -> Token -> Bool
$c> :: Token -> Token -> Bool
<= :: Token -> Token -> Bool
$c<= :: Token -> Token -> Bool
< :: Token -> Token -> Bool
$c< :: Token -> Token -> Bool
compare :: Token -> Token -> Ordering
$ccompare :: Token -> Token -> Ordering
$cp1Ord :: Eq Token
Ord, ReadPrec [Token]
ReadPrec Token
Int -> ReadS Token
ReadS [Token]
(Int -> ReadS Token)
-> ReadS [Token]
-> ReadPrec Token
-> ReadPrec [Token]
-> Read Token
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Token]
$creadListPrec :: ReadPrec [Token]
readPrec :: ReadPrec Token
$creadPrec :: ReadPrec Token
readList :: ReadS [Token]
$creadList :: ReadS [Token]
readsPrec :: Int -> ReadS Token
$creadsPrec :: Int -> ReadS Token
Read, Int -> Token -> ShowS
[Token] -> ShowS
Token -> String
(Int -> Token -> ShowS)
-> (Token -> String) -> ([Token] -> ShowS) -> Show Token
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Token] -> ShowS
$cshowList :: [Token] -> ShowS
show :: Token -> String
$cshow :: Token -> String
showsPrec :: Int -> Token -> ShowS
$cshowsPrec :: Int -> Token -> ShowS
Show)


-- | A parser that consumes whole 'Token's.
newtype TokenParser m a = TokenParser { TokenParser m a -> m a
runTokenParser :: m a }

deriving instance Functor m => Functor (TokenParser m)
deriving instance Applicative m => Applicative (TokenParser m)
deriving instance Alternative m => Alternative (TokenParser m)
deriving instance Monad m => Monad (TokenParser m)

instance (Parsing m, Monad m) => MonadFail (TokenParser m) where
  fail :: String -> TokenParser m a
fail = m a -> TokenParser m a
forall (m :: * -> *) a. m a -> TokenParser m a
TokenParser (m a -> TokenParser m a)
-> (String -> m a) -> String -> TokenParser m a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> m a
forall (m :: * -> *) a. Parsing m => String -> m a
unexpected

-- | How to parse tag content.
data ContentParser m a
  = NoContent (m a)
  | AnyContent ([Token] -> m a)
  | WithContent (TokenParser m a)

deriving instance Functor m => Functor (ContentParser m)

-- | Assert that content is not empty, and parse it using given token parser.
--
-- >>> parseOnly (runTokenParser $ tag' anyName anyAttr (withContent textContent')) "<tag key='value'>body</tag>"
-- Right "body"
-- >>> parseOnly (runTokenParser $ tag' anyName anyAttr (withContent textContent')) "<tag key='value'></tag>"
-- Left ...
-- >>> parseOnly (runTokenParser $ tag' anyName anyAttr (withContent textContent')) "<tag key='value'/>"
-- Left ...
-- >>> parseOnly (runTokenParser $ tag' anyName anyAttr (withContent $ pure ())) "<tag key='value'/>"
-- Left ...
withContent :: TokenParser m a -> ContentParser m a
withContent :: TokenParser m a -> ContentParser m a
withContent = TokenParser m a -> ContentParser m a
forall (m :: * -> *) a. TokenParser m a -> ContentParser m a
WithContent

-- | Assert that content is empty.
--
-- >>> parseOnly (runTokenParser $ tag' anyName anyAttr noContent) "<tag key='value'>body</tag>"
-- Left ...
-- >>> parseOnly (runTokenParser $ tag' anyName anyAttr noContent) "<tag key='value'></tag>"
-- Right ()
-- >>> parseOnly (runTokenParser $ tag' anyName anyAttr noContent) "<tag key='value'/>"
-- Right ()
noContent :: Applicative m => ContentParser m ()
noContent :: ContentParser m ()
noContent = m () -> ContentParser m ()
forall (m :: * -> *) a. m a -> ContentParser m a
NoContent (m () -> ContentParser m ()) -> m () -> ContentParser m ()
forall a b. (a -> b) -> a -> b
$ () -> m ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()

-- | Accept any content, including empty content.
--
-- >>> parseOnly (runTokenParser $ tag' anyName anyAttr anyContent) "<tag key='value'>body</tag>"
-- Right ()
-- >>> parseOnly (runTokenParser $ tag' anyName anyAttr anyContent) "<tag key='value'></tag>"
-- Right ()
-- >>> parseOnly (runTokenParser $ tag' anyName anyAttr anyContent) "<tag key='value'/>"
-- Right ()
anyContent :: CharParsing m => Monad m => ContentParser m ()
anyContent :: ContentParser m ()
anyContent = ([Token] -> m ()) -> ContentParser m ()
forall (m :: * -> *) a. ([Token] -> m a) -> ContentParser m a
AnyContent (([Token] -> m ()) -> ContentParser m ())
-> ([Token] -> m ()) -> ContentParser m ()
forall a b. (a -> b) -> a -> b
$ m () -> [Token] -> m ()
forall a b. a -> b -> a
const (m () -> [Token] -> m ()) -> m () -> [Token] -> m ()
forall a b. (a -> b) -> a -> b
$ () -> m ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()

-- | Parse a processing instruction.
--
-- <https://www.w3.org/TR/REC-xml/#dt-pi>
--
-- >>> parseOnly (runTokenParser instruction) "<?php echo 'Hello World!'; ?>"
-- Right (Instruction "php" "echo 'Hello World!'; ")
-- >>> parseOnly (runTokenParser instruction) "<!-- comments and whitespace are ignored -->  <?php echo 'Hello World!'; ?>"
-- Right (Instruction "php" "echo 'Hello World!'; ")
instruction :: CharParsing m => Monad m => TokenParser m Mid.Instruction
instruction :: TokenParser m Instruction
instruction = m Instruction -> TokenParser m Instruction
forall (m :: * -> *) a. m a -> TokenParser m a
TokenParser (m Instruction -> TokenParser m Instruction)
-> m Instruction -> TokenParser m Instruction
forall a b. (a -> b) -> a -> b
$ do
  m ()
forall (m :: * -> *). (CharParsing m, Monad m) => m ()
skipCommentsWhitespace
  TokenParser m Instruction -> m Instruction
forall (m :: * -> *) a. TokenParser m a -> m a
Mid.runTokenParser TokenParser m Instruction
forall (m :: * -> *).
(CharParsing m, Monad m) =>
TokenParser m Instruction
Mid.tokenInstruction

-- | <https://www.w3.org/TR/REC-xml/#NT-prolog>
--
-- >>> parseOnly (runTokenParser prolog) "<?xml version='1.0'?><!DOCTYPE greeting>"
-- Right (Prolog {prologXmlDeclaration = Just (XMLDeclaration "1.0" ...), prologInstructions = [], prologDoctype = Just (Doctype "greeting" ...)})
-- >>> parseOnly (runTokenParser prolog) "<?xml version='1.0'?>  <!-- comments and whitespace are ignored --><!DOCTYPE greeting>"
-- Right (Prolog {prologXmlDeclaration = Just (XMLDeclaration "1.0" ...), prologInstructions = [], prologDoctype = Just (Doctype "greeting" ...)})
prolog :: CharParsing m => Monad m => TokenParser m Prolog
prolog :: TokenParser m Prolog
prolog = m Prolog -> TokenParser m Prolog
forall (m :: * -> *) a. m a -> TokenParser m a
TokenParser (m Prolog -> TokenParser m Prolog)
-> m Prolog -> TokenParser m Prolog
forall a b. (a -> b) -> a -> b
$ do
  Maybe XMLDeclaration
xmlDeclaration <- m XMLDeclaration -> m (Maybe XMLDeclaration)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (m XMLDeclaration -> m (Maybe XMLDeclaration))
-> m XMLDeclaration -> m (Maybe XMLDeclaration)
forall a b. (a -> b) -> a -> b
$ TokenParser m XMLDeclaration -> m XMLDeclaration
forall (m :: * -> *) a. TokenParser m a -> m a
Mid.runTokenParser TokenParser m XMLDeclaration
forall (m :: * -> *).
(CharParsing m, Monad m) =>
TokenParser m XMLDeclaration
Mid.tokenXmlDeclaration
  m ()
forall (m :: * -> *). (CharParsing m, Monad m) => m ()
skipCommentsWhitespace
  [Instruction]
instructions <- TokenParser m [Instruction] -> m [Instruction]
forall (m :: * -> *) a. TokenParser m a -> m a
runTokenParser (TokenParser m [Instruction] -> m [Instruction])
-> TokenParser m [Instruction] -> m [Instruction]
forall a b. (a -> b) -> a -> b
$ TokenParser m Instruction -> TokenParser m [Instruction]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many TokenParser m Instruction
forall (m :: * -> *).
(CharParsing m, Monad m) =>
TokenParser m Instruction
instruction
  Maybe Doctype
doctype <- m Doctype -> m (Maybe Doctype)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (m Doctype -> m (Maybe Doctype)) -> m Doctype -> m (Maybe Doctype)
forall a b. (a -> b) -> a -> b
$ do
    m ()
forall (m :: * -> *). (CharParsing m, Monad m) => m ()
skipCommentsWhitespace
    TokenParser m Doctype -> m Doctype
forall (m :: * -> *) a. TokenParser m a -> m a
Mid.runTokenParser TokenParser m Doctype
forall (m :: * -> *).
(CharParsing m, Monad m) =>
TokenParser m Doctype
Mid.tokenDoctype

  Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Maybe XMLDeclaration -> Bool
forall a. Maybe a -> Bool
isNothing Maybe XMLDeclaration
xmlDeclaration Bool -> Bool -> Bool
&& [Instruction] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Instruction]
instructions Bool -> Bool -> Bool
&& Maybe Doctype -> Bool
forall a. Maybe a -> Bool
isNothing Maybe Doctype
doctype)
    (m () -> m ()) -> m () -> m ()
forall a b. (a -> b) -> a -> b
$ String -> m ()
forall (m :: * -> *) a. Parsing m => String -> m a
unexpected String
"Expected XML prolog"

  Prolog -> m Prolog
forall (m :: * -> *) a. Monad m => a -> m a
return (Prolog -> m Prolog) -> Prolog -> m Prolog
forall a b. (a -> b) -> a -> b
$ Maybe XMLDeclaration -> [Instruction] -> Maybe Doctype -> Prolog
Prolog Maybe XMLDeclaration
xmlDeclaration [Instruction]
instructions Maybe Doctype
doctype


-- | Parse textual content of a tag, including CDATA.
textContent :: CharParsing m => Monad m => EntityDecoder -> TokenParser m Text
textContent :: EntityDecoder -> TokenParser m Text
textContent EntityDecoder
entityDecoder = m Text -> TokenParser m Text
forall (m :: * -> *) a. m a -> TokenParser m a
TokenParser (m Text -> TokenParser m Text) -> m Text -> TokenParser m Text
forall a b. (a -> b) -> a -> b
$ [Text] -> Text
forall a. Monoid a => [a] -> a
mconcat ([Text] -> Text) -> m [Text] -> m Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> do
  m ()
forall (m :: * -> *). (CharParsing m, Monad m) => m ()
skipComments
  (m Text
textualData m Text -> m Text -> m Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> TokenParser m Text -> m Text
forall (m :: * -> *) a. TokenParser m a -> m a
Mid.runTokenParser TokenParser m Text
forall (m :: * -> *).
(CharParsing m, Monad m) =>
TokenParser m Text
Mid.tokenCdata) m Text -> m Text -> m [Text]
forall (m :: * -> *) a sep. Alternative m => m a -> m sep -> m [a]
`sepBy1` TokenParser m Text -> m Text
forall (m :: * -> *) a. TokenParser m a -> m a
Mid.runTokenParser TokenParser m Text
forall (m :: * -> *).
(CharParsing m, Monad m) =>
TokenParser m Text
Mid.tokenComment
  where textualData :: m Text
textualData = EntityDecoder -> [Content] -> m Text
forall (m :: * -> *).
(Alternative m, Monad m) =>
EntityDecoder -> [Content] -> m Text
expandContents EntityDecoder
entityDecoder ([Content] -> m Text) -> m [Content] -> m Text
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< TokenParser m [Content] -> m [Content]
forall (m :: * -> *) a. TokenParser m a -> m a
Mid.runTokenParser TokenParser m [Content]
forall (m :: * -> *).
(CharParsing m, Monad m) =>
TokenParser m [Content]
Mid.tokenData

-- | Same as @textContent (decodePredefinedEntities <> decodeHtmlEntities)@, provided for convenience.
--
-- >>> parseOnly (runTokenParser $ tag' anyName anyAttr (withContent textContent')) "<tag>body<!-- Ignored comment --><![CDATA[<innertag>innerbody</innertag>]]></tag>"
-- Right "body<innertag>innerbody</innertag>"
textContent' :: CharParsing m => Monad m => TokenParser m Text
textContent' :: TokenParser m Text
textContent' = EntityDecoder -> TokenParser m Text
forall (m :: * -> *).
(CharParsing m, Monad m) =>
EntityDecoder -> TokenParser m Text
textContent EntityDecoder
decodeStandardEntities


normalizeAttributes :: EntityDecoder -> [Attribute] -> Map QName Text
normalizeAttributes :: EntityDecoder -> [Attribute] -> Map QName Text
normalizeAttributes EntityDecoder
entityDecoder [Attribute]
attributes = [(QName, Text)] -> Map QName Text
forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList ([(QName, Text)] -> Map QName Text)
-> [(QName, Text)] -> Map QName Text
forall a b. (a -> b) -> a -> b
$ do
  Attribute QName
name [Content]
contents <- [Attribute]
attributes
  Text
value <- Maybe Text -> [Text]
forall a. Maybe a -> [a]
maybeToList (Maybe Text -> [Text]) -> Maybe Text -> [Text]
forall a b. (a -> b) -> a -> b
$ EntityDecoder -> [Content] -> Maybe Text
forall (m :: * -> *).
(Alternative m, Monad m) =>
EntityDecoder -> [Content] -> m Text
expandContents EntityDecoder
entityDecoder [Content]
contents
  (QName, Text) -> [(QName, Text)]
forall (m :: * -> *) a. Monad m => a -> m a
return (QName
name, Text
value)

-- | Generic tag parser.
tag :: CharParsing m => Monad m
    => EntityDecoder             -- ^ How to expand entity references
    -> NameParser a              -- ^ How to parse tag name
    -> (a -> AttrParser b)       -- ^ How to parse tag attributes
    -> (b -> ContentParser m c)  -- ^ How to parse tag content
    -> TokenParser m c
tag :: EntityDecoder
-> NameParser a
-> (a -> AttrParser b)
-> (b -> ContentParser m c)
-> TokenParser m c
tag EntityDecoder
entityDecoder NameParser a
parseName a -> AttrParser b
parseAttributes b -> ContentParser m c
parseContent = TokenParser m c
parseStartToEnd TokenParser m c -> TokenParser m c -> TokenParser m c
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> TokenParser m c
parseEmptyElement where
  parseStartToEnd :: TokenParser m c
parseStartToEnd = m c -> TokenParser m c
forall (m :: * -> *) a. m a -> TokenParser m a
TokenParser (m c -> TokenParser m c) -> m c -> TokenParser m c
forall a b. (a -> b) -> a -> b
$ do
    m ()
forall (m :: * -> *). (CharParsing m, Monad m) => m ()
skipCommentsWhitespace
    Mid.StartTag QName
name [Attribute]
attributes <- TokenParser m StartTag -> m StartTag
forall (m :: * -> *) a. TokenParser m a -> m a
Mid.runTokenParser TokenParser m StartTag
forall (m :: * -> *).
(CharParsing m, Monad m) =>
TokenParser m StartTag
Mid.tokenStartTag
    a
a <- QName -> m a
processName QName
name
    b
b <- a -> [Attribute] -> m b
processAttributes a
a [Attribute]
attributes
    c
c <- case b -> ContentParser m c
parseContent b
b of
      NoContent m c
f        -> m c
f
      AnyContent [Token] -> m c
f       -> [Token] -> m c
f ([Token] -> m c) -> m [Token] -> m c
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< TokenParser m [Token] -> m [Token]
forall (m :: * -> *) a. TokenParser m a -> m a
runTokenParser (TokenParser m Token -> TokenParser m [Token]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (TokenParser m Token -> TokenParser m [Token])
-> TokenParser m Token -> TokenParser m [Token]
forall a b. (a -> b) -> a -> b
$ EntityDecoder -> TokenParser m Token
forall (m :: * -> *).
(CharParsing m, Monad m) =>
EntityDecoder -> TokenParser m Token
anyToken EntityDecoder
entityDecoder)
      WithContent TokenParser m c
parser -> TokenParser m c -> m c
forall (m :: * -> *) a. TokenParser m a -> m a
runTokenParser TokenParser m c
parser
    m ()
forall (m :: * -> *). (CharParsing m, Monad m) => m ()
skipCommentsWhitespace
    TokenParser m () -> m ()
forall (m :: * -> *) a. TokenParser m a -> m a
Mid.runTokenParser (TokenParser m () -> m ()) -> TokenParser m () -> m ()
forall a b. (a -> b) -> a -> b
$ do
      QName
name' <- TokenParser m QName
forall (m :: * -> *).
(CharParsing m, Monad m) =>
TokenParser m QName
Mid.tokenEndTag
      Bool -> TokenParser m () -> TokenParser m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (QName
name QName -> QName -> Bool
forall a. Eq a => a -> a -> Bool
/= QName
name') (TokenParser m () -> TokenParser m ())
-> TokenParser m () -> TokenParser m ()
forall a b. (a -> b) -> a -> b
$ String -> TokenParser m ()
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"Invalid end tag name"
    c -> m c
forall (m :: * -> *) a. Monad m => a -> m a
return c
c
  parseEmptyElement :: TokenParser m c
parseEmptyElement = m c -> TokenParser m c
forall (m :: * -> *) a. m a -> TokenParser m a
TokenParser (m c -> TokenParser m c) -> m c -> TokenParser m c
forall a b. (a -> b) -> a -> b
$ do
    m ()
forall (m :: * -> *). (CharParsing m, Monad m) => m ()
skipCommentsWhitespace
    Mid.EmptyElementTag QName
name [Attribute]
attributes <- TokenParser m EmptyElementTag -> m EmptyElementTag
forall (m :: * -> *) a. TokenParser m a -> m a
Mid.runTokenParser TokenParser m EmptyElementTag
forall (m :: * -> *).
(CharParsing m, Monad m) =>
TokenParser m EmptyElementTag
Mid.tokenEmptyElementTag
    a
a <- QName -> m a
processName QName
name
    b
b <- a -> [Attribute] -> m b
processAttributes a
a [Attribute]
attributes
    case b -> ContentParser m c
parseContent b
b of
      NoContent m c
f        -> m c
f
      AnyContent [Token] -> m c
f       -> [Token] -> m c
f [Token]
forall a. Monoid a => a
mempty
      WithContent TokenParser m c
parser -> String -> m c
forall (m :: * -> *) a. Parsing m => String -> m a
unexpected String
"Expected non-empty tag"
  processName :: QName -> m a
processName QName
name = NameParser a -> QName -> Either String a
forall a. NameParser a -> QName -> Either String a
runNameParser NameParser a
parseName QName
name
    Either String a -> (Either String a -> m a) -> m a
forall a b. a -> (a -> b) -> b
& (String -> m a) -> (a -> m a) -> Either String a -> m a
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> m a
forall (m :: * -> *) a. Parsing m => String -> m a
unexpected a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return
  processAttributes :: a -> [Attribute] -> m b
processAttributes a
state [Attribute]
attributes = AttrParser b -> Map QName Text -> Either String b
forall a. AttrParser a -> Map QName Text -> Either String a
runAttrParser (a -> AttrParser b
parseAttributes a
state) (EntityDecoder -> [Attribute] -> Map QName Text
normalizeAttributes EntityDecoder
entityDecoder [Attribute]
attributes)
    Either String b -> (Either String b -> m b) -> m b
forall a b. a -> (a -> b) -> b
& (String -> m b) -> (b -> m b) -> Either String b -> m b
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> m b
forall (m :: * -> *) a. Parsing m => String -> m a
unexpected b -> m b
forall (m :: * -> *) a. Monad m => a -> m a
return

-- | Simplified version of 'tag':
--
-- - no state forwarding between name, attributes and content parsers
-- - uses @decodePredefinedEntities <> decodeHtmlEntities@ to expand entity references
tag' :: CharParsing m => Monad m
     => NameParser a       -- ^ How to parse tag name
     -> AttrParser b       -- ^ How to parse tag attributes
     -> ContentParser m c  -- ^ How to parse tag content
     -> TokenParser m c
tag' :: NameParser a
-> AttrParser b -> ContentParser m c -> TokenParser m c
tag' NameParser a
parseName AttrParser b
parseAttributes ContentParser m c
parseBody = EntityDecoder
-> NameParser a
-> (a -> AttrParser b)
-> (b -> ContentParser m c)
-> TokenParser m c
forall (m :: * -> *) a b c.
(CharParsing m, Monad m) =>
EntityDecoder
-> NameParser a
-> (a -> AttrParser b)
-> (b -> ContentParser m c)
-> TokenParser m c
tag EntityDecoder
decodeStandardEntities NameParser a
parseName (AttrParser b -> a -> AttrParser b
forall a b. a -> b -> a
const AttrParser b
parseAttributes) (ContentParser m c -> b -> ContentParser m c
forall a b. a -> b -> a
const ContentParser m c
parseBody)

-- | Parse a tag with any name, any attributes and any content.
--
-- >>> parseOnly (runTokenParser anyTag) "<tag key='value'>body</tag>"
-- Right ()
-- >>> parseOnly (runTokenParser anyTag) "<tag key='value'/>"
-- Right ()
-- >>> parseOnly (runTokenParser anyTag) "<!-- ignored comment --><tag key='value'/>"
-- Right ()
-- >>> parseOnly (runTokenParser anyTag) "<tag key='value'>body<!-- ignored comment --></tag>"
-- Right ()
anyTag :: CharParsing m => Monad m => TokenParser m ()
anyTag :: TokenParser m ()
anyTag = NameParser QName
-> AttrParser () -> ContentParser m () -> TokenParser m ()
forall (m :: * -> *) a b c.
(CharParsing m, Monad m) =>
NameParser a
-> AttrParser b -> ContentParser m c -> TokenParser m c
tag' NameParser QName
anyName AttrParser ()
anyAttr ContentParser m ()
forall (m :: * -> *).
(CharParsing m, Monad m) =>
ContentParser m ()
anyContent


-- | Parse any 'Token'.
anyToken :: CharParsing m => Monad m => EntityDecoder -> TokenParser m Token
anyToken :: EntityDecoder -> TokenParser m Token
anyToken EntityDecoder
entityDecoder = (Prolog -> Token
TokenProlog (Prolog -> Token) -> TokenParser m Prolog -> TokenParser m Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TokenParser m Prolog
forall (m :: * -> *).
(CharParsing m, Monad m) =>
TokenParser m Prolog
prolog)
  TokenParser m Token -> TokenParser m Token -> TokenParser m Token
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Instruction -> Token
TokenInstruction (Instruction -> Token)
-> TokenParser m Instruction -> TokenParser m Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TokenParser m Instruction
forall (m :: * -> *).
(CharParsing m, Monad m) =>
TokenParser m Instruction
instruction)
  TokenParser m Token -> TokenParser m Token -> TokenParser m Token
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> TokenParser m Token
tokenTag
  TokenParser m Token -> TokenParser m Token -> TokenParser m Token
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Text -> Token
TokenTextContent (Text -> Token) -> TokenParser m Text -> TokenParser m Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> EntityDecoder -> TokenParser m Text
forall (m :: * -> *).
(CharParsing m, Monad m) =>
EntityDecoder -> TokenParser m Text
textContent EntityDecoder
entityDecoder)
  where tokenTag :: TokenParser m Token
tokenTag = EntityDecoder
-> NameParser QName
-> (QName -> AttrParser (QName, Map QName Text))
-> ((QName, Map QName Text) -> ContentParser m Token)
-> TokenParser m Token
forall (m :: * -> *) a b c.
(CharParsing m, Monad m) =>
EntityDecoder
-> NameParser a
-> (a -> AttrParser b)
-> (b -> ContentParser m c)
-> TokenParser m c
tag EntityDecoder
entityDecoder NameParser QName
anyName (\QName
name -> (QName
name,) (Map QName Text -> (QName, Map QName Text))
-> AttrParser (Map QName Text)
-> AttrParser (QName, Map QName Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> AttrParser (Map QName Text)
forwardAttrs) (((QName, Map QName Text) -> ContentParser m Token)
 -> TokenParser m Token)
-> ((QName, Map QName Text) -> ContentParser m Token)
-> TokenParser m Token
forall a b. (a -> b) -> a -> b
$ \(QName
name, Map QName Text
attributes) ->
          QName -> Map QName Text -> [Token] -> Token
TokenTag QName
name Map QName Text
attributes ([Token] -> Token)
-> ContentParser m [Token] -> ContentParser m Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ([Token] -> m [Token]) -> ContentParser m [Token]
forall (m :: * -> *) a. ([Token] -> m a) -> ContentParser m a
AnyContent [Token] -> m [Token]
forall (f :: * -> *) a. Applicative f => a -> f a
pure
        forwardAttrs :: AttrParser (Map QName Text)
forwardAttrs = (Map QName Text -> Either String (Map QName Text))
-> AttrParser (Map QName Text)
forall a. (Map QName Text -> Either String a) -> AttrParser a
AttrParser Map QName Text -> Either String (Map QName Text)
forall a b. b -> Either a b
Right

-- | Same as @anyToken (decodePredefinedEntities <> decodeHtmlEntities)@, provided for convenience.
anyToken' :: CharParsing m => Monad m => TokenParser m Token
anyToken' :: TokenParser m Token
anyToken' = EntityDecoder -> TokenParser m Token
forall (m :: * -> *).
(CharParsing m, Monad m) =>
EntityDecoder -> TokenParser m Token
anyToken EntityDecoder
decodeStandardEntities


-- * Private functions

skipComments :: CharParsing m => Monad m => m ()
skipComments :: m ()
skipComments = m [Text] -> m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (m [Text] -> m ()) -> m [Text] -> m ()
forall a b. (a -> b) -> a -> b
$ m Text -> m [Text]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (m Text -> m [Text]) -> m Text -> m [Text]
forall a b. (a -> b) -> a -> b
$ TokenParser m Text -> m Text
forall (m :: * -> *) a. TokenParser m a -> m a
Mid.runTokenParser TokenParser m Text
forall (m :: * -> *).
(CharParsing m, Monad m) =>
TokenParser m Text
Mid.tokenComment

skipCommentsWhitespace :: CharParsing m => Monad m => m ()
skipCommentsWhitespace :: m ()
skipCommentsWhitespace = m [()] -> m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (m [()] -> m ()) -> m [()] -> m ()
forall a b. (a -> b) -> a -> b
$ m () -> m [()]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (m () -> m [()]) -> m () -> m [()]
forall a b. (a -> b) -> a -> b
$ m Text -> m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (TokenParser m Text -> m Text
forall (m :: * -> *) a. TokenParser m a -> m a
Mid.runTokenParser TokenParser m Text
forall (m :: * -> *).
(CharParsing m, Monad m) =>
TokenParser m Text
Mid.tokenComment) m () -> m () -> m ()
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> m String -> m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void m String
forall (m :: * -> *). CharParsing m => m String
tokenWhitespace

decodeStandardEntities :: EntityDecoder
decodeStandardEntities :: EntityDecoder
decodeStandardEntities = EntityDecoder
decodePredefinedEntities EntityDecoder -> EntityDecoder -> EntityDecoder
forall a. Semigroup a => a -> a -> a
<> EntityDecoder
decodeHtmlEntities