{-# LANGUAGE OverloadedStrings #-}
-- | Low-level XML parsers:
--
-- - parsed tokens are small and may overlap; it is not possible to tokenize XML document in a stateless way
-- - parsers are reversible: all formatting details are retained (e.g. whitespacing)
--
-- All documentation examples assume the following setup:
--
-- > :set -XOverloadedStrings
-- > import Data.Attoparsec.ByteString
module Data.XML.Parser.Low
  ( module Data.XML.Parser.Low.Entity
  , module Data.XML.Parser.Low.Name
  , module Data.XML.Parser.Low.Reference
  , module Data.XML.Parser.Low
  , module Data.XML.InternalSubset.Parser.Low
  ) where

import           Control.Applicative
import           Control.Arrow                      ((>>>))
import           Control.Monad
import           Data.Char
import           Data.Functor
import           Data.Text                          (Text)
import qualified Data.Text                          as Text
import           Data.XML.InternalSubset.Parser.Low
import           Data.XML.Parser.Low.Entity
import           Data.XML.Parser.Low.Name
import           Data.XML.Parser.Low.Reference
import           Numeric
import           Text.Parser.Char
import           Text.Parser.Combinators


-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Data.Attoparsec.ByteString

-- | Raw text or reference.
data Content = ContentText Text | ContentReference Reference
  deriving (Content -> Content -> Bool
(Content -> Content -> Bool)
-> (Content -> Content -> Bool) -> Eq Content
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Content -> Content -> Bool
$c/= :: Content -> Content -> Bool
== :: Content -> Content -> Bool
$c== :: Content -> Content -> Bool
Eq, Eq Content
Eq Content
-> (Content -> Content -> Ordering)
-> (Content -> Content -> Bool)
-> (Content -> Content -> Bool)
-> (Content -> Content -> Bool)
-> (Content -> Content -> Bool)
-> (Content -> Content -> Content)
-> (Content -> Content -> Content)
-> Ord Content
Content -> Content -> Bool
Content -> Content -> Ordering
Content -> Content -> Content
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Content -> Content -> Content
$cmin :: Content -> Content -> Content
max :: Content -> Content -> Content
$cmax :: Content -> Content -> Content
>= :: Content -> Content -> Bool
$c>= :: Content -> Content -> Bool
> :: Content -> Content -> Bool
$c> :: Content -> Content -> Bool
<= :: Content -> Content -> Bool
$c<= :: Content -> Content -> Bool
< :: Content -> Content -> Bool
$c< :: Content -> Content -> Bool
compare :: Content -> Content -> Ordering
$ccompare :: Content -> Content -> Ordering
$cp1Ord :: Eq Content
Ord, ReadPrec [Content]
ReadPrec Content
Int -> ReadS Content
ReadS [Content]
(Int -> ReadS Content)
-> ReadS [Content]
-> ReadPrec Content
-> ReadPrec [Content]
-> Read Content
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Content]
$creadListPrec :: ReadPrec [Content]
readPrec :: ReadPrec Content
$creadPrec :: ReadPrec Content
readList :: ReadS [Content]
$creadList :: ReadS [Content]
readsPrec :: Int -> ReadS Content
$creadsPrec :: Int -> ReadS Content
Read, Int -> Content -> ShowS
[Content] -> ShowS
Content -> String
(Int -> Content -> ShowS)
-> (Content -> String) -> ([Content] -> ShowS) -> Show Content
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Content] -> ShowS
$cshowList :: [Content] -> ShowS
show :: Content -> String
$cshow :: Content -> String
showsPrec :: Int -> Content -> ShowS
$cshowsPrec :: Int -> Content -> ShowS
Show)

-- | Expand content reference, if any.
expandContent :: Alternative m => EntityDecoder -> Content -> m Text
expandContent :: EntityDecoder -> Content -> m Text
expandContent EntityDecoder
_ (ContentText Text
t)      = Text -> m Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
t
expandContent EntityDecoder
f (ContentReference Reference
r) = EntityDecoder -> Reference -> m Text
forall (m :: * -> *).
Alternative m =>
EntityDecoder -> Reference -> m Text
expandReference EntityDecoder
f Reference
r

-- | Same as 'expandContent', but on a list. Provided for convenience.
expandContents :: Alternative m => Monad m => EntityDecoder -> [Content] -> m Text
expandContents :: EntityDecoder -> [Content] -> m Text
expandContents EntityDecoder
f [Content]
contents = [Text] -> Text
forall a. Monoid a => [a] -> a
mconcat ([Text] -> Text) -> m [Text] -> m Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Content -> m Text) -> [Content] -> m [Text]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (EntityDecoder -> Content -> m Text
forall (m :: * -> *).
Alternative m =>
EntityDecoder -> Content -> m Text
expandContent EntityDecoder
f) [Content]
contents

-- | @'@
tokenSingleQuote :: CharParsing m => m Char
tokenSingleQuote :: m Char
tokenSingleQuote = Char -> m Char
forall (m :: * -> *). CharParsing m => Char -> m Char
char Char
'\''

-- | @"@
tokenDoubleQuote :: CharParsing m => m Char
tokenDoubleQuote :: m Char
tokenDoubleQuote = Char -> m Char
forall (m :: * -> *). CharParsing m => Char -> m Char
char Char
'"'

-- | Single or double quote.
tokenQuote :: CharParsing m => m Char
tokenQuote :: m Char
tokenQuote = m Char
forall (m :: * -> *). CharParsing m => m Char
tokenSingleQuote m Char -> m Char -> m Char
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> m Char
forall (m :: * -> *). CharParsing m => m Char
tokenDoubleQuote

-- | <https://www.w3.org/TR/REC-xml/#NT-S>
tokenWhitespace :: CharParsing m => m String
tokenWhitespace :: m String
tokenWhitespace = m Char -> m String
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some ((Char -> Bool) -> m Char
forall (m :: * -> *). CharParsing m => (Char -> Bool) -> m Char
satisfy Char -> Bool
isXmlSpace) where
  isXmlSpace :: Char -> Bool
isXmlSpace Char
' '  = Bool
True
  isXmlSpace Char
'\t' = Bool
True
  isXmlSpace Char
'\r' = Bool
True
  isXmlSpace Char
'\n' = Bool
True
  isXmlSpace Char
_    = Bool
False

-- | <https://www.w3.org/TR/REC-xml/#NT-Eq>
tokenEqual :: CharParsing m => Monad m => m ()
tokenEqual :: m ()
tokenEqual = do
  m String -> m (Maybe String)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional m String
forall (m :: * -> *). CharParsing m => m String
tokenWhitespace
  Char -> m Char
forall (m :: * -> *). CharParsing m => Char -> m Char
char Char
'='
  m String -> m (Maybe String)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional m String
forall (m :: * -> *). CharParsing m => m String
tokenWhitespace
  () -> m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()

tokenContent :: CharParsing m => Monad m => String -> m Content
tokenContent :: String -> m Content
tokenContent String
forbiddenChars = (Text -> Content
ContentText (Text -> Content) -> (String -> Text) -> String -> Content
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
Text.pack (String -> Content) -> m String -> m Content
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> m Char -> m String
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some (String -> m Char
forall (m :: * -> *). CharParsing m => String -> m Char
noneOf (String -> m Char) -> String -> m Char
forall a b. (a -> b) -> a -> b
$ Char
'&'Char -> ShowS
forall a. a -> [a] -> [a]
:String
forbiddenChars))
  m Content -> m Content -> m Content
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Reference -> Content
ContentReference (Reference -> Content) -> m Reference -> m Content
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> m Reference
forall (m :: * -> *). (CharParsing m, Monad m) => m Reference
tokenReference)

-- | Return processing instruction name.
--
-- >>> parseOnly tokenInstructionOpen "<?php"
-- Right "php"
tokenInstructionOpen :: CharParsing m => Monad m => m Text
tokenInstructionOpen :: m Text
tokenInstructionOpen = do
  String -> m String
forall (m :: * -> *). CharParsing m => String -> m String
string String
"<?"
  Text
name <- m Text
forall (m :: * -> *). (CharParsing m, Monad m) => m Text
tokenName
  Bool -> m ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> m ()) -> Bool -> m ()
forall a b. (a -> b) -> a -> b
$ Text -> Text
Text.toLower Text
name Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
/= Text
"xml"
  Text -> m Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
name

-- | @?>@
tokenInstructionClose :: CharParsing m => m ()
tokenInstructionClose :: m ()
tokenInstructionClose = m String -> m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (m String -> m ()) -> m String -> m ()
forall a b. (a -> b) -> a -> b
$ String -> m String
forall (m :: * -> *). CharParsing m => String -> m String
string String
"?>"

-- | @<![CDATA[@
--
-- <https://www.w3.org/TR/REC-xml/#NT-CDStart>
tokenCdataOpen :: CharParsing m => m ()
tokenCdataOpen :: m ()
tokenCdataOpen = m String -> m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (m String -> m ()) -> m String -> m ()
forall a b. (a -> b) -> a -> b
$ String -> m String
forall (m :: * -> *). CharParsing m => String -> m String
string String
"<![CDATA["

-- | @]]>@
--
-- <https://www.w3.org/TR/REC-xml/#NT-CDEnd>
tokenCdataClose :: CharParsing m => m ()
tokenCdataClose :: m ()
tokenCdataClose = m String -> m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (m String -> m ()) -> m String -> m ()
forall a b. (a -> b) -> a -> b
$ String -> m String
forall (m :: * -> *). CharParsing m => String -> m String
string String
"]]>"

-- | @<!--@
tokenCommentOpen :: CharParsing m => m ()
tokenCommentOpen :: m ()
tokenCommentOpen = m String -> m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (m String -> m ()) -> m String -> m ()
forall a b. (a -> b) -> a -> b
$ String -> m String
forall (m :: * -> *). CharParsing m => String -> m String
string String
"<!--"

-- | @-->@
tokenCommentClose :: CharParsing m => m ()
tokenCommentClose :: m ()
tokenCommentClose = m String -> m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (m String -> m ()) -> m String -> m ()
forall a b. (a -> b) -> a -> b
$ String -> m String
forall (m :: * -> *). CharParsing m => String -> m String
string String
"-->"

-- | @<!DOCTYPE@
tokenDoctypeOpen :: CharParsing m => m ()
tokenDoctypeOpen :: m ()
tokenDoctypeOpen = m String -> m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (m String -> m ()) -> m String -> m ()
forall a b. (a -> b) -> a -> b
$ String -> m String
forall (m :: * -> *). CharParsing m => String -> m String
string String
"<!DOCTYPE"

-- | @<?xml@
tokenXmlDeclarationOpen :: CharParsing m => m ()
tokenXmlDeclarationOpen :: m ()
tokenXmlDeclarationOpen = m String -> m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (m String -> m ()) -> m String -> m ()
forall a b. (a -> b) -> a -> b
$ String -> m String
forall (m :: * -> *). CharParsing m => String -> m String
string String
"<?xml"

-- | @?>@
tokenXmlDeclarationClose :: CharParsing m => m ()
tokenXmlDeclarationClose :: m ()
tokenXmlDeclarationClose = m String -> m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (m String -> m ()) -> m String -> m ()
forall a b. (a -> b) -> a -> b
$ String -> m String
forall (m :: * -> *). CharParsing m => String -> m String
string String
"?>"

-- | @/>@
tokenEmptyElementTagClose :: CharParsing m => m ()
tokenEmptyElementTagClose :: m ()
tokenEmptyElementTagClose = m String -> m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (m String -> m ()) -> m String -> m ()
forall a b. (a -> b) -> a -> b
$ String -> m String
forall (m :: * -> *). CharParsing m => String -> m String
string String
"/>"

-- | Return tag name.
--
-- >>> parseOnly tokenStartTagOpen "<foo"
-- Right (QName {namePrefix = "", nameLocal = "foo"})
tokenStartTagOpen :: CharParsing m => Monad m => m QName
tokenStartTagOpen :: m QName
tokenStartTagOpen = Char -> m Char
forall (m :: * -> *). CharParsing m => Char -> m Char
char Char
'<' m Char -> m QName -> m QName
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> m QName
forall (m :: * -> *). (CharParsing m, Monad m) => m QName
tokenQualifiedName

-- | Return tag name.
--
-- >>> parseOnly tokenEndTagOpen "</foo"
-- Right (QName {namePrefix = "", nameLocal = "foo"})
tokenEndTagOpen :: CharParsing m => Monad m => m QName
tokenEndTagOpen :: m QName
tokenEndTagOpen = String -> m String
forall (m :: * -> *). CharParsing m => String -> m String
string String
"</" m String -> m QName -> m QName
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> m QName
forall (m :: * -> *). (CharParsing m, Monad m) => m QName
tokenQualifiedName

-- | @>@
tokenElementClose :: CharParsing m => m ()
tokenElementClose :: m ()
tokenElementClose = m Char -> m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (m Char -> m ()) -> m Char -> m ()
forall a b. (a -> b) -> a -> b
$ Char -> m Char
forall (m :: * -> *). CharParsing m => Char -> m Char
char Char
'>'