{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE StandaloneDeriving #-}
module Data.XML.Parser.Mid
( module Data.XML.Parser.Mid.Attribute
, module Data.XML.Parser.Mid.Doctype
, Instruction(..)
, XMLDeclaration(..)
, StartTag(..)
, EmptyElementTag(..)
, Token(..)
, TokenParser()
, runTokenParser
, tokenInstruction
, tokenComment
, tokenCdata
, tokenDoctype
, tokenXmlDeclaration
, tokenStartTag
, tokenEndTag
, tokenEmptyElementTag
, tokenData
, anyToken
) where
import Control.Applicative
import Control.Arrow ((>>>))
import Control.Monad.Compat
import Control.Monad.Fail.Compat
import Data.Char
import Data.Functor
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as Text
import Data.XML.Parser.Low
import Data.XML.Parser.Mid.Attribute
import Data.XML.Parser.Mid.Doctype
import Numeric
import Text.Parser.Char
import Text.Parser.Combinators
data Token
= TokenXMLDeclaration XMLDeclaration
| TokenDoctype Doctype
| TokenInstruction Instruction
| TokenStartTag StartTag
| TokenEndTag QName
| TokenEmptyElementTag EmptyElementTag
| TokenData [Content]
| TokenComment Text
| TokenCDATA Text
deriving (Eq, Ord, Show)
data Instruction = Instruction Text Text
deriving (Eq, Ord, Read, Show)
data XMLDeclaration = XMLDeclaration Text (Maybe Text) (Maybe Bool)
deriving (Eq, Ord, Read, Show)
data StartTag = StartTag QName [Attribute]
deriving (Eq, Ord, Read, Show)
data EmptyElementTag = EmptyElementTag QName [Attribute]
deriving (Eq, Ord, Read, Show)
newtype TokenParser m a = TokenParser { runTokenParser :: m a }
deriving instance Functor m => Functor (TokenParser m)
deriving instance Applicative m => Applicative (TokenParser m)
deriving instance Alternative m => Alternative (TokenParser m)
deriving instance Monad m => Monad (TokenParser m)
instance (Parsing m, Monad m) => MonadFail (TokenParser m) where
fail = TokenParser . unexpected
tokenDoctype :: CharParsing m => Monad m => TokenParser m Doctype
tokenDoctype = TokenParser doctype
tokenInstruction :: CharParsing m => Monad m => TokenParser m Instruction
tokenInstruction = TokenParser $ do
name <- tokenInstructionOpen
tokenWhitespace
content <- manyTill anyChar $ try tokenInstructionClose
return $ Instruction name $ Text.pack content
tokenComment :: CharParsing m => Monad m => TokenParser m Text
tokenComment = TokenParser $ do
tokenCommentOpen
content <- manyTill anyChar $ try tokenCommentClose
return $ Text.pack content
tokenCdata :: CharParsing m => Monad m => TokenParser m Text
tokenCdata = TokenParser $ do
tokenCdataOpen
content <- manyTill anyChar $ try tokenCdataClose
return $ Text.pack content
tokenXmlDeclaration :: CharParsing m => Monad m => TokenParser m XMLDeclaration
tokenXmlDeclaration = TokenParser $ do
tokenXmlDeclarationOpen
tokenWhitespace
Attribute key value <- attribute
guard $ key == QName "" "version"
version <- expandContents decodePredefinedEntities value
encoding <- optional $ do
tokenWhitespace
Attribute key value <- attribute
guard $ key == QName "" "encoding"
expandContents decodePredefinedEntities value
standalone <- optional $ do
tokenWhitespace
Attribute key value <- attribute
guard $ key == QName "" "standalone"
boolean <- expandContents decodePredefinedEntities value
case boolean of
"yes" -> return True
"no" -> return False
_ -> empty
optional tokenWhitespace
tokenXmlDeclarationClose
return $ XMLDeclaration version encoding standalone
tokenStartTag :: CharParsing m => Monad m => TokenParser m StartTag
tokenStartTag = TokenParser $ do
name <- tokenStartTagOpen
attributes <- many (tokenWhitespace >> attribute)
optional tokenWhitespace
tokenElementClose
return $ StartTag name attributes
tokenEndTag :: CharParsing m => Monad m => TokenParser m QName
tokenEndTag = TokenParser $ do
name <- tokenEndTagOpen
optional tokenWhitespace
tokenElementClose
return name
tokenEmptyElementTag :: CharParsing m => Monad m => TokenParser m EmptyElementTag
tokenEmptyElementTag = TokenParser $ do
name <- tokenStartTagOpen
attributes <- optional $ do
tokenWhitespace
attribute `sepBy` tokenWhitespace
optional tokenWhitespace
tokenEmptyElementTagClose
return $ EmptyElementTag name $ fromMaybe mempty attributes
tokenData :: CharParsing m => Monad m => TokenParser m [Content]
tokenData = TokenParser $ some (tokenContent "<")
anyToken :: CharParsing m => Monad m => TokenParser m Token
anyToken = TokenDoctype <$> tokenDoctype
<|> TokenInstruction <$> tokenInstruction
<|> TokenComment <$> tokenComment
<|> TokenCDATA <$> tokenCdata
<|> TokenXMLDeclaration <$> tokenXmlDeclaration
<|> TokenStartTag <$> tokenStartTag
<|> TokenEndTag <$> tokenEndTag
<|> TokenEmptyElementTag <$> tokenEmptyElementTag
<|> TokenData <$> tokenData