module Text.XML.Expat.Tree (
Node,
NodeG(..),
UNode,
module Text.XML.Expat.Internal.NodeClass,
QNode,
module Text.XML.Expat.Internal.Qualified,
NNode,
module Text.XML.Expat.Internal.Namespaced,
ParseOptions(..),
defaultParseOptions,
Encoding(..),
parse,
parse',
parseG,
XMLParseError(..),
XMLParseLocation(..),
parseThrowing,
XMLParseException(..),
saxToTree,
saxToTreeG,
GenericXMLString(..)
) where
import Text.XML.Expat.SAX ( Encoding(..)
, GenericXMLString(..)
, ParseOptions(..)
, defaultParseOptions
, SAXEvent(..)
, XMLParseError(..)
, XMLParseException(..)
, XMLParseLocation(..) )
import qualified Text.XML.Expat.SAX as SAX
import Text.XML.Expat.Internal.Namespaced
import Text.XML.Expat.Internal.NodeClass
import Text.XML.Expat.Internal.Qualified
import Control.Arrow
import Control.Monad (mplus, mzero)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as L
import Data.List.Class
import Data.Monoid (Monoid,mempty,mappend)
import Control.DeepSeq
data NodeG c tag text =
Element {
eName :: !tag,
eAttributes :: ![(tag,text)],
eChildren :: c (NodeG c tag text)
} |
Text !text
type instance ListOf (NodeG c tag text) = c (NodeG c tag text)
instance (Show tag, Show text) => Show (NodeG [] tag text) where
showsPrec d (Element na at ch) = showParen (d > 10) $
("Element "++) . showsPrec 11 na . (" "++) .
showsPrec 11 at . (" "++) .
showsPrec 11 ch
showsPrec d (Text t) = showParen (d > 10) $ ("Text "++) . showsPrec 11 t
instance (Eq tag, Eq text) => Eq (NodeG [] tag text) where
Element na1 at1 ch1 == Element na2 at2 ch2 =
na1 == na2 &&
at1 == at2 &&
ch1 == ch2
Text t1 == Text t2 = t1 == t2
_ == _ = False
type Node tag text = NodeG [] tag text
instance (NFData tag, NFData text) => NFData (NodeG [] tag text) where
rnf (Element nam att chi) = rnf (nam, att, chi)
rnf (Text txt) = rnf txt
type UNode text = Node text text
type QNode text = Node (QName text) text
type NNode text = Node (NName text) text
instance (Functor c, List c) => NodeClass NodeG c where
textContentM (Element _ _ children) = foldlL mappend mempty $ joinM $ fmap textContentM children
textContentM (Text txt) = return txt
isElement (Element _ _ _) = True
isElement _ = False
isText (Text _) = True
isText _ = False
isCData _ = False
isProcessingInstruction _ = False
isComment _ = False
isNamed _ (Text _) = False
isNamed nm (Element nm' _ _) = nm == nm'
getName (Text _) = mempty
getName (Element name _ _) = name
hasTarget _ _ = False
getTarget _ = mempty
getAttributes (Text _) = []
getAttributes (Element _ attrs _) = attrs
getChildren (Text _) = mzero
getChildren (Element _ _ ch) = ch
getText (Text txt) = txt
getText (Element _ _ _) = mempty
modifyName _ node@(Text _) = node
modifyName f (Element n a c) = Element (f n) a c
modifyAttributes _ node@(Text _) = node
modifyAttributes f (Element n a c) = Element n (f a) c
modifyChildren _ node@(Text _) = node
modifyChildren f (Element n a c) = Element n a (f c)
mapAllTags _ (Text t) = Text t
mapAllTags f (Element n a c) = Element (f n) (map (first f) a) (fmap (mapAllTags f) c)
modifyElement _ (Text t) = Text t
modifyElement f (Element n a c) =
let (n', a', c') = f (n, a, c)
in Element n' a' c'
mapNodeContainer f (Element n a ch) = do
ch' <- mapNodeListContainer f ch
return $ Element n a ch'
mapNodeContainer _ (Text t) = return $ Text t
mkText = Text
instance (Functor c, List c) => MkElementClass NodeG c where
mkElement name attrs children = Element name attrs children
parse' :: (GenericXMLString tag, GenericXMLString text) =>
ParseOptions tag text
-> ByteString
-> Either XMLParseError (Node tag text)
parse' opts doc = case parse opts (L.fromChunks [doc]) of
(xml, Nothing) -> Right xml
(_, Just err) -> Left err
saxToTree :: GenericXMLString tag =>
[SAXEvent tag text]
-> (Node tag text, Maybe XMLParseError)
saxToTree events =
let (nodes, mError, _) = ptl events
in (findRoot nodes, mError)
where
findRoot (elt@(Element _ _ _):_) = elt
findRoot (_:nodes) = findRoot nodes
findRoot [] = Element (gxFromString "") [] []
ptl (StartElement name attrs:rema) =
let (children, err1, rema') = ptl rema
elt = Element name attrs children
(out, err2, rema'') = ptl rema'
in (elt:out, err1 `mplus` err2, rema'')
ptl (EndElement _:rema) = ([], Nothing, rema)
ptl (CharacterData txt:rema) =
let (out, err, rema') = ptl rema
in (Text txt:out, err, rema')
ptl (FailDocument err:_) = ([], Just err, [])
ptl (_:rema) = ptl rema
ptl [] = ([], Nothing, [])
saxToTreeG :: forall tag text l . (GenericXMLString tag, List l) =>
l (SAXEvent tag text)
-> ItemM l (NodeG l tag text)
saxToTreeG events = do
li <- runList (process events)
case li of
Cons elt@(Element _ _ _ ) _ -> return elt
_ -> return $ Element (gxFromString "") mzero mzero
where
process :: l (SAXEvent tag text) -> l (NodeG l tag text)
process events = joinL $ process_ events
where
process_ :: l (SAXEvent tag text) -> ItemM l (l (NodeG l tag text))
process_ events = do
li <- runList events
case li of
Nil -> return mzero
Cons (StartElement name attrs) rema -> do
return $ Element name attrs (process rema) `cons` process (stripElement rema)
Cons (EndElement _) _ -> return mzero
Cons (CharacterData txt) rema -> return $ Text txt `cons` process rema
Cons _ rema -> process_ rema
stripElement :: l (SAXEvent tag text) -> l (SAXEvent tag text)
stripElement events = joinL $ stripElement_ 0 events
where
stripElement_ :: Int -> l (SAXEvent tag text) -> ItemM l (l (SAXEvent tag text))
stripElement_ level events = level `seq` do
li <- runList events
case li of
Nil -> return mzero
Cons (StartElement _ _) rema -> stripElement_ (level+1) rema
Cons (EndElement _) rema -> if level == 0 then return rema
else stripElement_ (level1) rema
Cons _ rema -> stripElement_ level rema
parse :: (GenericXMLString tag, GenericXMLString text) =>
ParseOptions tag text
-> L.ByteString
-> (Node tag text, Maybe XMLParseError)
parse opts bs = saxToTree $ SAX.parse opts bs
parseG :: (GenericXMLString tag, GenericXMLString text, List l) =>
ParseOptions tag text
-> l ByteString
-> ItemM l (NodeG l tag text)
parseG opts = saxToTreeG . SAX.parseG opts
parseThrowing :: (GenericXMLString tag, GenericXMLString text) =>
ParseOptions tag text
-> L.ByteString
-> Node tag text
parseThrowing opts bs = fst $ saxToTree $ SAX.parseThrowing opts bs