{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}
module Text.XML.Unresolved
(
writeFile
, readFile
, renderLBS
, parseLBS
, parseLBS_
, parseText
, parseText_
, sinkTextDoc
, sinkDoc
, toEvents
, elementToEvents
, fromEvents
, elementFromEvents
, renderBuilder
, renderBytes
, renderText
, InvalidEventStream (..)
, P.def
, P.ParseSettings
, P.psDecodeEntities
, P.psRetainNamespaces
, R.RenderSettings
, R.rsPretty
, R.rsNamespaces
) where
import Conduit
import Control.Applicative ((<$>), (<*>))
import Control.Exception (Exception, SomeException, throw)
import Control.Monad (when)
import Control.Monad.Trans.Class (lift)
import Data.ByteString (ByteString)
import Data.ByteString.Builder (Builder)
import qualified Data.ByteString.Lazy as L
import Data.Char (isSpace)
import qualified Data.Conduit.Binary as CB
import Data.Conduit.Lazy (lazyConsume)
import qualified Data.Conduit.List as CL
import Data.Maybe (isJust, mapMaybe)
import Data.Monoid (mconcat)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import Data.Typeable (Typeable)
import Data.XML.Types
import Prelude hiding (readFile, writeFile)
import System.IO.Unsafe (unsafePerformIO)
import Text.XML.Stream.Parse (ParseSettings)
import qualified Text.XML.Stream.Parse as P
import qualified Text.XML.Stream.Render as R
readFile :: P.ParseSettings -> FilePath -> IO Document
readFile ps fp = runConduitRes $ CB.sourceFile fp .| sinkDoc ps
sinkDoc :: MonadThrow m
=> P.ParseSettings
-> ConduitT ByteString o m Document
sinkDoc ps = P.parseBytesPos ps .| fromEvents
writeFile :: R.RenderSettings -> FilePath -> Document -> IO ()
writeFile rs fp doc =
runConduitRes $ renderBytes rs doc .| CB.sinkFile fp
renderLBS :: R.RenderSettings -> Document -> L.ByteString
renderLBS rs doc =
L.fromChunks $ unsafePerformIO
$ lazyConsume
$ renderBytes rs doc
parseLBS :: P.ParseSettings -> L.ByteString -> Either SomeException Document
parseLBS ps lbs = runConduit $ CL.sourceList (L.toChunks lbs) .| sinkDoc ps
parseLBS_ :: P.ParseSettings -> L.ByteString -> Document
parseLBS_ ps lbs = either throw id $ parseLBS ps lbs
data InvalidEventStream = ContentAfterRoot P.EventPos
| MissingRootElement
| InvalidInlineDoctype P.EventPos
| MissingEndElement Name (Maybe P.EventPos)
| UnterminatedInlineDoctype
deriving Typeable
instance Exception InvalidEventStream
instance Show InvalidEventStream where
show (ContentAfterRoot (pos, e)) = mShowPos pos ++ "Found content after root element: " ++ prettyShowE e
show MissingRootElement = "Missing root element"
show (InvalidInlineDoctype (pos, e)) = mShowPos pos ++ "Invalid content inside doctype: " ++ prettyShowE e
show (MissingEndElement name Nothing) = "Documented ended while expected end element for: " ++ prettyShowName name
show (MissingEndElement name (Just (pos, e))) = mShowPos pos ++ "Expected end element for: " ++ prettyShowName name ++ ", but received: " ++ prettyShowE e
show UnterminatedInlineDoctype = "Unterminated doctype declaration"
mShowPos :: Maybe P.PositionRange -> String
mShowPos Nothing = ""
mShowPos (Just pos) = show pos ++ ": "
prettyShowE :: Event -> String
prettyShowE = show
prettyShowName :: Name -> String
prettyShowName = show
renderBuilder :: Monad m => R.RenderSettings -> Document -> ConduitT i Builder m ()
renderBuilder rs doc = CL.sourceList (toEvents doc) .| R.renderBuilder rs
renderBytes :: PrimMonad m => R.RenderSettings -> Document -> ConduitT i ByteString m ()
renderBytes rs doc = CL.sourceList (toEvents doc) .| R.renderBytes rs
renderText :: (MonadThrow m, PrimMonad m) => R.RenderSettings -> Document -> ConduitT i Text m ()
renderText rs doc = CL.sourceList (toEvents doc) .| R.renderText rs
manyTries :: Monad m => m (Maybe a) -> m [a]
manyTries f =
go id
where
go front = do
x <- f
case x of
Nothing -> return $ front []
Just y -> go (front . (:) y)
dropReturn :: Monad m => a -> ConduitM i o m a
dropReturn x = CL.drop 1 >> return x
fromEvents :: MonadThrow m => ConduitT P.EventPos o m Document
fromEvents = do
skip EventBeginDocument
d <- Document <$> goP <*> require elementFromEvents <*> goM
skip EventEndDocument
y <- CL.head
case y of
Nothing -> return d
Just (_, EventEndDocument) -> lift $ throwM MissingRootElement
Just z ->
lift $ throwM $ ContentAfterRoot z
where
skip e = do
x <- CL.peek
when (fmap snd x == Just e) (CL.drop 1)
require f = do
x <- f
case x of
Just y -> return y
Nothing -> do
my <- CL.head
case my of
Nothing -> error "Text.XML.Unresolved:impossible"
Just (_, EventEndDocument) -> lift $ throwM MissingRootElement
Just y -> lift $ throwM $ ContentAfterRoot y
goP = Prologue <$> goM <*> goD <*> goM
goM = manyTries goM'
goM' = do
x <- CL.peek
case x of
Just (_, EventInstruction i) -> dropReturn $ Just $ MiscInstruction i
Just (_, EventComment t) -> dropReturn $ Just $ MiscComment t
Just (_, EventContent (ContentText t))
| T.all isSpace t -> CL.drop 1 >> goM'
_ -> return Nothing
goD = do
x <- CL.peek
case x of
Just (_, EventBeginDoctype name meid) -> do
CL.drop 1
dropTillDoctype
return (Just $ Doctype name meid)
_ -> return Nothing
dropTillDoctype = do
x <- CL.head
case x of
Just (_, EventEndDoctype) -> return ()
Just epos -> lift $ throwM $ InvalidInlineDoctype epos
Nothing -> lift $ throwM UnterminatedInlineDoctype
elementFromEvents :: MonadThrow m => ConduitT P.EventPos o m (Maybe Element)
elementFromEvents = goE
where
goE = do
x <- CL.peek
case x of
Just (_, EventBeginElement n as) -> Just <$> goE' n as
_ -> return Nothing
goE' n as = do
CL.drop 1
ns <- manyTries goN
y <- CL.head
if fmap snd y == Just (EventEndElement n)
then return $ Element n as $ compressNodes ns
else lift $ throwM $ MissingEndElement n y
goN = do
x <- CL.peek
case x of
Just (_, EventBeginElement n as) -> (Just . NodeElement) <$> goE' n as
Just (_, EventInstruction i) -> dropReturn $ Just $ NodeInstruction i
Just (_, EventContent c) -> dropReturn $ Just $ NodeContent c
Just (_, EventComment t) -> dropReturn $ Just $ NodeComment t
Just (_, EventCDATA t) -> dropReturn $ Just $ NodeContent $ ContentText t
_ -> return Nothing
toEvents :: Document -> [Event]
toEvents (Document prol root epi) =
(EventBeginDocument :)
. goP prol . elementToEvents' root . goM epi $ [EventEndDocument]
where
goP (Prologue before doctype after) =
goM before . maybe id goD doctype . goM after
goM [] = id
goM [x] = (goM' x :)
goM (x:xs) = (goM' x :) . goM xs
goM' (MiscInstruction i) = EventInstruction i
goM' (MiscComment t) = EventComment t
goD (Doctype name meid) =
(:) (EventBeginDoctype name meid)
. (:) EventEndDoctype
elementToEvents :: Element -> [Event]
elementToEvents e = elementToEvents' e []
elementToEvents' :: Element -> [Event] -> [Event]
elementToEvents' = goE
where
goE (Element name as ns) =
(EventBeginElement name as :)
. goN ns
. (EventEndElement name :)
goN [] = id
goN [x] = goN' x
goN (x:xs) = goN' x . goN xs
goN' (NodeElement e) = goE e
goN' (NodeInstruction i) = (EventInstruction i :)
goN' (NodeContent c) = (EventContent c :)
goN' (NodeComment t) = (EventComment t :)
compressNodes :: [Node] -> [Node]
compressNodes [] = []
compressNodes [x] = [x]
compressNodes (x@(NodeContent (ContentText _)) : y@(NodeContent (ContentText _)) : z) =
let (textNodes, remainder) = span (isJust . unContent) (x:y:z)
texts = mapMaybe unContent textNodes
in
compressNodes $ NodeContent (ContentText $ mconcat texts) : remainder
where
unContent (NodeContent (ContentText text)) = Just text
unContent _ = Nothing
compressNodes (x:xs) = x : compressNodes xs
parseText :: ParseSettings -> TL.Text -> Either SomeException Document
parseText ps tl =
runConduit
$ CL.sourceList (TL.toChunks tl)
.| sinkTextDoc ps
parseText_ :: ParseSettings -> TL.Text -> Document
parseText_ ps = either throw id . parseText ps
sinkTextDoc :: MonadThrow m
=> ParseSettings
-> ConduitT Text o m Document
sinkTextDoc ps = P.parseTextPos ps .| fromEvents