{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Xmlbf
(
FromXml(fromXml)
, Parser
, runParser
, pElement
, pAttr
, pAttrs
, pText
, pRead
, pEndOfInput
, ToXml(toXml)
, encode
, Node
, pattern Element
, element
, element'
, pattern Text
, text
, dfpos
, dfposM
, dfpre
, dfpreM
) where
import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Builder.Prim as BBP
import qualified Data.Char as Char
import Data.Foldable (for_, toList)
import Data.Functor.Identity (Identity(Identity), runIdentity)
import qualified Data.HashMap.Strict as HM
import Data.Monoid ((<>))
import Data.Sequence (Seq)
import qualified Data.Sequence as Seq
import Data.String (IsString(fromString))
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import Data.Typeable (Typeable, typeRep, tyConName, typeRepTyCon)
import Data.Traversable (for)
import Data.Word (Word8)
import Control.Applicative (Alternative(empty, (<|>)))
import Control.Monad (MonadPlus(mplus, mzero), join, guard)
import Control.Monad.Fail (MonadFail(fail))
import qualified Text.Read
data Node
= Element' !T.Text !(HM.HashMap T.Text T.Text) ![Node]
| Text' !T.Text
deriving (Eq)
instance Show Node where
showsPrec n = \x -> showParen (n > 10) $ case x of
Text' t -> showString "Text " . showsPrec 0 t
Element' t as cs ->
showString "Element " .
showsPrec 0 t . showChar ' ' .
showsPrec 0 (HM.toList as) . showChar ' ' .
showsPrec 0 cs
pattern Element :: T.Text -> (HM.HashMap T.Text T.Text) -> [Node] -> Node
pattern Element t as cs <- Element' t as cs
{-# COMPLETE Element #-}
pattern Text :: T.Text -> Node
pattern Text t <- Text' t
{-# COMPLETE Text #-}
instance IsString Node where
fromString = text . T.pack
{-# INLINABLE fromString #-}
normalize :: [Node] -> [Node]
{-# INLINE normalize #-}
normalize = \case
Text a : Text b : ns -> normalize (text (a <> b) : ns)
(n : ns) -> n : normalize ns
[] -> []
text :: T.Text -> Node
text = Text'
{-# INLINE text #-}
element
:: T.Text
-> HM.HashMap T.Text T.Text
-> [Node]
-> Either String Node
element t0 hm0 ns0 = do
guarde (t0 == T.strip t0) $
"Element name has surrounding whitespace: " ++ show t0
guarde (not (T.null t0)) ("Element name is blank: " ++ show t0)
for_ (HM.keys hm0) $ \k -> do
guarde (k == T.strip k) $
"Attribute name has surrounding whitespace: " ++ show k
guarde (not (T.null k)) ("Attribute name is blank: " ++ show k)
Right (Element' t0 hm0 (normalize ns0))
element'
:: T.Text
-> HM.HashMap T.Text T.Text
-> [Node]
-> Node
{-# INLINE element' #-}
element' t hm ns =
case element t hm ns of
Right x -> x
Left e -> error ("element': " ++ e)
guarde :: Bool -> String -> Either String ()
{-# INLINE guarde #-}
guarde True _ = Right ()
guarde False s = Left s
class FromXml a where
fromXml :: Parser a
newtype Parser a = Parser { unParser :: S -> Either String (a, S) }
deriving (Functor)
runParser :: Parser a -> [Node] -> Either String a
runParser p0 = fmap fst . unParser p0 . STop . normalize
data S
= STop ![Node]
| SReg !(HM.HashMap T.Text T.Text) ![Node]
deriving (Show)
instance Applicative Parser where
{-# INLINE pure #-}
pure = \a -> Parser (\s -> Right (a, s))
{-# INLINE (<*>) #-}
Parser gf <*> Parser ga = Parser $ \s0 -> do
(f, s1) <- gf s0
(a, s2) <- ga s1
pure (f a, s2)
instance Monad Parser where
{-# INLINE return #-}
return = pure
{-# INLINE (>>=) #-}
Parser ga >>= k = Parser $ \s0 -> do
(a, s1) <- ga s0
unParser (k a) s1
fail e = Parser (\_ -> Left e)
instance MonadFail Parser where
fail e = Parser (\_ -> Left e)
instance Alternative Parser where
{-# INLINE empty #-}
empty = Parser (\_ -> Left "empty")
{-# INLINE (<|>) #-}
Parser a <|> Parser b = Parser (\s -> either (\_ -> b s) Right (a s))
instance MonadPlus Parser where
{-# INLINE mzero #-}
mzero = empty
{-# INLINE mplus #-}
mplus = (<|>)
pElement :: T.Text -> Parser a -> Parser a
{-# INLINABLE pElement #-}
pElement t0 p0 = Parser $ \case
SReg as0 (Element' t as cs : cs0) | t == t0 -> do
(a,_) <- unParser p0 (SReg as cs)
Right (a, SReg as0 cs0)
STop (Element' t as cs : cs0) | t == t0 -> do
(a,_) <- unParser p0 (SReg as cs)
Right (a, STop cs0)
SReg as (Text' x : cs) | T.all Char.isSpace x ->
unParser (pElement t0 p0) (SReg as cs)
STop (Text' x : cs) | T.all Char.isSpace x ->
unParser (pElement t0 p0) (STop cs)
_ -> Left ("Missing element " ++ show t0)
pAttr :: T.Text -> Parser T.Text
{-# INLINABLE pAttr #-}
pAttr n = Parser $ \case
STop _ -> Left "Before selecting an attribute, you must select an element"
SReg as cs -> case HM.lookup n as of
Just x -> Right (x, SReg (HM.delete n as) cs)
Nothing -> Left ("Missing attribute " ++ show n)
pAttrs :: Parser (HM.HashMap T.Text T.Text)
{-# INLINABLE pAttrs #-}
pAttrs = Parser $ \case
STop _ -> Left "Before selecting an attribute, you must select an element"
SReg as cs -> Right (as, SReg mempty cs)
pText :: Parser T.Text
{-# INLINABLE pText #-}
pText = Parser $ \case
STop (Text x : ns) -> Right (x, STop ns)
SReg as (Text x : cs) -> Right (x, SReg as cs)
_ -> Left "Missing text node"
pRead :: (Typeable a, Read a) => T.Text -> Parser a
{-# INLINABLE pRead #-}
pRead = \t -> case Text.Read.readMaybe (T.unpack t) of
Just a -> pure a
ya@Nothing -> do
let ty = tyConName (typeRepTyCon (typeRep ya))
Parser (\_ -> Left ("Can't read as " ++ ty ++ ": " ++ show t))
pEndOfInput :: Parser ()
{-# INLINABLE pEndOfInput #-}
pEndOfInput = Parser (\s ->
if isEof s then Right ((), s)
else Left "Not end of input yet")
isEof :: S -> Bool
{-# INLINE isEof #-}
isEof = \case
SReg as cs -> HM.null as && null cs
STop ns -> null ns
class ToXml a where
toXml :: a -> [Node]
encode :: [Node] -> BB.Builder
encode xs = mconcat $ xs >>= \case
Text x -> [encodeXmlUtf8 x]
Element t as cs ->
[ "<"
, encodeUtf8 t
, mconcat $ do
(k,v) <- HM.toList as
guard (not (T.null k))
[ " ", encodeUtf8 k, "=\"", encodeXmlUtf8 v, "\"" ]
, if null cs then "/" else ""
, ">"
, encode cs
, if null cs then "" else "</" <> encodeUtf8 t <> ">"
]
dfpos :: ((Node -> [Node]) -> Node -> [Node]) -> Node -> [Node]
dfpos f = runIdentity . dfposM (\k -> Identity . f (runIdentity . k))
dfposM :: Monad m => ((Node -> m [Node]) -> Node -> m [Node]) -> Node -> m [Node]
dfposM f = \n0 -> do
c1 <- traverseChildren (dfposM f) (cursorFromNode n0)
c2 <- traverseRightSiblings (dfposM f) c1
fmap (normalize . join)
(traverse (f (dfposM f)) (cursorSiblings c2))
dfpre :: ((Node -> [Node]) -> Node -> [Node]) -> Node -> [Node]
dfpre f = runIdentity . dfpreM (\k -> Identity . f (runIdentity . k))
dfpreM :: Monad m => ((Node -> m [Node]) -> Node -> m [Node]) -> Node -> m [Node]
dfpreM f = \n0 -> do
ns <- f (dfpreM f) n0
fmap (normalize . join) $ for ns $ \n -> do
c1 <- traverseChildren (dfpreM f) (cursorFromNode n)
cursorSiblings <$> traverseRightSiblings (dfpreM f) c1
data Cursor = Cursor
{ _cursorCurrent :: !Node
, _cursorLefts :: !(Seq Node)
, _cursorRights :: !(Seq Node)
, _cursorParents :: !(Seq (Seq Node, T.Text, HM.HashMap T.Text T.Text, Seq Node))
}
traverseChildren :: Monad m => (Node -> m [Node]) -> Cursor -> m Cursor
{-# INLINE traverseChildren #-}
traverseChildren f c0 = case _cursorCurrent c0 of
Text _ -> pure c0
Element t as cs -> do
n1s <- fmap (normalize . join) (traverse f cs)
pure (c0 {_cursorCurrent = Element' t as n1s})
traverseRightSiblings :: Monad m => (Node -> m [Node]) -> Cursor -> m Cursor
{-# INLINE traverseRightSiblings #-}
traverseRightSiblings f c0 = case cursorRemoveRight c0 of
Nothing -> pure c0
Just (n1, c1) -> do
n2s <- fmap normalize (f n1)
traverseRightSiblings f (cursorInsertManyRight n2s c1)
cursorFromNode :: Node -> Cursor
{-# INLINE cursorFromNode #-}
cursorFromNode n = Cursor n mempty mempty mempty
cursorSiblings :: Cursor -> [Node]
{-# INLINE cursorSiblings #-}
cursorSiblings (Cursor cur ls rs _) =
toList (Seq.reverse ls <> (cur Seq.<| rs))
cursorRemoveRight :: Cursor -> Maybe (Node, Cursor)
{-# INLINE cursorRemoveRight #-}
cursorRemoveRight = \case
Cursor n ls rs0 ps | not (Seq.null rs0) ->
case Seq.viewl rs0 of
r Seq.:< rs -> Just (r, Cursor n ls rs ps)
_ -> undefined
_ -> Nothing
cursorInsertManyRight :: [Node] -> Cursor -> Cursor
{-# INLINE cursorInsertManyRight #-}
cursorInsertManyRight ns (Cursor nn ls rs ps) =
Cursor nn ls (Seq.fromList ns <> rs) ps
encodeUtf8 :: T.Text -> BB.Builder
{-# INLINE encodeUtf8 #-}
encodeUtf8 = T.encodeUtf8Builder
encodeXmlUtf8 :: T.Text -> BB.Builder
{-# INLINE encodeXmlUtf8 #-}
encodeXmlUtf8 = T.encodeUtf8BuilderEscaped xmlEscaped
xmlEscaped :: BBP.BoundedPrim Word8
{-# INLINE xmlEscaped #-}
xmlEscaped =
BBP.condB (== 38) (fixed5 (38,(97,(109,(112,59))))) $
BBP.condB (== 60) (fixed4 (38,(108,(116,59)))) $
BBP.condB (== 62) (fixed4 (38,(103,(116,59)))) $
BBP.condB (== 34) (fixed5 (38,(35,(51,(52,59))))) $
BBP.liftFixedToBounded BBP.word8
where
{-# INLINE fixed4 #-}
fixed4 :: (Word8, (Word8, (Word8, Word8))) -> BBP.BoundedPrim Word8
fixed4 x = BBP.liftFixedToBounded
(const x BBP.>$< BBP.word8 BBP.>*< BBP.word8
BBP.>*< BBP.word8 BBP.>*< BBP.word8)
{-# INLINE fixed5 #-}
fixed5 :: (Word8, (Word8, (Word8, (Word8, Word8)))) -> BBP.BoundedPrim Word8
fixed5 x = BBP.liftFixedToBounded
(const x BBP.>$< BBP.word8 BBP.>*< BBP.word8
BBP.>*< BBP.word8 BBP.>*< BBP.word8 BBP.>*< BBP.word8)