module Text.XML.HXT.Arrow.Edit
( canonicalizeAllNodes
, canonicalizeForXPath
, canonicalizeContents
, collapseAllXText
, collapseXText
, xshowEscapeXml
, escapeXmlRefs
, escapeHtmlRefs
, haskellRepOfXmlDoc
, treeRepOfXmlDoc
, addHeadlineToXmlDoc
, indentDoc
, numberLinesInXmlDoc
, preventEmptyElements
, removeComment
, removeAllComment
, removeWhiteSpace
, removeAllWhiteSpace
, removeDocWhiteSpace
, transfCdata
, transfAllCdata
, transfCharRef
, transfAllCharRef
, substAllXHTMLEntityRefs
, substXHTMLEntityRef
, rememberDTDAttrl
, addDefaultDTDecl
, hasXmlPi
, addXmlPi
, addXmlPiEncoding
, addDoctypeDecl
, addXHtmlDoctypeStrict
, addXHtmlDoctypeTransitional
, addXHtmlDoctypeFrameset
)
where
import Control.Arrow
import Control.Arrow.ArrowIf
import Control.Arrow.ArrowList
import Control.Arrow.ArrowTree
import Control.Arrow.ListArrow
import Control.Arrow.NTreeEdit
import Data.Char.Properties.XMLCharProps (isXmlSpaceChar)
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.DOM.FormatXmlTree (formatXmlTree)
import Text.XML.HXT.DOM.Interface
import qualified Text.XML.HXT.DOM.ShowXml as XS
import qualified Text.XML.HXT.DOM.XmlNode as XN
import Text.XML.HXT.Parser.HtmlParsec (emptyHtmlTags)
import Text.XML.HXT.Parser.XhtmlEntities (xhtmlEntities)
import Text.XML.HXT.Parser.XmlEntities (xmlEntities)
import Data.List (isPrefixOf)
import qualified Data.Map as M
import Data.Maybe
canonicalizeTree' :: LA XmlTree XmlTree -> LA XmlTree XmlTree
canonicalizeTree' toBeRemoved
= ( processChildren
( (none `when` (isText <+> isXmlPi))
>>>
(deep isPi `when` isDTD)
)
`when` isRoot
)
>>>
canonicalizeNodes toBeRemoved
canonicalizeNodes :: LA XmlTree XmlTree -> LA XmlTree XmlTree
canonicalizeNodes toBeRemoved
= editNTreeA $
[ toBeRemoved :-> none
, ( isElem >>> getAttrl >>> getChildren >>> isCharRef )
:-> ( processAttrl
( processChildren transfCharRef
>>>
collapseXText'
)
>>>
( collapseXText'
`when`
(getChildren >>. has2XText)
)
)
, ( isElem >>> (getChildren >>. has2XText) )
:-> collapseXText'
, isCharRef :-> ( getCharRef
>>>
arr (\ i -> [toEnum i])
>>>
mkText
)
, isCdata :-> ( getCdata
>>>
mkText
)
]
canonicalizeAllNodes :: ArrowList a => a XmlTree XmlTree
canonicalizeAllNodes = fromLA $
canonicalizeTree' isCmt
{-# INLINE canonicalizeAllNodes #-}
canonicalizeForXPath :: ArrowList a => a XmlTree XmlTree
canonicalizeForXPath = fromLA $ canonicalizeTree' none
{-# INLINE canonicalizeForXPath #-}
canonicalizeContents :: ArrowList a => a XmlTree XmlTree
canonicalizeContents = fromLA $
canonicalizeNodes none
{-# INLINE canonicalizeContents #-}
has2XText :: XmlTrees -> XmlTrees
has2XText ts0@(t1 : ts1@(t2 : ts2))
| XN.isText t1 = if XN.isText t2
then ts0
else has2XText ts2
| otherwise = has2XText ts1
has2XText _ = []
collapseXText' :: LA XmlTree XmlTree
collapseXText'
= replaceChildren ( listA getChildren >>> arrL (foldr mergeText' []) )
where
mergeText' :: XmlTree -> XmlTrees -> XmlTrees
mergeText' t1 (t2 : ts2)
| XN.isText t1 && XN.isText t2
= let
s1 = fromJust . XN.getText $ t1
s2 = fromJust . XN.getText $ t2
t = XN.mkText (s1 ++ s2)
in
t : ts2
mergeText' t1 ts
= t1 : ts
collapseXText :: ArrowList a => a XmlTree XmlTree
collapseXText = fromLA collapseXText'
collapseAllXText :: ArrowList a => a XmlTree XmlTree
collapseAllXText = fromLA $ processBottomUp collapseXText'
xshowEscapeXml :: ArrowXml a => a n XmlTree -> a n String
xshowEscapeXml f = f >. (uncurry XS.xshow'' escapeXmlRefs)
type EntityRefTable = M.Map Int String
xmlEntityRefTable
, xhtmlEntityRefTable :: EntityRefTable
xmlEntityRefTable = buildEntityRefTable $ xmlEntities
xhtmlEntityRefTable = buildEntityRefTable $ xhtmlEntities
buildEntityRefTable :: [(String, Int)] -> EntityRefTable
buildEntityRefTable = M.fromList . map (\ (x,y) -> (y,x) )
type EntitySubstTable = M.Map String String
xhtmlEntitySubstTable :: EntitySubstTable
xhtmlEntitySubstTable = M.fromList . map (second $ (:[]) . toEnum) $ xhtmlEntities
substXHTMLEntityRef :: LA XmlTree XmlTree
substXHTMLEntityRef
= ( getEntityRef
>>>
arrL subst
>>>
mkText
)
`orElse` this
where
subst name
= maybe [] (:[]) $ M.lookup name xhtmlEntitySubstTable
substAllXHTMLEntityRefs :: ArrowXml a => a XmlTree XmlTree
substAllXHTMLEntityRefs
= fromLA $
processBottomUp substXHTMLEntityRef
escapeXmlRefs :: (Char -> String -> String, Char -> String -> String)
escapeXmlRefs = (cquote, aquote)
where
cquote c
| c `elem` "<&" = ('&' :)
. ((lookupRef c xmlEntityRefTable) ++)
. (';' :)
| otherwise = (c :)
aquote c
| c `elem` "<>\"\'&\n\r\t"
= ('&' :)
. ((lookupRef c xmlEntityRefTable) ++)
. (';' :)
| otherwise = (c :)
escapeHtmlRefs :: (Char -> String -> String, Char -> String -> String)
escapeHtmlRefs = (cquote, aquote)
where
cquote c
| isHtmlTextEsc c
= ('&' :)
. ((lookupRef c xhtmlEntityRefTable) ++)
. (';' :)
| otherwise = (c :)
aquote c
| isHtmlAttrEsc c
= ('&' :)
. ((lookupRef c xhtmlEntityRefTable) ++)
. (';' :)
| otherwise = (c :)
isHtmlTextEsc c = c >= toEnum(128) || ( c `elem` "<&" )
isHtmlAttrEsc c = c >= toEnum(128) || ( c `elem` "<>\"\'&\n\r\t" )
lookupRef :: Char -> EntityRefTable -> String
lookupRef c = fromMaybe ('#' : show (fromEnum c))
. M.lookup (fromEnum c)
{-# INLINE lookupRef #-}
preventEmptyElements :: ArrowList a => [String] -> Bool -> a XmlTree XmlTree
preventEmptyElements ns isHtml
= fromLA $
editNTreeA [ ( isElem
>>>
isNoneEmpty
>>>
neg getChildren
)
:-> replaceChildren (txt "")
]
where
isNoneEmpty
| not (null ns) = hasNameWith (localPart >>> (`elem` ns))
| isHtml = hasNameWith (localPart >>> (`notElem` emptyHtmlTags))
| otherwise = this
haskellRepOfXmlDoc :: ArrowList a => a XmlTree XmlTree
haskellRepOfXmlDoc
= fromLA $
root [getAttrl] [show ^>> mkText]
numberLinesInXmlDoc :: ArrowList a => a XmlTree XmlTree
numberLinesInXmlDoc
= fromLA $
processChildren (changeText numberLines)
where
numberLines :: String -> String
numberLines str
= concat $
zipWith (\ n l -> lineNr n ++ l ++ "\n") [1..] (lines str)
where
lineNr :: Int -> String
lineNr n = (reverse (take 6 (reverse (show n) ++ replicate 6 ' '))) ++ " "
treeRepOfXmlDoc :: ArrowList a => a XmlTree XmlTree
treeRepOfXmlDoc
= fromLA $
root [getAttrl] [formatXmlTree ^>> mkText]
addHeadlineToXmlDoc :: ArrowXml a => a XmlTree XmlTree
addHeadlineToXmlDoc
= fromLA $ ( addTitle $< (getAttrValue a_source >>^ formatTitle) )
where
addTitle str
= replaceChildren ( txt str <+> getChildren <+> txt "\n" )
formatTitle str
= "\n" ++ headline ++ "\n" ++ underline ++ "\n\n"
where
headline = "content of: " ++ str
underline = map (const '=') headline
removeComment :: ArrowXml a => a XmlTree XmlTree
removeComment = none `when` isCmt
removeAllComment :: ArrowXml a => a XmlTree XmlTree
removeAllComment = fromLA $ editNTreeA [isCmt :-> none]
removeWhiteSpace :: ArrowXml a => a XmlTree XmlTree
removeWhiteSpace = fromLA $ none `when` isWhiteSpace
removeAllWhiteSpace :: ArrowXml a => a XmlTree XmlTree
removeAllWhiteSpace = fromLA $ editNTreeA [isWhiteSpace :-> none]
removeDocWhiteSpace :: ArrowXml a => a XmlTree XmlTree
removeDocWhiteSpace = fromLA $ removeRootWhiteSpace
removeRootWhiteSpace :: LA XmlTree XmlTree
removeRootWhiteSpace
= processChildren processRootElement
`when`
isRoot
where
processRootElement :: LA XmlTree XmlTree
processRootElement
= removeWhiteSpace >>> processChild
where
processChild
= choiceA [ isDTD
:-> removeAllWhiteSpace
, this
:-> replaceChildren ( getChildren
>>. indentTrees insertNothing False 1
)
]
indentDoc :: ArrowXml a => a XmlTree XmlTree
indentDoc = fromLA $
( ( isRoot `guards` indentRoot )
`orElse`
(root [] [this] >>> indentRoot >>> getChildren)
)
indentRoot :: LA XmlTree XmlTree
indentRoot = processChildren indentRootChildren
where
indentRootChildren
= removeText >>> indentChild >>> insertNL
where
removeText = none `when` isText
insertNL = this <+> txt "\n"
indentChild = ( replaceChildren
( getChildren
>>.
indentTrees (insertIndentation 2) False 1
)
`whenNot` isDTD
)
indentTrees :: (Int -> LA XmlTree XmlTree) -> Bool -> Int -> XmlTrees -> XmlTrees
indentTrees _ _ _ []
= []
indentTrees indentFilter preserveSpace level ts
= runLAs lsf ls
++
indentRest rs
where
runLAs f l
= runLA (constL l >>> f) undefined
(ls, rs)
= break XN.isElem ts
isSignificant :: Bool
isSignificant
= preserveSpace
||
(not . null . runLAs isSignificantPart) ls
isSignificantPart :: LA XmlTree XmlTree
isSignificantPart
= catA
[ isText `guards` neg isWhiteSpace
, isCdata
, isCharRef
, isEntityRef
]
lsf :: LA XmlTree XmlTree
lsf
| isSignificant
= this
| otherwise
= (none `when` isWhiteSpace)
>>>
(indentFilter level <+> this)
indentRest :: XmlTrees -> XmlTrees
indentRest []
| isSignificant
= []
| otherwise
= runLA (indentFilter (level - 1)) undefined
indentRest (t':ts')
= runLA ( ( indentElem
>>>
lsf
)
`when` isElem
) t'
++
( if null ts'
then indentRest
else indentTrees indentFilter preserveSpace level
) ts'
where
indentElem
= replaceChildren ( getChildren
>>.
indentChildren
)
xmlSpaceAttrValue :: String
xmlSpaceAttrValue
= concat . runLA (getAttrValue "xml:space") $ t'
preserveSpace' :: Bool
preserveSpace'
= ( fromMaybe preserveSpace
.
lookup xmlSpaceAttrValue
) [ ("preserve", True)
, ("default", False)
]
indentChildren :: XmlTrees -> XmlTrees
indentChildren cs'
| all (maybe False (all isXmlSpaceChar) . XN.getText) cs'
= []
| otherwise
= indentTrees indentFilter preserveSpace' (level + 1) cs'
insertIndentation :: Int -> Int -> LA a XmlTree
insertIndentation indentWidth level
= txt ('\n' : replicate (level * indentWidth) ' ')
insertNothing :: Int -> LA a XmlTree
insertNothing _ = none
transfCdata :: ArrowXml a => a XmlTree XmlTree
transfCdata = fromLA $
(getCdata >>> mkText) `when` isCdata
transfAllCdata :: ArrowXml a => a XmlTree XmlTree
transfAllCdata = fromLA $ editNTreeA [isCdata :-> (getCdata >>> mkText)]
transfCharRef :: ArrowXml a => a XmlTree XmlTree
transfCharRef = fromLA $
( getCharRef >>> arr (\ i -> [toEnum i]) >>> mkText )
`when`
isCharRef
transfAllCharRef :: ArrowXml a => a XmlTree XmlTree
transfAllCharRef = fromLA $ editNTreeA [isCharRef :-> (getCharRef >>> arr (\ i -> [toEnum i]) >>> mkText)]
rememberDTDAttrl :: ArrowList a => a XmlTree XmlTree
rememberDTDAttrl
= fromLA $
( ( addDTDAttrl $< ( getChildren >>> isDTDDoctype >>> getDTDAttrl ) )
`orElse`
this
)
where
addDTDAttrl al
= seqA . map (uncurry addAttr) . map (first (dtdPrefix ++)) $ al
addDefaultDTDecl :: ArrowList a => a XmlTree XmlTree
addDefaultDTDecl
= fromLA $
( addDTD $< listA (getAttrl >>> (getName &&& xshow getChildren) >>> hasDtdPrefix) )
where
hasDtdPrefix
= isA (fst >>> (dtdPrefix `isPrefixOf`))
>>>
arr (first (drop (length dtdPrefix)))
addDTD []
= this
addDTD al
= replaceChildren
( mkDTDDoctype al none
<+>
txt "\n"
<+>
( getChildren >>> (none `when` isDTDDoctype) )
)
hasXmlPi :: ArrowXml a => a XmlTree XmlTree
hasXmlPi
= fromLA
( getChildren
>>>
isPi
>>>
hasName t_xml
)
addXmlPi :: ArrowXml a => a XmlTree XmlTree
addXmlPi
= fromLA
( insertChildrenAt 0 ( ( mkPi (mkName t_xml) none
>>>
addAttr a_version "1.0"
)
<+>
txt "\n"
)
`whenNot`
hasXmlPi
)
addXmlPiEncoding :: ArrowXml a => String -> a XmlTree XmlTree
addXmlPiEncoding enc
= fromLA $
processChildren ( addAttr a_encoding enc
`when`
( isPi >>> hasName t_xml )
)
addXHtmlDoctypeStrict
, addXHtmlDoctypeTransitional
, addXHtmlDoctypeFrameset :: ArrowXml a => a XmlTree XmlTree
addXHtmlDoctypeStrict
= addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
addXHtmlDoctypeTransitional
= addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
addXHtmlDoctypeFrameset
= addDoctypeDecl "html" "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"
addDoctypeDecl :: ArrowXml a => String -> String -> String -> a XmlTree XmlTree
addDoctypeDecl rootElem public system
= fromLA $
replaceChildren
( mkDTDDoctype ( ( if null public then id else ( (k_public, public) : ) )
.
( if null system then id else ( (k_system, system) : ) )
$ [ (a_name, rootElem) ]
) none
<+>
txt "\n"
<+>
getChildren
)