module Text.XML.HXT.Arrow.DTDProcessing
( processDTD
)
where
import Control.Arrow
import Control.Arrow.ArrowList
import Control.Arrow.ArrowIf
import Control.Arrow.ArrowTree
import Text.XML.HXT.DOM.Interface
import qualified Text.XML.HXT.DOM.XmlNode as XN
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.XmlState
import Text.XML.HXT.Arrow.ParserInterface
( parseXmlDTDdecl
, parseXmlDTDdeclPart
, parseXmlDTDEntityValue
, parseXmlDTDPart
)
import Text.XML.HXT.Arrow.Edit
( transfCharRef
)
import Text.XML.HXT.Arrow.DocumentInput
( getXmlEntityContents
)
import Data.Maybe
import qualified Data.Map as M
( Map
, empty
, lookup
, insert
)
data DTDPart = Internal
| External
deriving (Eq)
type RecList = [String]
type DTDStateArrow b c = IOStateArrow PEEnv b c
newtype PEEnv = PEEnv (M.Map String XmlTree)
emptyPeEnv :: PEEnv
emptyPeEnv = PEEnv M.empty
lookupPeEnv :: String -> PEEnv -> Maybe XmlTree
lookupPeEnv k (PEEnv env)
= M.lookup k env
addPeEntry :: String -> XmlTree -> PEEnv -> PEEnv
addPeEntry k a (PEEnv env)
= PEEnv $ M.insert k a env
getPeValue :: DTDStateArrow String XmlTree
getPeValue
= (this &&& getUserState)
>>>
arrL (\ (n, env) -> maybeToList . lookupPeEnv n $ env)
addPe :: String -> DTDStateArrow XmlTree XmlTree
addPe n
= traceMsg 2 ("substParamEntity: add entity " ++ show n ++ " to env")
>>>
changeUserState ins
where
ins t peEnv = addPeEntry n t peEnv
processDTD :: IOStateArrow s XmlTree XmlTree
processDTD
= runInLocalURIContext
( processRoot
>>>
traceTree
>>>
traceSource
)
`when` ( isRoot >>> getChildren )
where
processRoot :: IOStateArrow s XmlTree XmlTree
processRoot
= ( traceMsg 1 ("processDTD: process parameter entities")
>>>
setSysAttrString a_standalone ""
>>>
processChildren substParamEntities
>>>
setDocumentStatusFromSystemState "in XML DTD processing"
>>>
traceMsg 1 ("processDTD: parameter entities processed")
)
`when`
documentStatusOk
substParamEntities :: IOStateArrow s XmlTree XmlTree
substParamEntities
= withOtherUserState emptyPeEnv processParamEntities
`when`
isDTDDoctype
where
processParamEntities :: DTDStateArrow XmlTree XmlTree
processParamEntities
= mergeEntities $<<< ( listA processPredef
&&&
listA processInt
&&&
listA (runInLocalURIContext processExt)
)
where
mergeEntities dtdPre dtdInt dtdExt
= replaceChildren (arrL $ const $ foldl1 mergeDTDs [dtdPre, dtdInt, dtdExt])
processPredef
= predefDTDPart >>> substParamEntity Internal []
processInt
= getChildren >>> substParamEntity Internal []
processExt
= externalDTDPart >>> substParamEntity External []
mergeDTDs :: XmlTrees -> XmlTrees -> XmlTrees
mergeDTDs dtdInt dtdExt
= dtdInt ++ (filter (filterDTDNodes dtdInt) dtdExt)
filterDTDNodes :: XmlTrees -> XmlTree -> Bool
filterDTDNodes dtdPart t
= not (any (filterDTDNode t) dtdPart)
filterDTDNode :: XmlTree -> XmlTree -> Bool
filterDTDNode t1 t2
= fromMaybe False $
do
dp1 <- XN.getDTDPart t1
dp2 <- XN.getDTDPart t2
al1 <- XN.getDTDAttrl t1
al2 <- XN.getDTDAttrl t2
return ( dp1 == dp2
&&
( dp1 `elem` [ELEMENT, NOTATION, ENTITY, ATTLIST] )
&&
( lookup a_name al1 == lookup a_name al2 )
&&
( dp1 /= ATTLIST
||
lookup a_value al1 == lookup a_value al2
)
)
substParamEntity :: DTDPart -> RecList -> DTDStateArrow XmlTree XmlTree
substParamEntity loc recList
= choiceA
[ isDTDEntity :-> ( traceDTD "ENTITY declaration before DTD declaration parsing"
>>>
processChildren (substPeRefsInDTDdecl recList)
>>>
parseXmlDTDdecl
>>>
substPeRefsInEntityValue
>>>
traceDTD "ENTITY declaration after PE substitution"
>>>
processEntityDecl
>>>
traceDTD "ENTITY declaration after DTD declaration parsing"
)
, ( isDTDElement
<+>
isDTDAttlist
<+>
isDTDNotation
) :-> ( traceDTD "DTD declaration before PE substitution"
>>>
processChildren (substPeRefsInDTDdecl recList)
>>>
parseXmlDTDdecl
>>>
traceDTD "DTD declaration after DTD declaration parsing"
)
, isDTDPERef :-> substPeRefsInDTDpart recList
, isDTDCondSect :-> ( if loc == Internal
then issueErr "conditional sections in internal part of the DTD is not allowed"
else evalCondSect $< getDTDAttrValue a_value
)
, isCmt :-> none
, this :-> this
]
where
processEntityDecl :: DTDStateArrow XmlTree XmlTree
processEntityDecl
= choiceA
[ isDTDEntity :-> ( ifA (hasDTDAttr k_system)
processExternalEntity
processInternalEntity
)
, isDTDPEntity
:-> ( processParamEntity $< getDTDAttrValue a_name )
, this :-> none
]
where
processExternalEntity :: DTDStateArrow XmlTree XmlTree
processExternalEntity
= setDTDAttrValue a_url $< ( getDTDAttrValue k_system >>> mkAbsURI )
processInternalEntity :: DTDStateArrow XmlTree XmlTree
processInternalEntity
= this
processParamEntity :: String -> DTDStateArrow XmlTree XmlTree
processParamEntity peName
= ifA (constA peName >>> getPeValue)
( issueWarn ("parameter entity " ++ show peName ++ " already defined")
>>>
none
)
( ( ifA ( hasDTDAttr k_system )
( setDTDAttrValue a_url $<
( getDTDAttrValue k_system >>> mkAbsURI )
)
( this )
)
>>>
addPe peName
)
substPERef :: String -> DTDStateArrow XmlTree XmlTree
substPERef pn
= choiceA
[ isUndefinedRef :-> issueErr ("parameter entity " ++ show pn ++ " not found (forward reference?)")
, isInternalRef :-> issueErr ("a parameter entity reference of " ++ show pn ++ " occurs in the internal subset of the DTD")
, isUnreadExternalRef :-> ( perform
( peVal
>>>
getExternalParamEntityValue pn
>>>
addPe pn
)
>>>
substPERef pn
)
, this :-> substPE
]
`when`
isDTDPERef
where
peVal = constA pn >>> getPeValue
isUnreadExternalRef = ( peVal
>>>
getDTDAttrValue a_url
>>>
isA (not . null)
)
`guards`
this
isInternalRef = none
isUndefinedRef = neg peVal
substPE = replaceChildren (peVal >>> getChildren)
substPeRefsInEntityValue :: DTDStateArrow XmlTree XmlTree
substPeRefsInEntityValue
= ( ( replaceChildren
( xshow ( getChildren
>>>
transfCharRef
>>>
substPeRefsInValue []
)
>>>
mkText
)
)
`whenNot`
hasDTDAttr k_system
)
`when`
( isDTDEntity <+> isDTDPEntity )
substPeRefsInDTDpart :: RecList -> DTDStateArrow XmlTree XmlTree
substPeRefsInDTDpart rl
= recursionCheck "DTD part" rl subst
where
subst :: RecList -> String -> DTDStateArrow XmlTree XmlTree
subst recl pn
= substPERef pn
>>>
traceDTD "substPeRefsInDTDdecl: before parseXmlDTDPart"
>>>
( runInPeContext ( getChildren
>>>
( (constA ("parameter entity: " ++ pn)) &&& this )
>>>
parseXmlDTDPart
>>>
traceDTD "substPeRefsInDTDpart: after parseXmlDTDPart"
>>>
substParamEntity loc (pn : recl)
)
`when`
isDTDPERef
)
substPeRefsInDTDdecl :: RecList -> DTDStateArrow XmlTree XmlTree
substPeRefsInDTDdecl rl
= recursionCheck "DTD declaration" rl subst
where
subst :: RecList -> String -> DTDStateArrow XmlTree XmlTree
subst recl pn
= substPERef pn
>>>
traceDTD "substPeRefsInDTDdecl: before parseXmlDTDdeclPart"
>>>
( runInPeContext ( parseXmlDTDdeclPart
>>>
traceDTD "substPeRefsInDTDdecl: after parseXmlDTDdeclPart"
>>>
processChildren ( substPeRefsInDTDdecl (pn : recl) )
)
`when`
isDTDPERef
)
substPeRefsInValue :: RecList -> DTDStateArrow XmlTree XmlTree
substPeRefsInValue rl
= recursionCheck "entity value" rl subst
where
subst :: RecList -> String -> DTDStateArrow XmlTree XmlTree
subst recl pn
= substPERef pn
>>>
parseXmlDTDEntityValue
>>>
substPeRefsInValue (pn : recl)
substPeRefsInCondSect :: RecList -> DTDStateArrow XmlTree XmlTree
substPeRefsInCondSect rl
= recursionCheck "conditional section" rl subst
where
subst :: RecList -> String -> DTDStateArrow XmlTree XmlTree
subst recl pn
= substPERef pn
>>>
traceDTD "substPeRefsInCondSect: parseXmlDTDdeclPart"
>>>
runInPeContext ( parseXmlDTDdeclPart
>>>
traceDTD "substPeRefsInCondSect: after parseXmlDTDdeclPart"
>>>
processChildren ( substPeRefsInCondSect (pn : recl) )
)
recursionCheck :: String -> RecList -> (RecList -> String -> DTDStateArrow XmlTree XmlTree) -> DTDStateArrow XmlTree XmlTree
recursionCheck wher rl subst
= ( recusiveSubst $< getDTDAttrValue a_peref )
`when`
isDTDPERef
where
recusiveSubst name
| name `elem` rl
= issueErr ("recursive call of parameter entity " ++ show name ++ " in " ++ wher)
| otherwise
= subst rl name
runInPeContext :: DTDStateArrow XmlTree XmlTree -> DTDStateArrow XmlTree XmlTree
runInPeContext f
= ( runWithNewBase $< getDTDAttrValue a_url )
`orElse`
f
where
runWithNewBase base
= runInLocalURIContext
( perform (constA base >>> setBaseURI)
>>>
f
)
evalCondSect :: String -> DTDStateArrow XmlTree XmlTree
evalCondSect content
= traceDTD "evalCondSect: process conditional section"
>>>
processChildren (substPeRefsInCondSect [])
>>>
parseXmlDTDdecl
>>>
( hasText (== k_include)
`guards`
( ( constA "conditional section" &&& txt content )
>>>
parseXmlDTDPart
>>>
traceMsg 2 "evalCond: include DTD part"
>>>
substParamEntity External recList
)
)
predefDTDPart :: DTDStateArrow XmlTree XmlTree
predefDTDPart
= ( constA "predefined entities"
&&&
( constA predefinedEntities >>> mkText)
)
>>>
parseXmlDTDPart
where
predefinedEntities :: String
predefinedEntities
= concat [ "<!ENTITY lt '&#60;'>"
, "<!ENTITY gt '>'>"
, "<!ENTITY amp '&#38;'>"
, "<!ENTITY apos '''>"
, "<!ENTITY quot '"'>"
]
externalDTDPart :: DTDStateArrow XmlTree XmlTree
externalDTDPart
= isDTDDoctype
`guards`
( hasDTDAttr k_system
`guards`
( getExternalDTDPart $< getDTDAttrValue k_system )
)
getExternalDTDPart :: String -> DTDStateArrow XmlTree XmlTree
getExternalDTDPart src
= root [sattr a_source src] []
>>>
getXmlEntityContents
>>>
replaceChildren ( ( constA src &&& getChildren )
>>>
parseXmlDTDPart
)
>>>
traceDoc "processExternalDTD: parsing DTD part done"
>>>
getChildren
getExternalParamEntityValue :: String -> DTDStateArrow XmlTree XmlTree
getExternalParamEntityValue pn
= isDTDPEntity
`guards`
( setEntityValue $< ( listA ( getEntityValue $< getDTDAttrValue a_url ) ) )
where
getEntityValue :: String -> DTDStateArrow XmlTree XmlTree
getEntityValue url
= root [sattr a_source url] []
>>>
runInLocalURIContext getXmlEntityContents
>>>
traceMsg 2 ("getExternalParamEntityValue: contents read for " ++ show pn ++ " from " ++ show url)
>>>
getChildren
setEntityValue :: XmlTrees -> DTDStateArrow XmlTree XmlTree
setEntityValue res
| null res
= issueErr ("illegal external parameter entity value for entity %" ++ pn ++";")
| otherwise
= replaceChildren (constL res)
>>>
setDTDAttrValue a_url ""
traceDTD :: String -> DTDStateArrow XmlTree XmlTree
traceDTD msg = traceMsg 3 msg >>> traceTree