module Text.XML.HXT.Arrow.GeneralEntitySubstitution
( processGeneralEntities )
where
import Control.Arrow
import Control.Arrow.ArrowList
import Control.Arrow.ArrowIf
import Control.Arrow.ArrowTree
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.XmlState
import Text.XML.HXT.Arrow.ParserInterface
( parseXmlEntityValueAsAttrValue
, parseXmlEntityValueAsContent
)
import Text.XML.HXT.Arrow.Edit
( transfCharRef
)
import Text.XML.HXT.Arrow.DocumentInput
( getXmlEntityContents
)
import qualified Data.Map as M
( Map
, empty
, lookup
, insert
)
data GEContext
= ReferenceInContent
| ReferenceInAttributeValue
| ReferenceInEntityValue
type GESubstArrow = GEContext -> RecList -> GEArrow XmlTree XmlTree
type GEArrow b c = IOStateArrow GEEnv b c
type RecList = [String]
newtype GEEnv = GEEnv (M.Map String GESubstArrow)
emptyGeEnv :: GEEnv
emptyGeEnv = GEEnv M.empty
lookupGeEnv :: String -> GEEnv -> Maybe GESubstArrow
lookupGeEnv k (GEEnv env)
= M.lookup k env
addGeEntry :: String -> GESubstArrow -> GEEnv -> GEEnv
addGeEntry k a (GEEnv env)
= GEEnv $ M.insert k a env
processGeneralEntities :: IOStateArrow s XmlTree XmlTree
processGeneralEntities
= ( traceMsg 1 "processGeneralEntities: collect and substitute general entities"
>>>
withOtherUserState emptyGeEnv (processChildren (processGeneralEntity ReferenceInContent []))
>>>
setDocumentStatusFromSystemState "in general entity processing"
>>>
traceTree
>>>
traceSource
)
`when`
documentStatusOk
processGeneralEntity :: GESubstArrow
processGeneralEntity context recl
= choiceA [ isElem :-> ( processAttrl (processChildren substEntitiesInAttrValue)
>>>
processChildren (processGeneralEntity context recl)
)
, isEntityRef :-> substEntityRef
, isDTDDoctype :-> processChildren (processGeneralEntity context recl)
, isDTDEntity :-> addEntityDecl
, isDTDAttlist :-> substEntitiesInAttrDefaultValue
, this :-> this
]
where
addEntityDecl :: GEArrow XmlTree XmlTree
addEntityDecl
= perform ( choiceA [ isIntern :-> addInternalEntity
, isExtern :-> addExternalEntity
, isUnparsed :-> addUnparsedEntity
]
)
where
isIntern = none `when` hasDTDAttr k_system
isExtern = none `when` hasDTDAttr k_ndata
isUnparsed = this
addInternalEntity :: GEArrow XmlTree b
addInternalEntity
= insertInternal $<<
( ( getDTDAttrValue a_name
>>>
traceValue 2 (("processGeneralEntity: general entity definition for " ++) . show)
)
&&&
xshow (getChildren >>> isText)
)
where
insertInternal entity contents
= insertEntity (substInternal contents) entity
>>>
none
addExternalEntity :: GEArrow XmlTree b
addExternalEntity
= insertExternal $<<
( ( getDTDAttrValue a_name
>>>
traceValue 2 (("processGeneralEntity: external entity definition for " ++) . show)
)
&&&
getDTDAttrValue a_url
)
where
insertExternal entity uri
= insertEntity (substExternalParsed1Time uri) entity
>>>
none
addUnparsedEntity :: GEArrow XmlTree b
addUnparsedEntity
= getDTDAttrValue a_name
>>>
traceValue 2 (("processGeneralEntity: unparsed entity definition for " ++) . show)
>>>
applyA (arr (insertEntity substUnparsed))
>>>
none
insertEntity :: (String -> GESubstArrow) -> String -> GEArrow b b
insertEntity fct entity
= ( getUserState
>>>
applyA (arr checkDefined)
)
`guards`
addEntity fct entity
where
checkDefined geEnv
= maybe ok alreadyDefined . lookupGeEnv entity $ geEnv
where
ok = this
alreadyDefined _
= issueWarn ("entity " ++ show entity ++ " already defined, repeated definition ignored")
>>>
none
addEntity :: (String -> GESubstArrow) -> String -> GEArrow b b
addEntity fct entity
= changeUserState ins
where
ins _ geEnv = addGeEntry entity (fct entity) geEnv
substEntitiesInAttrDefaultValue :: GEArrow XmlTree XmlTree
substEntitiesInAttrDefaultValue
= applyA ( xshow ( getDTDAttrValue a_default
>>>
mkText
>>>
parseXmlEntityValueAsAttrValue "default value of attribute"
>>>
filterErrorMsg
>>>
substEntitiesInAttrValue
)
>>> arr (setDTDAttrValue a_default)
)
`when` hasDTDAttr a_default
substEntitiesInAttrValue :: GEArrow XmlTree XmlTree
substEntitiesInAttrValue
= ( processGeneralEntity ReferenceInAttributeValue recl
`when`
isEntityRef
)
>>>
changeText normalizeWhiteSpace
>>>
transfCharRef
where
normalizeWhiteSpace = map ( \c -> if c `elem` "\n\t\r" then ' ' else c )
substEntityRef :: GEArrow XmlTree XmlTree
substEntityRef
= applyA ( ( ( getEntityRef
>>>
traceValue 2 (("processGeneralEntity: entity reference for entity " ++) . show)
>>>
traceMsg 3 ("recursion list = " ++ show recl)
)
&&&
getUserState
) >>>
arr2 substA
)
where
substA :: String -> GEEnv -> GEArrow XmlTree XmlTree
substA entity geEnv
= maybe entityNotFound entityFound . lookupGeEnv entity $ geEnv
where
errMsg msg
= issueErr msg
entityNotFound
= errMsg ("general entity reference \"&" ++ entity ++ ";\" not processed, no definition found, (forward reference?)")
entityFound fct
| entity `elem` recl
= errMsg ("general entity reference \"&" ++ entity ++ ";\" not processed, cyclic definition")
| otherwise
= fct context recl
substExternalParsed1Time :: String -> String -> GESubstArrow
substExternalParsed1Time uri entity cx rl
= perform ( traceMsg 2 ("substExternalParsed1Time: read and parse external parsed entity " ++ show entity)
>>>
runInLocalURIContext ( root [sattr a_source uri] []
>>>
getXmlEntityContents
>>>
processExternalEntityContents
)
>>>
applyA ( arr $ \ s -> addEntity (substExternalParsed s) entity )
)
>>>
processGeneralEntity cx rl
where
processExternalEntityContents :: IOStateArrow s XmlTree String
processExternalEntityContents
= ( ( ( documentStatusOk
>>>
(getChildren >>> isText)
)
`guards`
this
)
`orElse`
issueErr ("illegal value for external parsed entity " ++ show entity)
)
>>>
xshow (getChildren >>> isText)
substExternalParsed :: String -> String -> GESubstArrow
substExternalParsed s entity ReferenceInContent rl = includedIfValidating s rl entity
substExternalParsed _ entity ReferenceInAttributeValue _
= forbidden entity "external parsed general" "in attribute value"
substExternalParsed _ _ ReferenceInEntityValue _
= bypassed
substInternal :: String -> String -> GESubstArrow
substInternal s entity ReferenceInContent rl = included s rl entity
substInternal s entity ReferenceInAttributeValue rl = includedInLiteral s rl entity
substInternal _ _ ReferenceInEntityValue _ = bypassed
substUnparsed :: String -> GESubstArrow
substUnparsed entity ReferenceInContent _ = forbidden entity "unparsed" "content"
substUnparsed entity ReferenceInAttributeValue _ = forbidden entity "unparsed" "attribute value"
substUnparsed entity ReferenceInEntityValue _ = forbidden entity "unparsed" "entity value"
included :: String -> RecList -> String -> GEArrow XmlTree XmlTree
included s rl entity
= traceMsg 3 ("substituting general entity " ++ show entity ++ " with value " ++ show s)
>>>
txt s
>>>
parseXmlEntityValueAsContent ("substituting general entity " ++ show entity ++ " in contents")
>>>
filterErrorMsg
>>>
processGeneralEntity context (entity : rl)
includedIfValidating :: String -> RecList -> String -> GEArrow XmlTree XmlTree
includedIfValidating
= included
forbidden :: String -> String -> String -> GEArrow XmlTree XmlTree
forbidden entity msg cx
= issueErr ("reference of " ++ msg ++ show entity ++ " forbidden in " ++ cx)
includedInLiteral :: String -> RecList -> String -> GEArrow XmlTree XmlTree
includedInLiteral s rl entity
= txt s
>>>
parseXmlEntityValueAsAttrValue ("substituting general entity " ++ show entity ++ " in attribute value")
>>>
filterErrorMsg
>>>
processGeneralEntity context (entity : rl)
bypassed :: GEArrow XmlTree XmlTree
bypassed
= this