module Text.XML.HaXml.Html.ParseLazy
( htmlParse
) where
import Prelude hiding (either,maybe,sequence)
import Data.Maybe hiding (maybe)
import Data.Char (toLower, isDigit, isHexDigit)
import Numeric (readDec,readHex)
import Control.Monad
import Text.XML.HaXml.Types
import Text.XML.HaXml.Lex
import Text.XML.HaXml.Posn
import Text.ParserCombinators.Poly.Lazy
#if defined(DEBUG)
# if ( defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ > 502 ) || \
( defined(__NHC__) && __NHC__ > 114 ) || defined(__HUGS__)
import Debug.Trace(trace)
# elif defined(__GLASGOW_HASKELL__)
import IOExts(trace)
# elif defined(__NHC__) || defined(__HBC__)
import NonStdTrace
# endif
debug :: Monad m => String -> m ()
debug s = trace s (return ())
#else
debug :: Monad m => String -> m ()
debug :: forall (m :: * -> *). Monad m => String -> m ()
debug String
_ = forall (m :: * -> *) a. Monad m => a -> m a
return ()
#endif
htmlParse :: String -> String -> Document Posn
htmlParse :: String -> String -> Document Posn
htmlParse String
file = forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall t a. Parser t a -> [t] -> (a, [t])
runParser HParser (Document Posn)
document forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> [(Posn, TokenT)]
xmlLex String
file
selfclosingtags :: [String]
selfclosingtags :: [String]
selfclosingtags = [String
"img",String
"hr",String
"br",String
"meta",String
"col",String
"link",String
"base"
,String
"param",String
"area",String
"frame",String
"input"]
closeInnerTags :: [(String,[String])]
closeInnerTags :: [(String, [String])]
closeInnerTags =
[ (String
"ul", [String
"li"])
, (String
"ol", [String
"li"])
, (String
"dl", [String
"dt",String
"dd"])
, (String
"tr", [String
"th",String
"td"])
, (String
"div", [String
"p"])
, (String
"thead", [String
"th",String
"tr",String
"td"])
, (String
"tfoot", [String
"th",String
"tr",String
"td"])
, (String
"tbody", [String
"th",String
"tr",String
"td"])
, (String
"table", [String
"th",String
"tr",String
"td",String
"thead",String
"tfoot",String
"tbody"])
, (String
"caption", [String
"p"])
, (String
"th", [String
"p"])
, (String
"td", [String
"p"])
, (String
"li", [String
"p"])
, (String
"dt", [String
"p"])
, (String
"dd", [String
"p"])
, (String
"object", [String
"p"])
, (String
"map", [String
"p"])
, (String
"body", [String
"p"])
]
closes :: String -> String -> Bool
String
"a" closes :: String -> String -> Bool
`closes` String
"a" = Bool
True
String
"li" `closes` String
"li" = Bool
True
String
"th" `closes` String
t | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"th",String
"td"] = Bool
True
String
"td" `closes` String
t | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"th",String
"td"] = Bool
True
String
"tr" `closes` String
t | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"th",String
"td",String
"tr"] = Bool
True
String
"dt" `closes` String
t | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"dt",String
"dd"] = Bool
True
String
"dd" `closes` String
t | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"dt",String
"dd"] = Bool
True
String
"form" `closes` String
"form" = Bool
True
String
"label" `closes` String
"label" = Bool
True
String
_ `closes` String
"option" = Bool
True
String
"thead" `closes` String
t | String
t forall a. Eq a => a -> a -> Bool
== String
"colgroup" = Bool
True
String
"tfoot" `closes` String
t | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"thead",String
"colgroup"] = Bool
True
String
"tbody" `closes` String
t | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"tbody",String
"tfoot",String
"thead",String
"colgroup"] = Bool
True
String
"colgroup" `closes` String
"colgroup" = Bool
True
String
t `closes` String
"p" | String
t forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String
"p",String
"h1",String
"h2",String
"h3",String
"h4",String
"h5",String
"h6"
,String
"hr",String
"div",String
"ul",String
"dl",String
"ol",String
"table"] = Bool
True
String
_ `closes` String
_ = Bool
False
type HParser a = Parser (Posn,TokenT) a
tok :: TokenT -> HParser TokenT
tok :: TokenT -> HParser TokenT
tok TokenT
t = do (Posn
p,TokenT
t') <- forall t. Parser t t
next
case TokenT
t' of TokError String
_ -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (p :: * -> *) a. PolyParse p => String -> p a
failBad (forall a. Show a => a -> String
show TokenT
t) Posn
p TokenT
t'
TokenT
_ | TokenT
t'forall a. Eq a => a -> a -> Bool
==TokenT
t -> forall (m :: * -> *) a. Monad m => a -> m a
return TokenT
t
| Bool
otherwise -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (m :: * -> *) a. MonadFail m => String -> m a
fail (forall a. Show a => a -> String
show TokenT
t) Posn
p TokenT
t'
qname :: HParser QName
qname :: HParser QName
qname = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap String -> QName
N HParser String
name
name :: HParser Name
name :: HParser String
name = do (Posn
p,TokenT
tok) <- forall t. Parser t t
next
case TokenT
tok of
TokName String
s -> forall (m :: * -> *) a. Monad m => a -> m a
return String
s
TokError String
_ -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (p :: * -> *) a. PolyParse p => String -> p a
failBad String
"a name" Posn
p TokenT
tok
TokenT
_ -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"a name" Posn
p TokenT
tok
string, freetext :: HParser String
string :: HParser String
string = do (Posn
p,TokenT
t) <- forall t. Parser t t
next
case TokenT
t of TokName String
s -> forall (m :: * -> *) a. Monad m => a -> m a
return String
s
TokenT
_ -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"text" Posn
p TokenT
t
freetext :: HParser String
freetext = do (Posn
p,TokenT
t) <- forall t. Parser t t
next
case TokenT
t of TokFreeText String
s -> forall (m :: * -> *) a. Monad m => a -> m a
return String
s
TokenT
_ -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"text" Posn
p TokenT
t
maybe :: HParser a -> HParser (Maybe a)
maybe :: forall a. HParser a -> HParser (Maybe a)
maybe HParser a
p =
(forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser a
p) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
either :: HParser a -> HParser b -> HParser (Either a b)
either :: forall a b. HParser a -> HParser b -> HParser (Either a b)
either HParser a
p HParser b
q =
(forall a b. a -> Either a b
Left forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser a
p) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
(forall a b. b -> Either a b
Right forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser b
q)
word :: String -> HParser ()
word :: String -> HParser ()
word String
s = do { (Posn, TokenT)
x <- forall t. Parser t t
next
; case (Posn, TokenT)
x of
(Posn
_p,TokName String
n) | String
sforall a. Eq a => a -> a -> Bool
==String
n -> forall (m :: * -> *) a. Monad m => a -> m a
return ()
(Posn
_p,TokFreeText String
n) | String
sforall a. Eq a => a -> a -> Bool
==String
n -> forall (m :: * -> *) a. Monad m => a -> m a
return ()
( Posn
p,t :: TokenT
t@(TokError String
_)) -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (p :: * -> *) a. PolyParse p => String -> p a
failBad (forall a. Show a => a -> String
show String
s) Posn
p TokenT
t
( Posn
p,TokenT
t) -> forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report forall (m :: * -> *) a. MonadFail m => String -> m a
fail (forall a. Show a => a -> String
show String
s) Posn
p TokenT
t
}
posn :: HParser Posn
posn :: HParser Posn
posn = do { x :: (Posn, TokenT)
x@(Posn
p,TokenT
_) <- forall t. Parser t t
next
; forall t. [t] -> Parser t ()
reparse [(Posn, TokenT)
x]
; forall (m :: * -> *) a. Monad m => a -> m a
return Posn
p
} forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall (m :: * -> *) a. Monad m => a -> m a
return Posn
noPos
nmtoken :: HParser NmToken
nmtoken :: HParser String
nmtoken = HParser String
string forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` HParser String
freetext
failP, failBadP :: String -> HParser a
failP :: forall a. String -> HParser a
failP String
msg = do { Posn
p <- HParser Posn
posn; forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String
msgforall a. [a] -> [a] -> [a]
++String
"\n at "forall a. [a] -> [a] -> [a]
++forall a. Show a => a -> String
show Posn
p) }
failBadP :: forall a. String -> HParser a
failBadP String
msg = do { Posn
p <- HParser Posn
posn; forall (p :: * -> *) a. PolyParse p => String -> p a
failBad (String
msgforall a. [a] -> [a] -> [a]
++String
"\n at "forall a. [a] -> [a] -> [a]
++forall a. Show a => a -> String
show Posn
p) }
report :: (String->HParser a) -> String -> Posn -> TokenT -> HParser a
report :: forall a.
(String -> HParser a) -> String -> Posn -> TokenT -> HParser a
report String -> HParser a
fail String
expect Posn
p TokenT
t = String -> HParser a
fail (String
"Expected "forall a. [a] -> [a] -> [a]
++forall a. Show a => a -> String
show String
expectforall a. [a] -> [a] -> [a]
++String
" but found "forall a. [a] -> [a] -> [a]
++forall a. Show a => a -> String
show TokenT
t
forall a. [a] -> [a] -> [a]
++String
"\n at "forall a. [a] -> [a] -> [a]
++forall a. Show a => a -> String
show Posn
p)
adjustErrP :: HParser a -> (String->String) -> HParser a
HParser a
p adjustErrP :: forall a. HParser a -> (String -> String) -> HParser a
`adjustErrP` String -> String
f = HParser a
p forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` do Posn
pn <- HParser Posn
posn
(HParser a
p forall (p :: * -> *) a.
Commitment p =>
p a -> (String -> String) -> p a
`adjustErr` String -> String
f) forall (p :: * -> *) a.
Commitment p =>
p a -> (String -> String) -> p a
`adjustErr` (forall a. [a] -> [a] -> [a]
++forall a. Show a => a -> String
show Posn
pn)
document :: HParser (Document Posn)
document :: HParser (Document Posn)
document = do
forall (m :: * -> *) a. Monad m => a -> m a
return forall i.
Prolog -> SymTab EntityDef -> Element i -> [Misc] -> Document i
Document
forall (p :: * -> *) a b. PolyParse p => p (a -> b) -> p a -> p b
`apply` (Parser (Posn, TokenT) Prolog
prolog forall (p :: * -> *) a.
Commitment p =>
p a -> (String -> String) -> p a
`adjustErr` (String
"unrecognisable XML prolog\n"forall a. [a] -> [a] -> [a]
++))
forall (p :: * -> *) a b. PolyParse p => p (a -> b) -> p a -> p b
`apply` forall (m :: * -> *) a. Monad m => a -> m a
return forall a. SymTab a
emptyST
forall (p :: * -> *) a b. PolyParse p => p (a -> b) -> p a -> p b
`apply` (do [(Stack, Element Posn)]
ht <- forall (p :: * -> *) a. PolyParse p => p a -> p [a]
many1 (QName -> HParser (Stack, Element Posn)
element (String -> QName
N String
"HTML document"))
forall (m :: * -> *) a. Monad m => a -> m a
return (case forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> b
snd [(Stack, Element Posn)]
ht of
[Element Posn
e] -> Element Posn
e
[Element Posn]
es -> forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
"html") [] (forall a b. (a -> b) -> [a] -> [b]
map Element Posn -> Content Posn
mkCElem [Element Posn]
es)))
forall (p :: * -> *) a b. PolyParse p => p (a -> b) -> p a -> p b
`apply` forall (f :: * -> *) a. Alternative f => f a -> f [a]
many HParser Misc
misc
where mkCElem :: Element Posn -> Content Posn
mkCElem Element Posn
e = forall i. Element i -> i -> Content i
CElem Element Posn
e Posn
noPos
comment :: HParser Comment
= do
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokCommentOpen) (TokenT -> HParser TokenT
tok TokenT
TokCommentClose) HParser String
freetext
processinginstruction :: HParser ProcessingInstruction
processinginstruction :: HParser ProcessingInstruction
processinginstruction = do
TokenT -> HParser TokenT
tok TokenT
TokPIOpen
forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ do
String
n <- HParser String
string forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP String
"processing instruction has no target"
String
f <- HParser String
freetext
(TokenT -> HParser TokenT
tok TokenT
TokPIClose forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` TokenT -> HParser TokenT
tok TokenT
TokAnyClose) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP String
"missing ?> or >"
forall (m :: * -> *) a. Monad m => a -> m a
return (String
n, String
f)
cdsect :: HParser CDSect
cdsect :: HParser String
cdsect = do
TokenT -> HParser TokenT
tok TokenT
TokSectionOpen
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok (Section -> TokenT
TokSection Section
CDATAx)) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokSectionClose) HParser String
chardata
prolog :: HParser Prolog
prolog :: Parser (Posn, TokenT) Prolog
prolog = do
Maybe XMLDecl
x <- forall a. HParser a -> HParser (Maybe a)
maybe HParser XMLDecl
xmldecl
[Misc]
m1 <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many HParser Misc
misc
Maybe DocTypeDecl
dtd <- forall a. HParser a -> HParser (Maybe a)
maybe HParser DocTypeDecl
doctypedecl
[Misc]
m2 <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many HParser Misc
misc
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe XMLDecl -> [Misc] -> Maybe DocTypeDecl -> [Misc] -> Prolog
Prolog Maybe XMLDecl
x [Misc]
m1 Maybe DocTypeDecl
dtd [Misc]
m2)
xmldecl :: HParser XMLDecl
xmldecl :: HParser XMLDecl
xmldecl = do
TokenT -> HParser TokenT
tok TokenT
TokPIOpen
String -> HParser ()
word String
"xml" forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser ()
word String
"XML"
Posn
p <- HParser Posn
posn
String
s <- HParser String
freetext
TokenT -> HParser TokenT
tok TokenT
TokPIClose forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failBadP String
"missing ?> in <?xml ...?>"
(forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall t a. Parser t a -> [t] -> (a, [t])
runParser HParser XMLDecl
aux forall b c a. (b -> c) -> (a -> b) -> a -> c
. Posn -> String -> [(Posn, TokenT)]
xmlReLex Posn
p) String
s
where
aux :: HParser XMLDecl
aux = do
String
v <- HParser String
versioninfo forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP String
"missing XML version info"
Maybe EncodingDecl
e <- forall a. HParser a -> HParser (Maybe a)
maybe HParser EncodingDecl
encodingdecl
Maybe Bool
s <- forall a. HParser a -> HParser (Maybe a)
maybe HParser Bool
sddecl
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> Maybe EncodingDecl -> Maybe Bool -> XMLDecl
XMLDecl String
v Maybe EncodingDecl
e Maybe Bool
s)
versioninfo :: HParser VersionInfo
versioninfo :: HParser String
versioninfo = do
String -> HParser ()
word String
"version" forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser ()
word String
"VERSION"
TokenT -> HParser TokenT
tok TokenT
TokEqual
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote) HParser String
freetext
misc :: HParser Misc
misc :: HParser Misc
misc =
forall (p :: * -> *) a. Commitment p => [(String, p a)] -> p a
oneOf' [ (String
"<!--comment-->", String -> Misc
Comment forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser String
comment)
, (String
"<?PI?>", ProcessingInstruction -> Misc
PI forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser ProcessingInstruction
processinginstruction)
]
doctypedecl :: HParser DocTypeDecl
doctypedecl :: HParser DocTypeDecl
doctypedecl = do
TokenT -> HParser TokenT
tok TokenT
TokSpecialOpen
TokenT -> HParser TokenT
tok (Special -> TokenT
TokSpecial Special
DOCTYPEx)
forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ do
QName
n <- HParser QName
qname
Maybe ExternalID
eid <- forall a. HParser a -> HParser (Maybe a)
maybe HParser ExternalID
externalid
TokenT -> HParser TokenT
tok TokenT
TokAnyClose forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP String
"missing > in DOCTYPE decl"
forall (m :: * -> *) a. Monad m => a -> m a
return (QName -> Maybe ExternalID -> [MarkupDecl] -> DocTypeDecl
DTD QName
n Maybe ExternalID
eid [])
sddecl :: HParser SDDecl
sddecl :: HParser Bool
sddecl = do
String -> HParser ()
word String
"standalone" forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser ()
word String
"STANDALONE"
forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ do
TokenT -> HParser TokenT
tok TokenT
TokEqual forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP String
"missing = in 'standalone' decl"
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote)
( (String -> HParser ()
word String
"yes" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Bool
True) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
(String -> HParser ()
word String
"no" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Bool
False) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
forall a. String -> HParser a
failP String
"'standalone' decl requires 'yes' or 'no' value" )
type Stack = [(Name,[Attribute])]
element :: QName -> HParser (Stack,Element Posn)
element :: QName -> HParser (Stack, Element Posn)
element (N String
ctx) =
do
TokenT -> HParser TokenT
tok TokenT
TokAnyOpen
(ElemTag (N String
e) [Attribute]
avs) <- HParser ElemTag
elemtag
( if String
e String -> String -> Bool
`closes` String
ctx then
( do forall (m :: * -> *). Monad m => String -> m ()
debug String
"/"
[TokenT] -> HParser ()
unparse ([TokenT
TokEndOpen, String -> TokenT
TokName String
ctx, TokenT
TokAnyClose,
TokenT
TokAnyOpen, String -> TokenT
TokName String
e] forall a. [a] -> [a] -> [a]
++ [Attribute] -> [TokenT]
reformatAttrs [Attribute]
avs)
forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
"null") [] []))
else if String
e forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String]
selfclosingtags then
( do TokenT -> HParser TokenT
tok TokenT
TokEndClose
forall (m :: * -> *). Monad m => String -> m ()
debug (String
eforall a. [a] -> [a] -> [a]
++String
"[+]")
forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
e) [Attribute]
avs [])) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
( do TokenT -> HParser TokenT
tok TokenT
TokAnyClose
forall (m :: * -> *). Monad m => String -> m ()
debug (String
eforall a. [a] -> [a] -> [a]
++String
"[+]")
forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
e) [Attribute]
avs []))
else
( do TokenT -> HParser TokenT
tok TokenT
TokEndClose
forall (m :: * -> *). Monad m => String -> m ()
debug (String
eforall a. [a] -> [a] -> [a]
++String
"[]")
forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
e) [Attribute]
avs [])) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
( do TokenT -> HParser TokenT
tok TokenT
TokAnyClose forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP String
"missing > or /> in element tag"
forall (m :: * -> *). Monad m => String -> m ()
debug (String
eforall a. [a] -> [a] -> [a]
++String
"[")
forall (m :: * -> *) a. Monad m => a -> m a
return (\ (Stack, [Content Posn])
interior-> let (Stack
stack,[Content Posn]
contained) = (Stack, [Content Posn])
interior
in (Stack
stack, forall i. QName -> [Attribute] -> [Content i] -> Element i
Elem (String -> QName
N String
e) [Attribute]
avs [Content Posn]
contained))
forall (p :: * -> *) a b. PolyParse p => p (a -> b) -> p a -> p b
`apply`
(do [(Stack, Content Posn)]
zz <- forall (p :: * -> *) a z. PolyParse p => p a -> p z -> p [a]
manyFinally (String -> HParser (Stack, Content Posn)
content String
e)
(TokenT -> HParser TokenT
tok TokenT
TokEndOpen)
(N String
n) <- HParser QName
qname
forall (p :: * -> *) a. Commitment p => p a -> p a
commit (TokenT -> HParser TokenT
tok TokenT
TokAnyClose)
forall (m :: * -> *). Monad m => String -> m ()
debug String
"]"
let ([Stack]
ss,[Content Posn]
cs) = forall a b. [(a, b)] -> ([a], [b])
unzip [(Stack, Content Posn)]
zz
let s :: Stack
s = if forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Stack]
ss then [] else forall a. [a] -> a
last [Stack]
ss
( if String
e forall a. Eq a => a -> a -> Bool
== (forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower String
n :: Name) then
do [TokenT] -> HParser ()
unparse (Stack -> [TokenT]
reformatTags (String -> Stack -> Stack
closeInner String
e Stack
s))
forall (m :: * -> *). Monad m => String -> m ()
debug String
"^"
forall (m :: * -> *) a. Monad m => a -> m a
return ([], [Content Posn]
cs)
else
do [TokenT] -> HParser ()
unparse [TokenT
TokEndOpen, String -> TokenT
TokName String
n, TokenT
TokAnyClose]
forall (m :: * -> *). Monad m => String -> m ()
debug String
"-"
forall (m :: * -> *) a. Monad m => a -> m a
return ((String
e,[Attribute]
avs)forall a. a -> [a] -> [a]
:Stack
s, [Content Posn]
cs)))
) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failP (String
"failed to repair non-matching tags in context: "forall a. [a] -> [a] -> [a]
++String
ctx))
closeInner :: Name -> [(Name,[Attribute])] -> [(Name,[Attribute])]
closeInner :: String -> Stack -> Stack
closeInner String
c Stack
ts =
case forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup String
c [(String, [String])]
closeInnerTags of
(Just [String]
these) -> forall a. (a -> Bool) -> [a] -> [a]
filter ((forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` [String]
these)forall b c a. (b -> c) -> (a -> b) -> a -> c
.forall a b. (a, b) -> a
fst) Stack
ts
Maybe [String]
Nothing -> Stack
ts
unparse :: [TokenT] -> Parser (Posn, TokenT) ()
unparse :: [TokenT] -> HParser ()
unparse [TokenT]
ts = do Posn
p <- HParser Posn
posn
forall t. [t] -> Parser t ()
reparse (forall a b. [a] -> [b] -> [(a, b)]
zip (forall a. a -> [a]
repeat Posn
p) [TokenT]
ts)
reformatAttrs :: [(QName, AttValue)] -> [TokenT]
reformatAttrs :: [Attribute] -> [TokenT]
reformatAttrs = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Attribute -> [TokenT]
f0
where f0 :: Attribute -> [TokenT]
f0 (N String
a, v :: AttValue
v@(AttValue [Either String Reference]
_)) = [String -> TokenT
TokName String
a, TokenT
TokEqual, TokenT
TokQuote,
String -> TokenT
TokFreeText (forall a. Show a => a -> String
show AttValue
v), TokenT
TokQuote]
reformatTags :: [(Name, [(QName, AttValue)])] -> [TokenT]
reformatTags :: Stack -> [TokenT]
reformatTags = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (String, [Attribute]) -> [TokenT]
f0
where f0 :: (String, [Attribute]) -> [TokenT]
f0 (String
t,[Attribute]
avs) = [TokenT
TokAnyOpen, String -> TokenT
TokName String
t]forall a. [a] -> [a] -> [a]
++[Attribute] -> [TokenT]
reformatAttrs [Attribute]
avs
forall a. [a] -> [a] -> [a]
++[TokenT
TokAnyClose]
content :: Name -> HParser (Stack,Content Posn)
content :: String -> HParser (Stack, Content Posn)
content String
ctx = do { Posn
p <- HParser Posn
posn ; Posn -> HParser (Stack, Content Posn)
content' Posn
p }
where content' :: Posn -> HParser (Stack, Content Posn)
content' Posn
p = forall (p :: * -> *) a. Commitment p => [(String, p a)] -> p a
oneOf'
[ ( String
"element", QName -> HParser (Stack, Element Posn)
element (String -> QName
N String
ctx) forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \(Stack
s,Element Posn
e)-> forall (m :: * -> *) a. Monad m => a -> m a
return (Stack
s, forall i. Element i -> i -> Content i
CElem Element Posn
e Posn
p))
, ( String
"chardata", HParser String
chardata forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \String
s-> forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. Bool -> String -> i -> Content i
CString Bool
False String
s Posn
p))
, ( String
"reference", HParser Reference
reference forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Reference
r-> forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. Reference -> i -> Content i
CRef Reference
r Posn
p))
, ( String
"cdsect", HParser String
cdsect forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \String
c-> forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. Bool -> String -> i -> Content i
CString Bool
True String
c Posn
p))
, ( String
"misc", HParser Misc
misc forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Misc
m-> forall (m :: * -> *) a. Monad m => a -> m a
return ([], forall i. Misc -> i -> Content i
CMisc Misc
m Posn
p))
] forall a. HParser a -> (String -> String) -> HParser a
`adjustErrP` (String
"when looking for a content item,\n"forall a. [a] -> [a] -> [a]
++)
elemtag :: HParser ElemTag
elemtag :: HParser ElemTag
elemtag = do
(N String
n) <- HParser QName
qname forall (p :: * -> *) a.
PolyParse p =>
p a -> (String -> String) -> p a
`adjustErrBad` (String
"malformed element tag\n"forall a. [a] -> [a] -> [a]
++)
[Attribute]
as <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
many HParser Attribute
attribute
forall (m :: * -> *) a. Monad m => a -> m a
return (QName -> [Attribute] -> ElemTag
ElemTag (String -> QName
N (forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower String
n)) [Attribute]
as)
attribute :: HParser Attribute
attribute :: HParser Attribute
attribute = do
(N String
n) <- HParser QName
qname
AttValue
v <- (do TokenT -> HParser TokenT
tok TokenT
TokEqual
Parser (Posn, TokenT) AttValue
attvalue) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
forall (m :: * -> *) a. Monad m => a -> m a
return ([Either String Reference] -> AttValue
AttValue [forall a b. a -> Either a b
Left String
"TRUE"])
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> QName
N (forall a b. (a -> b) -> [a] -> [b]
map Char -> Char
toLower String
n), AttValue
v)
reference :: HParser Reference
reference :: HParser Reference
reference = do
forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokAmp) (TokenT -> HParser TokenT
tok TokenT
TokSemi) (HParser String
freetext forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall {m :: * -> *}. Monad m => String -> m Reference
val)
where
val :: String -> m Reference
val (Char
'#':Char
'x':String
i) | forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isHexDigit String
i
= forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. CharRef -> Reference
RefChar forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> a
head forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (Eq a, Num a) => ReadS a
readHex forall a b. (a -> b) -> a -> b
$ String
i
val (Char
'#':String
i) | forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Char -> Bool
isDigit String
i
= forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. CharRef -> Reference
RefChar forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> a
head forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (Eq a, Num a) => ReadS a
readDec forall a b. (a -> b) -> a -> b
$ String
i
val String
ent = forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Reference
RefEntity forall a b. (a -> b) -> a -> b
$ String
ent
externalid :: HParser ExternalID
externalid :: HParser ExternalID
externalid =
( do String -> HParser ()
word String
"SYSTEM"
SystemLiteral -> ExternalID
SYSTEM forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> HParser SystemLiteral
systemliteral
) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
( do String -> HParser ()
word String
"PUBLIC"
PubidLiteral
p <- HParser PubidLiteral
pubidliteral
SystemLiteral
s <- HParser SystemLiteral
systemliteral forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall (m :: * -> *) a. Monad m => a -> m a
return (String -> SystemLiteral
SystemLiteral String
"")
forall (m :: * -> *) a. Monad m => a -> m a
return (PubidLiteral -> SystemLiteral -> ExternalID
PUBLIC PubidLiteral
p SystemLiteral
s))
encodingdecl :: HParser EncodingDecl
encodingdecl :: HParser EncodingDecl
encodingdecl = do
String -> HParser ()
word String
"encoding" forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` String -> HParser ()
word String
"ENCODING"
TokenT -> HParser TokenT
tok TokenT
TokEqual forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall a. String -> HParser a
failBadP String
"expected = in 'encoding' decl"
String
f <- forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote) HParser String
freetext
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> EncodingDecl
EncodingDecl String
f)
attvalue :: HParser AttValue
attvalue :: Parser (Posn, TokenT) AttValue
attvalue =
( do [Either String Reference]
avs <- forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote)
(forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (forall a b. HParser a -> HParser b -> HParser (Either a b)
either HParser String
freetext HParser Reference
reference))
forall (m :: * -> *) a. Monad m => a -> m a
return ([Either String Reference] -> AttValue
AttValue [Either String Reference]
avs) ) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
( do String
v <- HParser String
nmtoken
String
s <- (TokenT -> HParser TokenT
tok TokenT
TokPercent forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return String
"%") forall t a. Parser t a -> Parser t a -> Parser t a
`onFail` forall (m :: * -> *) a. Monad m => a -> m a
return String
""
forall (m :: * -> *) a. Monad m => a -> m a
return ([Either String Reference] -> AttValue
AttValue [forall a b. a -> Either a b
Left (String
vforall a. [a] -> [a] -> [a]
++String
s)]) ) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
( do String
s <- forall (p :: * -> *) a. PolyParse p => [p a] -> p a
oneOf [ TokenT -> HParser TokenT
tok TokenT
TokPlus forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return String
"+"
, TokenT -> HParser TokenT
tok TokenT
TokHash forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return String
"#"
]
String
v <- HParser String
nmtoken
forall (m :: * -> *) a. Monad m => a -> m a
return ([Either String Reference] -> AttValue
AttValue [forall a b. a -> Either a b
Left (String
sforall a. [a] -> [a] -> [a]
++String
v)]) ) forall t a. Parser t a -> Parser t a -> Parser t a
`onFail`
forall a. String -> HParser a
failP String
"Badly formatted attribute value"
systemliteral :: HParser SystemLiteral
systemliteral :: HParser SystemLiteral
systemliteral = do
String
s <- forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote) HParser String
freetext
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> SystemLiteral
SystemLiteral String
s)
pubidliteral :: HParser PubidLiteral
pubidliteral :: HParser PubidLiteral
pubidliteral = do
String
s <- forall (p :: * -> *) bra ket a.
PolyParse p =>
p bra -> p ket -> p a -> p a
bracket (TokenT -> HParser TokenT
tok TokenT
TokQuote) (forall (p :: * -> *) a. Commitment p => p a -> p a
commit forall a b. (a -> b) -> a -> b
$ TokenT -> HParser TokenT
tok TokenT
TokQuote) HParser String
freetext
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> PubidLiteral
PubidLiteral String
s)
chardata :: HParser CharData
chardata :: HParser String
chardata = HParser String
freetext