module HtmlLex(module HtmlLex) where
import Html(HtmlTag(..),noAttrs,attrs) --,TagAttrs(..)
import HtmlTags
import HtmlPrinter(printTag)
import HtmlEntities(decode)
import RichTextLex
import AnchorParser(parseTag)
import qualified Data.Map as OM
import Utils2(isSpace')
import Unicode(compose)
--import LexSymbol
default (Int)
data HtmlLex = HtmlEntities String
| HtmlSpace String
| HtmlBadTag String
| HtmlTag HtmlTag
| HtmlEndTag TagName
cmpLex l1 l2 =
case (l1,l2) of
(HtmlTag (t1,_),HtmlTag (t2,_)) -> compare t1 t2
(HtmlEndTag t1,HtmlEndTag t2) -> compare t1 t2
_ -> compare (pos l1) (pos l2)
where
pos (HtmlEntities _) = 0
pos (HtmlSpace _) = 1
pos (HtmlBadTag _) = 2
pos (HtmlTag _) = 3
pos (HtmlEndTag _) = 4
instance Eq HtmlLex where l1==l2 = compare l1 l2==EQ
instance Ord HtmlLex where compare = cmpLex
instance Show HtmlLex where showsPrec n l = (sh1 l++)
{-
instance Symbol HtmlLex where lexToInt = lex2int
lex2int = toInt
where
firstendtag = 4+fromEnum (maxBound::TagName)
toInt l =
case l of
HtmlEntities _ -> 0
HtmlSpace _ -> 1
HtmlBadTag _ -> 2
HtmlTag (t,_) -> 3+fromEnum t
HtmlEndTag t -> firstendtag + fromEnum t
-}
{-
-- This can not be derived:
instance Ix HtmlLex where
index (lo,hi) i = lex2int i-lex2int lo
inRange (lo,hi) i = lex2int lo<=lex2int i && lex2int i<=lex2int hi
-}
-- Characteristic elements from each equvalence class:
htmlEntities = HtmlEntities "plain text"
htmlSpace = HtmlSpace "space"
htmlBadTag = HtmlBadTag "bad tag"
htmlTag t = HtmlTag (t,noAttrs)
htmlEndTag = HtmlEndTag
htmlAny = [htmlEntities,htmlSpace,htmlBadTag]++
map htmlTag allTags++
map htmlEndTag allTags
htmlLex = cleanup . map rt2html . rtlex
where
rt2html rt =
case rt of
Chars s ->
if all isSpace' s
then HtmlSpace s
else HtmlEntities (compose (decode s))
FmtCmd s1@('/':s) ->
case parseTag s of
Just (n,_) ->
case tagLex n of
Just t -> HtmlEndTag t
_ -> HtmlBadTag s1
_ -> HtmlBadTag s1
FmtCmd s ->
case parseTag s of
Just (n,as) ->
case tagLex n of
Just t -> HtmlTag (t,attrs as)
_ -> HtmlBadTag s
_ -> HtmlBadTag s
Comment s -> HtmlBadTag ("!--"++s++"--")
cleanup [] = []
cleanup (HtmlEntities "":ls) = cleanup ls
cleanup (HtmlSpace "":ls) = cleanup ls
cleanup (HtmlTag t:l:ls) = HtmlTag t:cleanup (rmnl l:ls)
cleanup (l1:l2@(HtmlEndTag t):ls) =
case rmlastnl l1 of
HtmlEntities "" -> cleanup (l2:ls)
HtmlSpace "" -> cleanup (l2:ls)
l1' -> l1:cleanup (l2:ls)
cleanup (HtmlEndTag BODY:ls) = cleanup ls -- these only cause trouble
cleanup (HtmlEndTag HTML:ls) = cleanup ls
cleanup (l:ls) = l:cleanup ls
rmnl = mapchars dropnl
rmlastnl = mapchars (reverse . dropnl .reverse)
dropnl ('\n':s) = s ; dropnl s = s
mapchars f (HtmlEntities s) = HtmlEntities (f s)
mapchars f (HtmlSpace s) = HtmlSpace (f s)
mapchars f l = l
tagLex = lookup
where
lookup name = OM.lookup name tags
tags = OM.fromList ([(show t,t) | t <- allTags])
{-
-- This solution is only marginally faster:
lookup name =
case name of
"TITLE" -> Just TITLE
...
"ISINDEX" -> Just ISINDEX
"FUPPLET" -> Just FUPPLET
_ -> Nothing
---}
showHtmlLex xs = concatMap sh1 xs
sh1 l =
case l of
HtmlEntities s -> s
HtmlSpace s -> s
HtmlTag t -> printTag t
HtmlEndTag s -> ""++show s++">"
HtmlBadTag s -> "<"++s++">"