{-# LANGUAGE OverloadedStrings #-}
module Text.HTMLEntity.Parser where
import Control.Applicative
import Data.Attoparsec.Text as A
import Data.Char
import Data.Functor
import qualified Data.HashMap.Strict as H
import Data.Ix
import Data.Monoid.Compat
import qualified Data.Text as T
import qualified Text.HTMLEntity.Table as Table
import Prelude.Compat
decodeParser :: Parser T.Text
decodeParser =
liftA2
(<>)
((A.takeWhile1 (/= '&') <?> "plain text") <|> (ent <?> "entity"))
(eof <|> decodeParser)
where
eof = "" <$ endOfInput
ent = do
void $ char '&'
inner <-
eitherP
(do void $ char '#'
(char 'x' *> hexadecimal) <|> decimal)
(takeWhile1 (/= ';') <?> "entity name")
void $ char ';'
case inner of
Right ename ->
case H.lookup ename Table.names of
Just u -> pure u
Nothing -> fail $ "Unknown entity name " ++ T.unpack ename
Left n
| n > ord maxBound -> fail $ show n ++ " is out of Char range"
| otherwise -> pure $ T.singleton $ chr n
encodeParser :: Parser T.Text
encodeParser = mmany $ multichar <|> singlechar <|> fallback
where
mmany p = mmany_p
where
mmany_p = ssome_p <|> pure mempty
ssome_p = liftA2 (<>) p mmany_p
multichar =
foldr1 (<|>) $
map (\(a, b) -> entity b <$ string a) $ H.toList Table.entitiesMulti
singlechar = do
c <- specialsClass
return $
if inRange ('!', '}') c && c /= '&' && c /= '<' && c /= '>'
then T.singleton c
else entity $ H.lookupDefault undefined c Table.entitiesSingle
fallback = do
c <- anyChar
return $
if c < ' ' || c > '~'
then decEntity c
else T.singleton c
specialsClass = satisfy (inClass $ H.keys Table.entitiesSingle)
entity s = "&" <> s <> ";"
decEntity c = "&#" <> T.pack (show (ord c)) <> ";"