{-# LANGUAGE OverloadedStrings #-}
module Commonmark.Tag
( htmlTag
, htmlOpenTag
, htmlClosingTag
, htmlAttributeName
, htmlAttributeValue
, htmlDoubleQuotedAttributeValue
, Enders
, defaultEnders )
where
import Commonmark.Tokens
import Commonmark.TokParsers
import Control.Monad (liftM2, guard)
import Control.Monad.Trans.State.Strict
import Control.Monad.Trans.Class (lift)
import Data.Char (isAscii, isLetter)
import qualified Data.Text as T
import Text.Parsec hiding (State)
data Enders =
Enders
{ scannedForCDATA :: !Bool
, scannedForProcessingInstruction :: !Bool
, scannedForDeclaration :: !Bool
} deriving Show
defaultEnders :: Enders
defaultEnders = Enders { scannedForCDATA = False
, scannedForProcessingInstruction = False
, scannedForDeclaration = False }
(.&&.) :: (a -> Bool) -> (a -> Bool) -> (a -> Bool)
(.&&.) = liftM2 (&&)
htmlTagName :: Monad m => ParsecT [Tok] s m [Tok]
htmlTagName = try $ do
let isTagText = T.all isAscii
let startsWithLetter t' = not (T.null t') && isLetter (T.head t')
t <- satisfyWord (isTagText .&&. startsWithLetter)
rest <- many (symbol '-' <|> satisfyWord isTagText)
return (t:rest)
htmlAttributeName :: Monad m => ParsecT [Tok] s m [Tok]
htmlAttributeName = try $ do
let isTagText t' = T.all isAscii t'
let startsWithLetter t' = not (T.null t') && isLetter (T.head t')
t <- satisfyWord (startsWithLetter .&&. isTagText) <|>
symbol '_' <|>
symbol ':'
rest <- many $ satisfyWord isTagText
<|> symbol '_'
<|> symbol '-'
<|> symbol '.'
<|> symbol ':'
return (t:rest)
htmlAttributeValueSpec :: Monad m => ParsecT [Tok] s m [Tok]
htmlAttributeValueSpec = try $ do
sps1 <- option [] whitespace
eq <- symbol '='
sps2 <- option [] whitespace
val <- htmlAttributeValue
return $ sps1 ++ [eq] ++ sps2 ++ val
htmlAttributeValue :: Monad m => ParsecT [Tok] s m [Tok]
htmlAttributeValue =
htmlUnquotedAttributeValue <|>
htmlSingleQuotedAttributeValue <|>
htmlDoubleQuotedAttributeValue
htmlAttribute :: Monad m => ParsecT [Tok] s m [Tok]
htmlAttribute = try $ do
sps <- whitespace
n <- htmlAttributeName
val <- option [] htmlAttributeValueSpec
return $ sps ++ n ++ val
htmlUnquotedAttributeValue :: Monad m => ParsecT [Tok] s m [Tok]
htmlUnquotedAttributeValue =
many1 $ noneOfToks [Spaces, LineEnd, Symbol '<', Symbol '>',
Symbol '=', Symbol '`', Symbol '\'', Symbol '"']
htmlSingleQuotedAttributeValue :: Monad m => ParsecT [Tok] s m [Tok]
htmlSingleQuotedAttributeValue = try $ do
op <- symbol '\''
contents <- many (satisfyTok (not . hasType (Symbol '\'')))
cl <- symbol '\''
return $ op : contents ++ [cl]
htmlDoubleQuotedAttributeValue :: Monad m => ParsecT [Tok] s m [Tok]
htmlDoubleQuotedAttributeValue = try $ do
op <- symbol '"'
contents <- many (satisfyTok (not . hasType (Symbol '"')))
cl <- symbol '"'
return $ op : contents ++ [cl]
htmlOpenTag :: Monad m => ParsecT [Tok] s m [Tok]
htmlOpenTag = try $ do
n <- htmlTagName
attrs <- concat <$> many htmlAttribute
sps <- option [] whitespace
sl <- option [] $ (:[]) <$> symbol '/'
cl <- symbol '>'
return $ n ++ attrs ++ sps ++ sl ++ [cl]
htmlClosingTag :: Monad m => ParsecT [Tok] s m [Tok]
htmlClosingTag = try $ do
op <- symbol '/'
n <- htmlTagName
sps <- option [] whitespace
cl <- symbol '>'
return $ op : n ++ sps ++ [cl]
htmlComment :: Monad m => ParsecT [Tok] s m [Tok]
htmlComment = try $ do
op <- sequence [ symbol '!'
, symbol '-'
, symbol '-' ]
notFollowedBy $ do
optional $ symbol '-'
symbol '>'
contents <- many $ satisfyTok (not . hasType (Symbol '-'))
<|> try (symbol '-' <* notFollowedBy (symbol '-'))
cl <- sequence [ symbol '-'
, symbol '-'
, symbol '>' ]
return $ op ++ contents ++ cl
htmlProcessingInstruction :: Monad m
=> ParsecT [Tok] s (StateT Enders m) [Tok]
htmlProcessingInstruction = try $ do
let questionmark = symbol '?'
op <- questionmark
alreadyScanned <- lift $ gets scannedForProcessingInstruction
guard $ not alreadyScanned
contents <- many $ satisfyTok (not . hasType (Symbol '?'))
<|> try (questionmark <*
notFollowedBy (symbol '>'))
lift $ modify $ \st -> st{ scannedForProcessingInstruction = True }
cl <- sequence [ questionmark
, symbol '>' ]
return $ op : contents ++ cl
htmlDeclaration :: Monad m => ParsecT [Tok] s (StateT Enders m) [Tok]
htmlDeclaration = try $ do
op <- symbol '!'
alreadyScanned <- lift $ gets scannedForDeclaration
guard $ not alreadyScanned
let isDeclName t = not (T.null t) && T.all (isAscii .&&. isLetter) t
name <- satisfyWord isDeclName
ws <- whitespace
contents <- many (satisfyTok (not . hasType (Symbol '>')))
lift $ modify $ \st -> st{ scannedForDeclaration = True }
cl <- symbol '>'
return $ op : name : ws ++ contents ++ [cl]
htmlCDATASection :: Monad m => ParsecT [Tok] s (StateT Enders m) [Tok]
htmlCDATASection = try $ do
op <- sequence [ symbol '!'
, symbol '['
, satisfyWord (== "CDATA")
, symbol '[' ]
alreadyScanned <- lift $ gets scannedForCDATA
guard $ not alreadyScanned
let ender = try $ sequence [ symbol ']'
, symbol ']'
, symbol '>' ]
contents <- many $ do
notFollowedBy ender
anyTok
lift $ modify $ \st -> st{ scannedForCDATA = True }
cl <- ender
return $ op ++ contents ++ cl
htmlTag :: Monad m => ParsecT [Tok] s (StateT Enders m) [Tok]
htmlTag = htmlOpenTag <|> htmlClosingTag <|> htmlComment <|>
htmlProcessingInstruction <|> htmlDeclaration <|> htmlCDATASection