{-# LANGUAGE CPP #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE BangPatterns #-}
module Commonmark.Blocks
( mkBlockParser
, defaultBlockSpecs
, BlockStartResult(..)
, BlockSpec(..)
, BlockData(..)
, defBlockData
, BlockNode
, BPState(..)
, BlockParser
, LinkInfo(..)
, defaultFinalizer
, runInlineParser
, addNodeToStack
, collapseNodeStack
, getBlockText
, removeIndent
, bspec
, endOfBlock
, interruptsParagraph
, linkReferenceDef
, renderChildren
, docSpec
, indentedCodeSpec
, fencedCodeSpec
, blockQuoteSpec
, atxHeadingSpec
, setextHeadingSpec
, thematicBreakSpec
, listItemSpec
, bulletListMarker
, orderedListMarker
, rawHtmlSpec
, attributeSpec
, paraSpec
, plainSpec
)
where
import Commonmark.Tag
import Commonmark.TokParsers
import Commonmark.ReferenceMap
import Commonmark.Inlines (pEscaped, pLinkDestination,
pLinkLabel, pLinkTitle)
import Commonmark.Entity (unEntity)
import Commonmark.Tokens
import Commonmark.Types
import Control.Monad (foldM, guard, mzero, void, unless,
when)
import Control.Monad.Trans.Class (lift)
import Data.Foldable (foldrM)
#if !MIN_VERSION_base(4,11,0)
import Data.Monoid
#endif
import Data.Char (isAsciiUpper, isDigit, isSpace)
import Data.Dynamic
import Data.List (sort)
import Data.Text (Text)
import qualified Data.Map.Strict as M
import qualified Data.Text as T
import qualified Data.Text.Read as TR
import Data.Tree
import Text.Parsec
mkBlockParser
:: (Monad m, IsBlock il bl)
=> [BlockSpec m il bl]
-> [BlockParser m il bl bl]
-> (ReferenceMap -> [Tok] -> m (Either ParseError il))
-> [BlockParser m il bl Attributes]
-> [Tok]
-> m (Either ParseError bl)
mkBlockParser specs finalParsers ilParser attrParsers ts =
runParserT (do case ts of
(t:_) -> setPosition (tokPos t)
[] -> return ()
processLines specs finalParsers)
BPState{ referenceMap = emptyReferenceMap
, inlineParser = ilParser
, nodeStack = [Node (defBlockData docSpec) []]
, blockMatched = False
, maybeLazy = True
, maybeBlank = True
, counters = M.empty
, failurePositions = M.empty
, attributeParsers = attrParsers
, nextAttributes = mempty
}
"source" (length ts `seq` ts)
processLines :: (Monad m, IsBlock il bl)
=> [BlockSpec m il bl]
-> [BlockParser m il bl bl]
-> BlockParser m il bl bl
processLines specs finalParsers = {-# SCC processLines #-} do
let go = eof <|> (processLine specs >> go) in go
tree <- (nodeStack <$> getState) >>= collapseNodeStack
updateState $ \st -> st{ nodeStack = [reverseSubforests tree] }
endContent <- mconcat <$> sequence finalParsers
tree':_ <- nodeStack <$> getState
body <- blockConstructor (blockSpec (rootLabel tree')) tree'
return $! body <> endContent
reverseSubforests :: Tree a -> Tree a
reverseSubforests (Node x cs) = Node x $ map reverseSubforests $ reverse cs
processLine :: (Monad m, IsBlock il bl)
=> [BlockSpec m il bl] -> BlockParser m il bl ()
processLine specs = do
st' <- getState
putState $ st'{ blockMatched = True
, maybeLazy = True
, maybeBlank = True
, failurePositions = M.empty }
(matched, unmatched) <- foldrM checkContinue ([],[]) (nodeStack st')
updateState $ \st -> st{ maybeLazy = maybeLazy st &&
case unmatched of
m:_ -> blockParagraph (bspec m)
_ -> False }
if null unmatched
then updateState $ \st -> st{ nodeStack = matched }
else case matched of
[] -> error "no blocks matched"
m:ms -> do
m' <- collapseNodeStack (unmatched ++ [m])
updateState $ \st -> st{ nodeStack = m':ms }
restBlank <- option False $ True <$ lookAhead blankLine
{-# SCC block_starts #-} unless restBlank $
(do skipMany1 (doBlockStarts specs)
optional (try (blockStart paraSpec)))
<|>
(do getState >>= guard . maybeLazy
sp <- getPosition
updateState $ \st -> st{ nodeStack =
map (addStartPos sp) (unmatched ++ matched) })
<|>
void (try (blockStart paraSpec))
<|>
return ()
(cur:rest) <- nodeStack <$> getState
let curdata = rootLabel cur
when (blockParagraph (bspec cur)) $ skipMany spaceTok
pos <- getPosition
toks <- {-# SCC restOfLine #-} restOfLine
updateState $ \st -> st{
nodeStack =
cur{ rootLabel =
if blockContainsLines (bspec cur)
then curdata{ blockLines = toks : blockLines curdata }
else
if maybeBlank st && restBlank
then curdata{ blockBlanks = sourceLine pos :
blockBlanks curdata }
else curdata
} : rest
}
addStartPos :: SourcePos -> BlockNode m il bl -> BlockNode m il bl
addStartPos sp (Node bd cs) = Node bd{ blockStartPos = sp : blockStartPos bd } cs
doBlockStarts :: Monad m => [BlockSpec m il bl] -> BlockParser m il bl ()
doBlockStarts specs = do
st' <- getState
initPos <- getPosition
let failurePosMap = failurePositions st'
let specs' = foldr (\spec sps ->
case M.lookup (blockType spec) failurePosMap of
Just pos' | initPos < pos' -> sps
_ -> spec:sps) [] specs
go initPos specs'
where
go _ [] = mzero
go initPos (spec:otherSpecs) = try (do
pst <- getParserState
res <- blockStart spec
case res of
BlockStartMatch -> return ()
BlockStartNoMatchBefore pos -> do
setParserState pst
unless (pos == initPos) $
updateState $ \st ->
st{ failurePositions =
M.insert (blockType spec)
pos (failurePositions st) }
go initPos otherSpecs) <|> go initPos otherSpecs
checkContinue :: Monad m
=> BlockNode m il bl
-> ([BlockNode m il bl],[BlockNode m il bl])
-> BlockParser m il bl ([BlockNode m il bl],[BlockNode m il bl])
checkContinue nd (matched, unmatched) = do
ismatched <- blockMatched <$> getState
if ismatched
then
{-# SCC blockContinues #-}
(do (startpos, Node bdata children) <- blockContinue (bspec nd) nd
matched' <- blockMatched <$> getState
unless matched' $
updateState $ \st -> st{ maybeBlank = False,
maybeLazy = False }
let new = Node bdata{ blockStartPos =
startpos : blockStartPos bdata
} children
return $!
if matched'
then (new:matched, unmatched)
else (matched, new:unmatched))
<|> (matched, nd:unmatched) <$ updateState (\st -> st{
blockMatched = False })
else return $! (matched, nd:unmatched)
data BlockStartResult =
BlockStartMatch
| BlockStartNoMatchBefore !SourcePos
deriving (Show, Eq)
data BlockSpec m il bl = BlockSpec
{ blockType :: !Text
, blockStart :: BlockParser m il bl BlockStartResult
, blockCanContain :: BlockSpec m il bl -> Bool
, blockContainsLines :: !Bool
, blockParagraph :: !Bool
, blockContinue :: BlockNode m il bl
-> BlockParser m il bl (SourcePos, BlockNode m il bl)
, blockConstructor :: BlockNode m il bl -> BlockParser m il bl bl
, blockFinalize :: BlockNode m il bl -> BlockNode m il bl
-> BlockParser m il bl (BlockNode m il bl)
}
instance Show (BlockSpec m il bl) where
show bs = "<BlockSpec " ++ T.unpack (blockType bs) ++ ">"
defaultBlockSpecs :: (Monad m, IsBlock il bl) => [BlockSpec m il bl]
defaultBlockSpecs =
[ indentedCodeSpec
, fencedCodeSpec
, blockQuoteSpec
, atxHeadingSpec
, setextHeadingSpec
, thematicBreakSpec
, listItemSpec (bulletListMarker <|> orderedListMarker)
, rawHtmlSpec
, attributeSpec
]
defaultFinalizer :: Monad m
=> BlockNode m il bl
-> BlockNode m il bl
-> BlockParser m il bl (BlockNode m il bl)
defaultFinalizer !child !parent = do
case lookup "id" (blockAttributes (rootLabel child)) of
Nothing -> return ()
Just !ident -> updateState $ \st ->
st{ counters = M.insert ("identifier:" <> ident)
(toDyn (0 :: Int)) (counters st) }
return $! parent{ subForest = child : subForest parent }
data BlockData m il bl = BlockData
{ blockSpec :: BlockSpec m il bl
, blockLines :: [[Tok]]
, blockStartPos :: [SourcePos]
, blockData :: !Dynamic
, blockBlanks :: [Int]
, blockAttributes :: !Attributes
}
deriving Show
defBlockData :: BlockSpec m il bl -> BlockData m il bl
defBlockData spec = BlockData
{ blockSpec = spec
, blockLines = []
, blockStartPos = []
, blockData = toDyn ()
, blockBlanks = []
, blockAttributes = mempty
}
type BlockNode m il bl = Tree (BlockData m il bl)
data BPState m il bl = BPState
{ referenceMap :: !ReferenceMap
, inlineParser :: ReferenceMap -> [Tok] -> m (Either ParseError il)
, nodeStack :: [BlockNode m il bl]
, blockMatched :: !Bool
, maybeLazy :: !Bool
, maybeBlank :: !Bool
, counters :: M.Map Text Dynamic
, failurePositions :: M.Map Text SourcePos
, attributeParsers :: [ParsecT [Tok] (BPState m il bl) m Attributes]
, nextAttributes :: !Attributes
}
type BlockParser m il bl = ParsecT [Tok] (BPState m il bl) m
data ListData = ListData
{ listType :: !ListType
, listSpacing :: !ListSpacing
} deriving (Show, Eq)
data ListItemData = ListItemData
{ listItemType :: !ListType
, listItemIndent :: !Int
, listItemBlanksInside :: !Bool
, listItemBlanksAtEnd :: !Bool
} deriving (Show, Eq)
runInlineParser :: Monad m
=> [Tok]
-> BlockParser m il bl il
runInlineParser toks = {-# SCC runInlineParser #-} do
refmap <- referenceMap <$> getState
ilParser <- inlineParser <$> getState
res <- lift $ ilParser refmap toks
case res of
Right ils -> return $! ils
Left err -> mkPT (\_ -> return (Empty (return (Error err))))
addRange :: (Monad m, IsBlock il bl)
=> BlockNode m il bl -> bl -> bl
addRange (Node b _)
= ranged (SourceRange
(go . reverse $ map (\pos ->
(pos, setSourceColumn
(incSourceLine pos 1) 1))
(blockStartPos b)))
where
go [] = []
go ((!startpos1, !endpos1):(!startpos2, !endpos2):rest)
| endpos1 == startpos2 = go ((startpos1, endpos2):rest)
go (!x:xs) = x : go xs
addNodeToStack :: Monad m => BlockNode m bl il -> BlockParser m bl il ()
addNodeToStack node = do
(cur:rest) <- nodeStack <$> getState
guard $ blockParagraph (bspec cur) || not (blockContainsLines (bspec cur))
if blockCanContain (bspec cur) (bspec node)
then do
nextAttr <- nextAttributes <$> getState
let node' = if null nextAttr
then node
else
let rl = rootLabel node
in node{ rootLabel = rl{
blockAttributes = nextAttr
}}
updateState $ \st ->
st{ nextAttributes = mempty
, nodeStack = node' : cur : rest
, maybeLazy = False }
else case rest of
(x:xs) -> do
stack <- (:xs) <$> collapseNodeStack [cur,x]
updateState $ \st -> st{ nodeStack = stack }
addNodeToStack node
_ -> mzero
interruptsParagraph :: Monad m => BlockParser m bl il Bool
interruptsParagraph = do
(cur:_) <- nodeStack <$> getState
return $! blockParagraph (bspec cur)
renderChildren :: (Monad m, IsBlock il bl)
=> BlockNode m il bl -> BlockParser m il bl [bl]
renderChildren node = mapM renderC $ subForest node
where
renderC n = do
let attrs = blockAttributes (rootLabel n)
(if null attrs
then id
else addAttributes attrs) .
addRange n <$> blockConstructor (blockSpec (rootLabel n)) n
docSpec :: (Monad m, IsBlock il bl, Monoid bl) => BlockSpec m il bl
docSpec = BlockSpec
{ blockType = "Doc"
, blockStart = mzero
, blockCanContain = const True
, blockContainsLines = False
, blockParagraph = False
, blockContinue = \n -> (,n) <$> getPosition
, blockConstructor = fmap mconcat . renderChildren
, blockFinalize = defaultFinalizer
}
refLinkDefSpec :: (Monad m, IsBlock il bl)
=> BlockSpec m il bl
refLinkDefSpec = BlockSpec
{ blockType = "ReferenceLinkDefinition"
, blockStart = mzero
, blockCanContain = const False
, blockContainsLines = False
, blockParagraph = False
, blockContinue = const mzero
, blockConstructor = \node -> do
let linkdefs = fromDyn (blockData (rootLabel node))
undefined :: [((SourceRange, Text), LinkInfo)]
return $! mconcat $ map (\((range, lab), linkinfo) ->
(ranged range
(addAttributes (linkAttributes linkinfo)
(referenceLinkDefinition lab (linkDestination linkinfo,
linkTitle linkinfo))))) linkdefs
, blockFinalize = defaultFinalizer
}
extractReferenceLinks :: (Monad m, IsBlock il bl)
=> BlockNode m il bl
-> BlockParser m il bl (Maybe (BlockNode m il bl),
Maybe (BlockNode m il bl))
extractReferenceLinks node = do
st <- getState
res <- lift $ runParserT ((,) <$> ((lookAhead anyTok >>= setPosition . tokPos) >>
many1 (linkReferenceDef (choice $ attributeParsers st)))
<*> getInput) st "" (getBlockText node)
case res of
Left _ -> return $! (Just node, Nothing)
Right (linkdefs, toks') -> do
mapM_
(\((_,lab),linkinfo) ->
updateState $ \s -> s{
referenceMap = insertReference lab linkinfo
(referenceMap s) }) linkdefs
let isRefPos = case toks' of
(t:_) -> (< tokPos t)
_ -> const False
let node' = if null toks'
then Nothing
else Just node{ rootLabel =
(rootLabel node){
blockLines = [toks'],
blockStartPos = dropWhile isRefPos
(blockStartPos (rootLabel node))
}
}
let refnode = node{ rootLabel =
(rootLabel node){
blockLines = takeWhile (any (isRefPos . tokPos))
(blockLines (rootLabel node))
, blockStartPos = takeWhile isRefPos
(blockStartPos (rootLabel node))
, blockData = toDyn linkdefs
, blockSpec = refLinkDefSpec
}}
return $! (node', Just refnode)
attributeSpec :: (Monad m, IsBlock il bl)
=> BlockSpec m il bl
attributeSpec = BlockSpec
{ blockType = "Attribute"
, blockStart = do
attrParsers <- attributeParsers <$> getState
guard $ not (null attrParsers)
interruptsParagraph >>= guard . not
nonindentSpaces
pos <- getPosition
attrs <- choice attrParsers
skipWhile (hasType Spaces)
lookAhead (void lineEnd <|> eof)
addNodeToStack $
Node (defBlockData attributeSpec){
blockData = toDyn attrs,
blockStartPos = [pos] } []
return BlockStartMatch
, blockCanContain = const False
, blockContainsLines = False
, blockParagraph = False
, blockContinue = \n -> do
attrParsers <- attributeParsers <$> getState
guard $ not (null attrParsers)
nonindentSpaces
pos <- getPosition
attrs <- choice attrParsers
skipWhile (hasType Spaces)
lookAhead (void lineEnd <|> eof)
let oldattrs = fromDyn (blockData (rootLabel n)) mempty :: Attributes
let attrs' = oldattrs <> attrs
return $! (pos, n{ rootLabel = (rootLabel n){
blockData = toDyn attrs' }})
, blockConstructor = \_ -> return $! mempty
, blockFinalize = \node parent -> do
let attrs = fromDyn (blockData (rootLabel node)) mempty :: Attributes
updateState $ \st -> st{ nextAttributes = attrs }
defaultFinalizer node parent
}
paraSpec :: (Monad m, IsBlock il bl)
=> BlockSpec m il bl
paraSpec = BlockSpec
{ blockType = "Paragraph"
, blockStart = do
interruptsParagraph >>= guard . not
skipWhile (hasType Spaces)
pos <- getPosition
notFollowedBy lineEnd
addNodeToStack $
Node (defBlockData paraSpec){
blockStartPos = [pos] } []
return BlockStartMatch
, blockCanContain = const False
, blockContainsLines = True
, blockParagraph = True
, blockContinue = \n -> lookAhead $ try $ do
skipWhile (hasType Spaces)
pos <- getPosition
notFollowedBy lineEnd
return $! (pos, n)
, blockConstructor = \node ->
paragraph <$> runInlineParser (getBlockText node)
, blockFinalize = \child parent -> do
(mbchild, mbrefdefs) <- extractReferenceLinks child
case (mbchild, mbrefdefs) of
(_, Nothing) -> defaultFinalizer child parent
(Nothing, Just refnode)
-> return $! parent{ subForest =
refnode : subForest parent }
(Just child', Just refnode)
-> return $! parent{ subForest =
child' : refnode : subForest parent }
}
plainSpec :: (Monad m, IsBlock il bl)
=> BlockSpec m il bl
plainSpec = paraSpec{
blockConstructor = \node ->
plain <$> runInlineParser (getBlockText node)
}
linkReferenceDef :: Monad m
=> ParsecT [Tok] s m Attributes
-> ParsecT [Tok] s m ((SourceRange, Text), LinkInfo)
linkReferenceDef attrParser = try $ do
startpos <- getPosition
lab <- pLinkLabel
guard $ not $ T.all isSpace lab
symbol ':'
optional whitespace
dest <- pLinkDestination
(title, attrs) <- option (mempty, mempty) $ try $ do
tit <- option mempty $ try (whitespace *> pLinkTitle)
skipWhile (hasType Spaces)
as <- option mempty attrParser
skipWhile (hasType Spaces)
lookAhead (void lineEnd <|> eof)
return $! (tit, as)
endpos <- getPosition
void lineEnd <|> eof
return $! ((SourceRange [(startpos, endpos)], lab),
LinkInfo{ linkDestination = unEntity dest
, linkTitle = unEntity title
, linkAttributes = attrs })
atxHeadingSpec :: (Monad m, IsBlock il bl)
=> BlockSpec m il bl
atxHeadingSpec = BlockSpec
{ blockType = "ATXHeading"
, blockStart = do
nonindentSpaces
pos <- getPosition
hashes <- many1 (symbol '#')
let level = length hashes
guard $ level <= 6
(spaceTok *> skipMany spaceTok)
<|> void (lookAhead lineEnd)
<|> lookAhead eof
raw <- many (satisfyTok (not . hasType LineEnd))
let removeClosingHash (_ :: Int) [] = []
removeClosingHash 0 (Tok Spaces _ _ : xs) =
removeClosingHash 0 xs
removeClosingHash _ (Tok (Symbol '#') _ _ :
Tok (Symbol '\\') _ _ : _) =
reverse raw
removeClosingHash _ (Tok (Symbol '#') _ _ : xs) =
removeClosingHash 1 xs
removeClosingHash 1 (Tok Spaces _ _ : xs) = xs
removeClosingHash 1 (x:_)
| tokType x /= Symbol '#' = reverse raw
removeClosingHash _ xs = xs
let raw' = reverse . removeClosingHash 0 . reverse $ raw
addNodeToStack $ Node (defBlockData atxHeadingSpec){
blockLines = [raw'],
blockData = toDyn level,
blockStartPos = [pos] } []
return BlockStartMatch
, blockCanContain = const False
, blockContainsLines = False
, blockParagraph = False
, blockContinue = const mzero
, blockConstructor = \node -> do
let level = fromDyn (blockData (rootLabel node)) 1
ils <- runInlineParser (getBlockText node)
return $! heading level ils
, blockFinalize = \node@(Node cdata children) parent -> do
let oldAttr = blockAttributes cdata
let toks = getBlockText node
(newtoks, attr) <- parseFinalAttributes True toks
<|> (return $! (toks, mempty))
defaultFinalizer (Node cdata{ blockAttributes = oldAttr <> attr
, blockLines = [newtoks] }
children) parent
}
setextHeadingSpec :: (Monad m, IsBlock il bl)
=> BlockSpec m il bl
setextHeadingSpec = BlockSpec
{ blockType = "SetextHeading"
, blockStart = do
(cur:rest) <- nodeStack <$> getState
guard $ blockParagraph (bspec cur)
nonindentSpaces
pos <- getPosition
level <- (2 :: Int) <$ skipMany1 (symbol '-')
<|> (1 :: Int) <$ skipMany1 (symbol '=')
skipWhile (hasType Spaces)
lookAhead (eof <|> void lineEnd)
(mbcur, mbrefdefs) <- extractReferenceLinks cur
updateState $ \st ->
st{ nodeStack = case mbrefdefs of
Nothing -> rest
Just rd -> case rest of
(x:xs) ->
x{ subForest =
rd : subForest x }:xs
[] -> [rd] }
case mbcur of
Nothing -> mzero
Just cur' -> do
addNodeToStack $
Node (rootLabel cur'){
blockSpec = setextHeadingSpec,
blockData = toDyn level,
blockStartPos =
blockStartPos (rootLabel cur') ++ [pos] }
[]
return BlockStartMatch
, blockCanContain = const False
, blockContainsLines = True
, blockParagraph = False
, blockContinue = const mzero
, blockConstructor = \node -> do
let level = fromDyn (blockData (rootLabel node)) 1
ils <- runInlineParser (getBlockText node)
return $! heading level ils
, blockFinalize = \node@(Node cdata children) parent -> do
let oldAttr = blockAttributes cdata
let toks = getBlockText node
(newtoks, attr) <- parseFinalAttributes True toks
<|> (return $! (toks, mempty))
defaultFinalizer (Node cdata{ blockAttributes = oldAttr <> attr
, blockLines = [newtoks] }
children) parent
}
parseFinalAttributes :: Monad m
=> Bool -> [Tok] -> BlockParser m il bl ([Tok], Attributes)
parseFinalAttributes requireWhitespace ts = do
attrParsers <- attributeParsers <$> getState
let pAttr' = try $ (if requireWhitespace
then () <$ whitespace
else optional whitespace)
*> choice attrParsers <* optional whitespace <* eof
st <- getState
res <- lift $ runParserT
((,) <$> many (notFollowedBy pAttr' >> anyTok)
<*> option [] pAttr') st "heading contents" ts
case res of
Left _ -> mzero
Right (xs, ys) -> return $! (xs, ys)
blockQuoteSpec :: (Monad m, IsBlock il bl) => BlockSpec m il bl
blockQuoteSpec = BlockSpec
{ blockType = "BlockQuote"
, blockStart = do
nonindentSpaces
pos <- getPosition
_ <- symbol '>'
_ <- option 0 (gobbleSpaces 1)
addNodeToStack $
Node (defBlockData blockQuoteSpec){
blockStartPos = [pos] } []
return BlockStartMatch
, blockCanContain = const True
, blockContainsLines = False
, blockParagraph = False
, blockContinue = \n -> try $ do
nonindentSpaces
pos <- getPosition
_ <- symbol '>'
_ <- gobbleUpToSpaces 1
return $! (pos, n)
, blockConstructor = \node ->
(blockQuote . mconcat) <$> renderChildren node
, blockFinalize = defaultFinalizer
}
listItemSpec :: (Monad m, IsBlock il bl)
=> BlockParser m il bl ListType
-> BlockSpec m il bl
listItemSpec parseListMarker = BlockSpec
{ blockType = "ListItem"
, blockStart = do
(pos, lidata) <- itemStart parseListMarker
let linode = Node (defBlockData $ listItemSpec parseListMarker){
blockData = toDyn lidata,
blockStartPos = [pos] } []
let listdata = ListData{
listType = listItemType lidata
, listSpacing = TightList }
let listnode = Node (defBlockData listSpec){
blockData = toDyn listdata,
blockStartPos = [pos] } []
(cur:_) <- nodeStack <$> getState
when (blockParagraph (bspec cur)) $ do
guard $ case listType listdata of
BulletList _ -> True
OrderedList 1 Decimal _ -> True
_ -> False
notFollowedBy blankLine
let curdata = fromDyn (blockData (rootLabel cur))
(ListData (BulletList '*') TightList)
let matchesList (BulletList c) (BulletList d) = c == d
matchesList (OrderedList _ e1 d1)
(OrderedList _ e2 d2) = e1 == e2 && d1 == d2
matchesList _ _ = False
case blockType (bspec cur) of
"List" | listType curdata `matchesList`
listItemType lidata
-> addNodeToStack linode
_ -> addNodeToStack listnode >> addNodeToStack linode
return BlockStartMatch
, blockCanContain = const True
, blockContainsLines = False
, blockParagraph = False
, blockContinue = \node@(Node ndata children) -> do
let lidata = fromDyn (blockData ndata)
(ListItemData (BulletList '*') 0 False False)
guard $ null (blockBlanks ndata) ||
not (null children)
pos <- getPosition
gobbleSpaces (listItemIndent lidata) <|> 0 <$ lookAhead blankLine
return $! (pos, node)
, blockConstructor = fmap mconcat . renderChildren
, blockFinalize = \(Node cdata children) parent -> do
let lidata = fromDyn (blockData cdata)
(ListItemData (BulletList '*')
0 False False)
let blanks = removeConsecutive $ sort $
concat $ blockBlanks cdata :
map (blockBlanks . rootLabel)
(filter ((== "List") . blockType .
blockSpec . rootLabel) children)
curline <- sourceLine <$> getPosition
let blanksAtEnd = case blanks of
(l:_) -> l >= curline - 1
_ -> False
let blanksInside = case length blanks of
n | n > 1 -> True
| n == 1 -> not blanksAtEnd
| otherwise -> False
let lidata' = toDyn $ lidata{ listItemBlanksInside = blanksInside
, listItemBlanksAtEnd = blanksAtEnd }
defaultFinalizer (Node cdata{ blockData = lidata' } children)
parent
}
itemStart :: Monad m
=> BlockParser m il bl ListType
-> BlockParser m il bl (SourcePos, ListItemData)
itemStart parseListMarker = do
beforecol <- sourceColumn <$> getPosition
gobbleUpToSpaces 3
pos <- getPosition
ty <- parseListMarker
aftercol <- sourceColumn <$> getPosition
lookAhead whitespace
numspaces <- try (gobbleUpToSpaces 4 <* notFollowedBy whitespace)
<|> gobbleSpaces 1
<|> 1 <$ lookAhead lineEnd
return $! (pos, ListItemData{
listItemType = ty
, listItemIndent = (aftercol - beforecol) + numspaces
, listItemBlanksInside = False
, listItemBlanksAtEnd = False
})
bulletListMarker :: Monad m => BlockParser m il bl ListType
bulletListMarker = do
Tok (Symbol c) _ _ <- symbol '-' <|> symbol '*' <|> symbol '+'
return $! BulletList c
orderedListMarker :: Monad m => BlockParser m il bl ListType
orderedListMarker = do
Tok WordChars _ ds <- satisfyWord (\t -> T.all isDigit t && T.length t < 10)
(start :: Int) <- either fail (return . fst) (TR.decimal ds)
delimtype <- Period <$ symbol '.' <|> OneParen <$ symbol ')'
return $! OrderedList start Decimal delimtype
listSpec :: (Monad m, IsBlock il bl) => BlockSpec m il bl
listSpec = BlockSpec
{ blockType = "List"
, blockStart = mzero
, blockCanContain = \sp -> blockType sp == "ListItem"
, blockContainsLines = False
, blockParagraph = False
, blockContinue = \n -> (,n) <$> getPosition
, blockConstructor = \node -> do
let ListData lt ls = fromDyn (blockData (rootLabel node))
(ListData (BulletList '*') TightList)
list lt ls <$> renderChildren node
, blockFinalize = \(Node cdata children) parent -> do
let ListData lt _ = fromDyn (blockData cdata)
(ListData (BulletList '*') TightList)
let getListItemData (Node d _) =
fromDyn (blockData d)
(ListItemData (BulletList '*') 0 False False)
let childrenData = map getListItemData children
let ls = case childrenData of
c:cs | any listItemBlanksInside (c:cs) ||
(not (null cs) &&
any listItemBlanksAtEnd cs)
-> LooseList
_ -> TightList
blockBlanks' <- case childrenData of
c:_ | listItemBlanksAtEnd c -> do
curline <- sourceLine <$> getPosition
return $! curline - 1 : blockBlanks cdata
_ -> return $! blockBlanks cdata
let ldata' = toDyn (ListData lt ls)
let totight (Node nd cs)
| blockType (blockSpec nd) == "Paragraph"
= Node nd{ blockSpec = plainSpec } cs
| otherwise = Node nd cs
let childrenToTight (Node nd cs) = Node nd (map totight cs)
let children' =
if ls == TightList
then map childrenToTight children
else children
defaultFinalizer (Node cdata{ blockData = ldata'
, blockBlanks = blockBlanks' } children')
parent
}
thematicBreakSpec :: (Monad m, IsBlock il bl)
=> BlockSpec m il bl
thematicBreakSpec = BlockSpec
{ blockType = "ThematicBreak"
, blockStart = do
nonindentSpaces
pos <- getPosition
Tok (Symbol c) _ _ <- symbol '-'
<|> symbol '_'
<|> symbol '*'
skipWhile (hasType Spaces)
let tbchar = symbol c <* skipWhile (hasType Spaces)
count 2 tbchar
skipMany tbchar
(do lookAhead lineEnd
addNodeToStack (Node (defBlockData thematicBreakSpec){
blockStartPos = [pos] } [])
return BlockStartMatch) <|>
(BlockStartNoMatchBefore <$> getPosition)
, blockCanContain = const False
, blockContainsLines = False
, blockParagraph = False
, blockContinue = const mzero
, blockConstructor = \_ -> return thematicBreak
, blockFinalize = defaultFinalizer
}
indentedCodeSpec :: (Monad m, IsBlock il bl)
=> BlockSpec m il bl
indentedCodeSpec = BlockSpec
{ blockType = "IndentedCode"
, blockStart = do
interruptsParagraph >>= guard . not
getState >>= guard . not . maybeLazy
_ <- gobbleSpaces 4
pos <- getPosition
notFollowedBy blankLine
addNodeToStack $ Node (defBlockData indentedCodeSpec){
blockStartPos = [pos] } []
return BlockStartMatch
, blockCanContain = const False
, blockContainsLines = True
, blockParagraph = False
, blockContinue = \node -> do
void (gobbleSpaces 4)
<|> try (skipWhile (hasType Spaces) <* lookAhead lineEnd)
pos <- getPosition
return $! (pos, node)
, blockConstructor = \node ->
return $! codeBlock mempty (untokenize (getBlockText node))
, blockFinalize = \(Node cdata children) parent -> do
let blanks = takeWhile isblankLine $ blockLines cdata
let numblanks = length blanks
let cdata' = cdata{ blockLines =
drop numblanks $ blockLines cdata
, blockStartPos =
drop numblanks $ blockStartPos cdata
}
defaultFinalizer (Node cdata' children) parent
}
isblankLine :: [Tok] -> Bool
isblankLine [] = True
isblankLine [Tok LineEnd _ _] = True
isblankLine (Tok Spaces _ _ : xs) = isblankLine xs
isblankLine _ = False
fencedCodeSpec :: (Monad m, IsBlock il bl)
=> BlockSpec m il bl
fencedCodeSpec = BlockSpec
{ blockType = "FencedCode"
, blockStart = do
prepos <- getPosition
nonindentSpaces
pos <- getPosition
let indentspaces = sourceColumn pos - sourceColumn prepos
(c, ticks) <- (('`',) <$> many1 (symbol '`'))
<|> (('~',) <$> many1 (symbol '~'))
let fencelength = length ticks
guard $ fencelength >= 3
skipWhile (hasType Spaces)
let infoTok = noneOfToks (LineEnd : [Symbol '`' | c == '`'])
info <- T.strip . unEntity <$> many (pEscaped <|> infoTok)
lookAhead $ void lineEnd <|> eof
let infotoks = tokenize "info string" info
(content, attrs) <- parseFinalAttributes False infotoks
<|> (return $! (infotoks, mempty))
addNodeToStack $
Node (defBlockData fencedCodeSpec){
blockData = toDyn
(c, fencelength, indentspaces,
untokenize content, attrs),
blockStartPos = [pos] } []
return BlockStartMatch
, blockCanContain = const False
, blockContainsLines = True
, blockParagraph = False
, blockContinue = \node -> try (do
let ((c, fencelength, _, _, _)
:: (Char, Int, Int, Text, Attributes)) = fromDyn
(blockData (rootLabel node))
('`', 3, 0, mempty, mempty)
nonindentSpaces
pos <- getPosition
ts <- many1 (symbol c)
guard $ length ts >= fencelength
skipWhile (hasType Spaces)
lookAhead $ void lineEnd <|> eof
endOfBlock
return $! (pos, node))
<|> (do let ((_, _, indentspaces, _, _)
:: (Char, Int, Int, Text, Attributes)) = fromDyn
(blockData (rootLabel node))
('`', 3, 0, mempty, mempty)
pos <- getPosition
_ <- gobbleUpToSpaces indentspaces
return $! (pos, node))
, blockConstructor = \node -> do
let ((_, _, _, info, attrs) :: (Char, Int, Int, Text, Attributes)) =
fromDyn (blockData (rootLabel node)) ('`', 3, 0, mempty, mempty)
let codetext = untokenize $ drop 1 (getBlockText node)
return $!
if null attrs
then codeBlock info codetext
else addAttributes attrs $ codeBlock info codetext
, blockFinalize = defaultFinalizer
}
rawHtmlSpec :: (Monad m, IsBlock il bl)
=> BlockSpec m il bl
rawHtmlSpec = BlockSpec
{ blockType = "RawHTML"
, blockStart = do
pos <- getPosition
(rawHtmlType, toks) <- withRaw $
do nonindentSpaces
symbol '<'
ty <- choice $ map (\n -> n <$ startCond n) [1..7]
finished <- option False $ do
guard (ty /= 6 && ty /= 7)
endCond ty
return True
when (ty == 7) $ do
(n:_) <- nodeStack <$> getState
guard $ not $ blockParagraph (bspec n)
skipWhile (not . hasType LineEnd)
return $! if finished then 0 else ty
addNodeToStack $ Node (defBlockData rawHtmlSpec){
blockData = toDyn rawHtmlType,
blockLines = [toks],
blockStartPos = [pos] } []
return BlockStartMatch
, blockCanContain = const False
, blockContainsLines = True
, blockParagraph = False
, blockContinue = \node@(Node ndata children) -> try $ do
pos <- getPosition
case fromDyn (blockData (rootLabel node)) (0 :: Int) of
0 -> mzero
6 -> (pos, node) <$ notFollowedBy blankLine
7 -> (pos, node) <$ notFollowedBy blankLine
n ->
(do pos' <- getPosition
lookAhead (endCond n)
endOfBlock
toks <- many (satisfyTok (not . hasType LineEnd))
le <- option [] $ (:[]) <$> lookAhead lineEnd
return $! (pos', Node ndata{
blockData = toDyn (0 :: Int)
, blockLines = (toks ++ le) : blockLines ndata
} children)) <|> (return $! (pos, node))
, blockConstructor = \node ->
return $! rawBlock (Format "html")
(untokenize (getBlockText node))
, blockFinalize = defaultFinalizer
}
startCond :: Monad m => Int -> BlockParser m il bl ()
startCond 1 = void $ try $ do
satisfyWord (isOneOfCI ["script","pre","style"])
spaceTok
<|> symbol '>'
<|> lookAhead lineEnd
startCond 2 = void $ try $ do
symbol '!'
symbol '-'
symbol '-'
startCond 3 = void $ symbol '?'
startCond 4 = void $ try $ do
symbol '!'
satisfyWord (\t -> case T.uncons t of
Just (c, _) -> isAsciiUpper c
_ -> False)
startCond 5 = void $ try $ do
symbol '!'
symbol '['
satisfyWord (== "CDATA")
symbol '['
startCond 6 = void $ try $ do
optional (symbol '/')
satisfyWord (isOneOfCI ["address", "article", "aside", "base",
"basefont", "blockquote", "body", "caption", "center", "col",
"colgroup", "dd", "details", "dialog", "dir", "div", "dl",
"dt", "fieldset", "figcaption", "figure", "footer", "form", "frame",
"frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header",
"hr", "html", "iframe", "legend", "li", "link", "main", "menu",
"menuitem", "nav", "noframes", "ol", "optgroup", "option",
"p", "param", "section", "source", "summary", "table", "tbody",
"td", "tfoot", "th", "thead", "title", "tr", "track", "ul"])
spaceTok
<|> lookAhead lineEnd
<|> symbol '>'
<|> (symbol '/' >> symbol '>')
startCond 7 = void $ try $ do
toks <- htmlOpenTag <|> htmlClosingTag
guard $ not $ any (hasType LineEnd) toks
skipWhile (hasType Spaces)
lookAhead lineEnd
startCond n = fail $ "Unknown HTML block type " ++ show n
endCond :: Monad m => Int -> BlockParser m il bl ()
endCond 1 = try $ do
let closer = try $ do
symbol '<'
symbol '/'
satisfyWord (isOneOfCI ["script","pre","style"])
symbol '>'
skipManyTill (satisfyTok (not . hasType LineEnd)) closer
endCond 2 = try $ do
let closer = try $ symbol '-' >> symbol '-' >> symbol '>'
skipManyTill (satisfyTok (not . hasType LineEnd)) closer
endCond 3 = try $ do
let closer = try $ symbol '?' >> symbol '>'
skipManyTill (satisfyTok (not . hasType LineEnd)) closer
endCond 4 = try $
skipManyTill (satisfyTok (not . hasType LineEnd)) (symbol '>')
endCond 5 = try $ do
let closer = try $ symbol ']' >> symbol ']' >> symbol '>'
skipManyTill (satisfyTok (not . hasType LineEnd)) closer
endCond 6 = void blankLine
endCond 7 = void blankLine
endCond n = fail $ "Unknown HTML block type " ++ show n
getBlockText :: BlockNode m il bl -> [Tok]
getBlockText =
concat . reverse . blockLines . rootLabel
removeIndent :: [Tok] -> [Tok]
removeIndent = dropWhile (hasType Spaces)
removeConsecutive :: [Int] -> [Int]
removeConsecutive (x:y:zs)
| x == y + 1 = removeConsecutive (y:zs)
removeConsecutive xs = xs
collapseNodeStack :: [BlockNode m il bl] -> BlockParser m il bl (BlockNode m il bl)
collapseNodeStack [] = error "Empty node stack!"
collapseNodeStack (n:ns) = foldM go n ns
where go child parent
= if blockCanContain (bspec parent) (bspec child)
then blockFinalize (bspec child) child parent
else error $ "collapseNodeStack: " ++
T.unpack (blockType (bspec parent)) ++
" cannot contain " ++ T.unpack (blockType (bspec child))
bspec :: BlockNode m il bl -> BlockSpec m il bl
bspec = blockSpec . rootLabel
endOfBlock :: Monad m => BlockParser m il bl ()
endOfBlock = updateState $ \st -> st{ blockMatched = False }