{-# LANGUAGE RelaxedPolyRec #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
module Text.Pandoc.Readers.Markdown ( readMarkdown, yamlToMeta ) where
import Control.Monad
import Control.Monad.Except (throwError)
import Data.Char (isAlphaNum, isPunctuation, isSpace)
import Data.List (transpose, elemIndex, sortOn)
import qualified Data.Map as M
import Data.Maybe
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.ByteString.Lazy as BL
import System.FilePath (addExtension, takeExtension)
import Text.HTML.TagSoup
import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class.PandocMonad (PandocMonad (..), report)
import Text.Pandoc.Definition
import Text.Pandoc.Emoji (emojiToInline)
import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (tableWith)
import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag,
isCommentTag, isInlineTag, isTextTag)
import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
import Text.Pandoc.Shared
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.XML (fromEntities)
import Text.Pandoc.Readers.Metadata (yamlBsToMeta)
type MarkdownParser m = ParserT Text ParserState m
readMarkdown :: PandocMonad m
=> ReaderOptions
-> Text
-> m Pandoc
readMarkdown opts s = do
parsed <- readWithM parseMarkdown def{ stateOptions = opts }
(crFilter s <> "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
yamlToMeta :: PandocMonad m
=> ReaderOptions
-> BL.ByteString
-> m Meta
yamlToMeta opts bstr = do
let parser = do
meta <- yamlBsToMeta parseBlocks bstr
return $ runF meta defaultParserState
parsed <- readWithM parser def{ stateOptions = opts } ""
case parsed of
Right result -> return result
Left e -> throwError e
isBulletListMarker :: Char -> Bool
isBulletListMarker '*' = True
isBulletListMarker '+' = True
isBulletListMarker '-' = True
isBulletListMarker _ = False
isHruleChar :: Char -> Bool
isHruleChar '*' = True
isHruleChar '-' = True
isHruleChar '_' = True
isHruleChar _ = False
setextHChars :: [Char]
setextHChars = "=-"
isBlank :: Char -> Bool
isBlank ' ' = True
isBlank '\t' = True
isBlank '\n' = True
isBlank _ = False
inList :: PandocMonad m => MarkdownParser m ()
inList = do
ctx <- stateParserContext <$> getState
guard (ctx == ListItemState)
spnl :: PandocMonad m => ParserT Text st m ()
spnl = try $ do
skipSpaces
optional newline
skipSpaces
notFollowedBy (char '\n')
spnl' :: PandocMonad m => ParserT Text st m Text
spnl' = try $ do
xs <- many spaceChar
ys <- option "" $ try $ (:) <$> newline
<*> (many spaceChar <* notFollowedBy (char '\n'))
return $ T.pack $ xs ++ ys
indentSpaces :: PandocMonad m => MarkdownParser m Text
indentSpaces = try $ do
tabStop <- getOption readerTabStop
countChar tabStop (char ' ') <|>
textStr "\t" <?> "indentation"
nonindentSpaces :: PandocMonad m => MarkdownParser m Text
nonindentSpaces = do
n <- skipNonindentSpaces
return $ T.replicate n " "
skipNonindentSpaces :: PandocMonad m => MarkdownParser m Int
skipNonindentSpaces = do
tabStop <- getOption readerTabStop
gobbleAtMostSpaces (tabStop - 1) <* notFollowedBy spaceChar
litChar :: PandocMonad m => MarkdownParser m Char
litChar = escapedChar'
<|> characterReference
<|> noneOf "\n"
<|> try (newline >> notFollowedBy blankline >> return ' ')
inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines)
inlinesInBalancedBrackets =
try $ char '[' >> withRaw (go 1) >>=
parseFromString inlines . stripBracket . snd
where stripBracket t = case T.unsnoc t of
Just (t', ']') -> t'
_ -> t
go :: PandocMonad m => Int -> MarkdownParser m ()
go 0 = return ()
go openBrackets =
(() <$ (escapedChar <|>
code <|>
rawHtmlInline <|>
rawLaTeXInline') >> go openBrackets)
<|>
(do char ']'
Control.Monad.when (openBrackets > 1) $ go (openBrackets - 1))
<|>
(char '[' >> go (openBrackets + 1))
<|>
(anyChar >> go openBrackets)
rawTitleBlockLine :: PandocMonad m => MarkdownParser m Text
rawTitleBlockLine = do
char '%'
skipSpaces
first <- anyLine
rest <- many $ try $ do spaceChar
notFollowedBy blankline
skipSpaces
anyLine
return $ trim $ T.unlines (first:rest)
titleLine :: PandocMonad m => MarkdownParser m (F Inlines)
titleLine = try $ do
raw <- rawTitleBlockLine
res <- parseFromString' inlines raw
return $ trimInlinesF res
authorsLine :: PandocMonad m => MarkdownParser m (F [Inlines])
authorsLine = try $ do
raw <- rawTitleBlockLine
let sep = (char ';' <* spaces) <|> newline
let pAuthors = sepEndBy
(trimInlinesF . mconcat <$> many
(try $ notFollowedBy sep >> inline))
sep
sequence <$> parseFromString' pAuthors raw
dateLine :: PandocMonad m => MarkdownParser m (F Inlines)
dateLine = try $ do
raw <- rawTitleBlockLine
res <- parseFromString' inlines raw
return $ trimInlinesF res
titleBlock :: PandocMonad m => MarkdownParser m ()
titleBlock = pandocTitleBlock <|> mmdTitleBlock
pandocTitleBlock :: PandocMonad m => MarkdownParser m ()
pandocTitleBlock = try $ do
guardEnabled Ext_pandoc_title_block
lookAhead (char '%')
title <- option mempty titleLine
author <- option (return []) authorsLine
date <- option mempty dateLine
optional blanklines
let meta' = do title' <- title
author' <- author
date' <- date
return $
(if B.isNull title' then id else B.setMeta "title" title')
. (if null author' then id else B.setMeta "author" author')
. (if B.isNull date' then id else B.setMeta "date" date')
$ nullMeta
updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' }
yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks)
yamlMetaBlock = try $ do
guardEnabled Ext_yaml_metadata_block
string "---"
blankline
notFollowedBy blankline
rawYamlLines <- manyTill anyLine stopLine
let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
optional blanklines
newMetaF <- yamlBsToMeta parseBlocks
$ UTF8.fromTextLazy $ TL.fromStrict rawYaml
updateState $ \st -> st{ stateMeta' = stateMeta' st <> newMetaF }
return mempty
stopLine :: PandocMonad m => MarkdownParser m ()
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
mmdTitleBlock :: PandocMonad m => MarkdownParser m ()
mmdTitleBlock = try $ do
guardEnabled Ext_mmd_title_block
firstPair <- kvPair False
restPairs <- many (kvPair True)
let kvPairs = firstPair : restPairs
blanklines
updateState $ \st -> st{ stateMeta' = stateMeta' st <>
return (Meta $ M.fromList kvPairs) }
kvPair :: PandocMonad m => Bool -> MarkdownParser m (Text, MetaValue)
kvPair allowEmpty = try $ do
key <- many1TillChar (alphaNum <|> oneOf "_- ") (char ':')
val <- trim <$> manyTillChar anyChar
(try $ newline >> lookAhead (blankline <|> nonspaceChar))
guard $ allowEmpty || not (T.null val)
let key' = T.concat $ T.words $ T.toLower key
let val' = MetaBlocks $ B.toList $ B.plain $ B.text val
return (key',val')
parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc
parseMarkdown = do
optional titleBlock
blocks <- parseBlocks
st <- getState
let notesUsed = stateNoteRefs st
let notesDefined = M.keys (stateNotes' st)
mapM_ (\n -> unless (n `Set.member` notesUsed) $
case M.lookup n (stateNotes' st) of
Just (pos, _) -> report (NoteDefinedButNotUsed n pos)
Nothing -> throwError $
PandocShouldNeverHappenError "note not found")
notesDefined
let doc = runF (do Pandoc _ bs <- B.doc <$> blocks
meta <- stateMeta' st
return $ Pandoc meta bs) st
reportLogMessages
return doc
referenceKey :: PandocMonad m => MarkdownParser m (F Blocks)
referenceKey = try $ do
pos <- getPosition
skipNonindentSpaces
(_,raw) <- reference
char ':'
skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
let sourceURL = fmap T.unwords $ many $ try $ do
skipMany spaceChar
notFollowedBy' referenceTitle
notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes
notFollowedBy' (() <$ reference)
many1Char $ notFollowedBy space >> litChar
let betweenAngles = try $ char '<' >> manyTillChar litChar (char '>')
src <- try betweenAngles <|> sourceURL
tit <- option "" referenceTitle
attr <- option nullAttr $ try $
do guardEnabled Ext_link_attributes
skipSpaces >> optional newline >> skipSpaces
attributes
addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes
>> many (try $ spnl >> keyValAttr)
blanklines
let attr' = extractIdClass $ foldl (\x f -> f x) attr addKvs
target = (escapeURI $ trimr src, tit)
st <- getState
let oldkeys = stateKeys st
let key = toKey raw
case M.lookup key oldkeys of
Just (t,a) | not (t == target && a == attr') ->
logMessage $ DuplicateLinkReference raw pos
_ -> return ()
updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys }
return $ return mempty
referenceTitle :: PandocMonad m => MarkdownParser m Text
referenceTitle = try $ do
skipSpaces >> optional newline >> skipSpaces
quotedTitle '"' <|> quotedTitle '\'' <|> charsInBalanced '(' ')' litChar
quotedTitle :: PandocMonad m => Char -> MarkdownParser m Text
quotedTitle c = try $ do
char c
notFollowedBy spaces
let pEnder = try $ char c >> notFollowedBy (satisfy isAlphaNum)
let regChunk = many1Char (noneOf ['\\','\n','&',c]) <|> countChar 1 litChar
let nestedChunk = (\x -> T.singleton c <> x <> T.singleton c) <$> quotedTitle c
T.unwords . T.words . T.concat <$> manyTill (nestedChunk <|> regChunk) pEnder
abbrevKey :: PandocMonad m => MarkdownParser m (F Blocks)
abbrevKey = do
guardEnabled Ext_abbreviations
try $ do
char '*'
reference
char ':'
skipMany (satisfy (/= '\n'))
blanklines
return $ return mempty
noteMarker :: PandocMonad m => MarkdownParser m Text
noteMarker = string "[^" >> many1TillChar (satisfy $ not . isBlank) (char ']')
rawLine :: PandocMonad m => MarkdownParser m Text
rawLine = try $ do
notFollowedBy blankline
notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker
optional indentSpaces
anyLine
rawLines :: PandocMonad m => MarkdownParser m Text
rawLines = do
first <- anyLine
rest <- many rawLine
return $ T.unlines (first:rest)
noteBlock :: PandocMonad m => MarkdownParser m (F Blocks)
noteBlock = do
guardEnabled Ext_footnotes
try $ do
pos <- getPosition
skipNonindentSpaces
ref <- noteMarker
char ':'
optional blankline
optional indentSpaces
first <- rawLines
rest <- many $ try $ blanklines >> indentSpaces >> rawLines
let raw = T.unlines (first:rest) <> "\n"
optional blanklines
parsed <- parseFromString' parseBlocks raw
oldnotes <- stateNotes' <$> getState
case M.lookup ref oldnotes of
Just _ -> logMessage $ DuplicateNoteReference ref pos
Nothing -> return ()
updateState $ \s -> s { stateNotes' =
M.insert ref (pos, parsed) oldnotes }
return mempty
parseBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
parseBlocks = mconcat <$> manyTill block eof
block :: PandocMonad m => MarkdownParser m (F Blocks)
block = do
res <- choice [ mempty <$ blanklines
, codeBlockFenced
, yamlMetaBlock
, bulletList
, divHtml
, divFenced
, header
, lhsCodeBlock
, htmlBlock
, table
, codeBlockIndented
, rawTeXBlock
, lineBlock
, blockQuote
, hrule
, orderedList
, definitionList
, noteBlock
, referenceKey
, abbrevKey
, para
, plain
] <?> "block"
trace (T.take 60 $ tshow $ B.toList $ runF res defaultParserState)
return res
header :: PandocMonad m => MarkdownParser m (F Blocks)
header = setextHeader <|> atxHeader <?> "header"
atxChar :: PandocMonad m => MarkdownParser m Char
atxChar = do
exts <- getOption readerExtensions
return $ if extensionEnabled Ext_literate_haskell exts
then '='
else '#'
atxHeader :: PandocMonad m => MarkdownParser m (F Blocks)
atxHeader = try $ do
level <- fmap length (atxChar >>= many1 . char)
notFollowedBy $ guardEnabled Ext_fancy_lists >>
(char '.' <|> char ')')
guardDisabled Ext_space_in_atx_header <|> notFollowedBy nonspaceChar
skipSpaces
(text, raw) <- withRaw $ do
oldAllowLineBreaks <- stateAllowLineBreaks <$> getState
updateState $ \st -> st{ stateAllowLineBreaks = False }
res <- trimInlinesF . mconcat <$>
many (notFollowedBy atxClosing >> inline)
updateState $ \st -> st{ stateAllowLineBreaks = oldAllowLineBreaks }
return res
attr <- atxClosing
attr' <- registerHeader attr (runF text defaultParserState)
guardDisabled Ext_implicit_header_references
<|> registerImplicitHeader raw attr'
return $ B.headerWith attr' level <$> text
atxClosing :: PandocMonad m => MarkdownParser m Attr
atxClosing = try $ do
attr' <- option nullAttr
(guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier)
skipMany . char =<< atxChar
skipSpaces
attr <- option attr'
(guardEnabled Ext_header_attributes >> attributes)
blanklines
return attr
setextHeaderEnd :: PandocMonad m => MarkdownParser m Attr
setextHeaderEnd = try $ do
attr <- option nullAttr
$ (guardEnabled Ext_mmd_header_identifiers >> mmdHeaderIdentifier)
<|> (guardEnabled Ext_header_attributes >> attributes)
blanklines
return attr
mmdHeaderIdentifier :: PandocMonad m => MarkdownParser m Attr
mmdHeaderIdentifier = do
(_, raw) <- reference
let raw' = trim $ stripFirstAndLast raw
let ident = T.concat $ T.words $ T.toLower raw'
let attr = (ident, [], [])
guardDisabled Ext_implicit_header_references
<|> registerImplicitHeader raw' attr
skipSpaces
return attr
setextHeader :: PandocMonad m => MarkdownParser m (F Blocks)
setextHeader = try $ do
lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline
skipSpaces
(text, raw) <- withRaw $ do
oldAllowLineBreaks <- stateAllowLineBreaks <$> getState
updateState $ \st -> st{ stateAllowLineBreaks = False }
res <- trimInlinesF . mconcat <$>
many (notFollowedBy setextHeaderEnd >> inline)
updateState $ \st -> st{ stateAllowLineBreaks = oldAllowLineBreaks }
return res
attr <- setextHeaderEnd
underlineChar <- oneOf setextHChars
many (char underlineChar)
blanklines
let level = fromMaybe 0 (elemIndex underlineChar setextHChars) + 1
attr' <- registerHeader attr (runF text defaultParserState)
guardDisabled Ext_implicit_header_references
<|> registerImplicitHeader raw attr'
return $ B.headerWith attr' level <$> text
registerImplicitHeader :: PandocMonad m => Text -> Attr -> MarkdownParser m ()
registerImplicitHeader raw attr@(ident, _, _)
| T.null raw = return ()
| otherwise = do
let key = toKey $ "[" <> raw <> "]"
updateState $ \s ->
s { stateHeaderKeys = M.insert key (("#" <> ident,""), attr)
(stateHeaderKeys s) }
hrule :: PandocMonad m => ParserT Text st m (F Blocks)
hrule = try $ do
skipSpaces
start <- satisfy isHruleChar
count 2 (skipSpaces >> char start)
skipMany (spaceChar <|> char start)
newline
optional blanklines
return $ return B.horizontalRule
indentedLine :: PandocMonad m => MarkdownParser m Text
indentedLine = indentSpaces >> anyLineNewline
blockDelimiter :: PandocMonad m
=> (Char -> Bool)
-> Maybe Int
-> ParserT Text ParserState m Int
blockDelimiter f len = try $ do
skipNonindentSpaces
c <- lookAhead (satisfy f)
case len of
Just l -> count l (char c) >> many (char c) >> return l
Nothing -> fmap ((+ 3) . length) (count 3 (char c) >> many (char c))
attributes :: PandocMonad m => MarkdownParser m Attr
attributes = try $ do
char '{'
spnl
attrs <- many (attribute <* spnl)
char '}'
return $ foldl (\x f -> f x) nullAttr attrs
attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr)
attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr
identifier :: PandocMonad m => MarkdownParser m Text
identifier = do
first <- letter
rest <- many $ alphaNum <|> oneOf "-_:."
return $ T.pack (first:rest)
identifierAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
identifierAttr = try $ do
char '#'
result <- identifier
return $ \(_,cs,kvs) -> (result,cs,kvs)
classAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
classAttr = try $ do
char '.'
result <- identifier
return $ \(id',cs,kvs) -> (id',cs ++ [result],kvs)
keyValAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
keyValAttr = try $ do
key <- identifier
char '='
val <- T.pack <$> enclosed (char '"') (char '"') litChar
<|> T.pack <$> enclosed (char '\'') (char '\'') litChar
<|> ("" <$ try (string "\"\""))
<|> ("" <$ try (string "''"))
<|> manyChar (escapedChar' <|> noneOf " \t\n\r}")
return $ \(id',cs,kvs) ->
case key of
"id" -> (val,cs,kvs)
"class" -> (id',cs ++ T.words val,kvs)
_ -> (id',cs,kvs ++ [(key,val)])
specialAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
specialAttr = do
char '-'
return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs)
rawAttribute :: PandocMonad m => MarkdownParser m Text
rawAttribute = do
char '{'
skipMany spaceChar
char '='
format <- many1Char $ satisfy (\c -> isAlphaNum c || c `elem` ['-', '_'])
skipMany spaceChar
char '}'
return format
codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks)
codeBlockFenced = try $ do
indentchars <- nonindentSpaces
let indentLevel = T.length indentchars
c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
<|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`'))
size <- blockDelimiter (== c) Nothing
skipMany spaceChar
rawattr <-
(Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
<|>
(Right <$> option ("",[],[])
(try (guardEnabled Ext_fenced_code_attributes >> attributes)
<|> ((\x -> ("",[toLanguageId x],[])) <$> many1Char nonspaceChar)))
blankline
contents <- T.intercalate "\n" <$>
manyTill (gobbleAtMostSpaces indentLevel >> anyLine)
(try $ do
blockDelimiter (== c) (Just size)
blanklines)
return $ return $
case rawattr of
Left syn -> B.rawBlock syn contents
Right attr -> B.codeBlockWith attr contents
toLanguageId :: Text -> Text
toLanguageId = T.toLower . go
where go "c++" = "cpp"
go "objective-c" = "objectivec"
go x = x
codeBlockIndented :: PandocMonad m => MarkdownParser m (F Blocks)
codeBlockIndented = do
contents <- many1 (indentedLine <|>
try (do b <- blanklines
l <- indentedLine
return $ b <> l))
optional blanklines
classes <- getOption readerIndentedCodeClasses
return $ return $ B.codeBlockWith ("", classes, []) $
stripTrailingNewlines $ T.concat contents
lhsCodeBlock :: PandocMonad m => MarkdownParser m (F Blocks)
lhsCodeBlock = do
guardEnabled Ext_literate_haskell
(return . B.codeBlockWith ("",["haskell","literate"],[]) <$>
(lhsCodeBlockBird <|> lhsCodeBlockLaTeX))
<|> (return . B.codeBlockWith ("",["haskell"],[]) <$>
lhsCodeBlockInverseBird)
lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m Text
lhsCodeBlockLaTeX = try $ do
string "\\begin{code}"
manyTill spaceChar newline
contents <- many1TillChar anyChar (try $ string "\\end{code}")
blanklines
return $ stripTrailingNewlines contents
lhsCodeBlockBird :: PandocMonad m => MarkdownParser m Text
lhsCodeBlockBird = lhsCodeBlockBirdWith '>'
lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m Text
lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<'
lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m Text
lhsCodeBlockBirdWith c = try $ do
pos <- getPosition
when (sourceColumn pos /= 1) $ Prelude.fail "Not in first column"
lns <- many1 $ birdTrackLine c
let lns' = if all (\ln -> T.null ln || T.take 1 ln == " ") lns
then map (T.drop 1) lns
else lns
blanklines
return $ T.intercalate "\n" lns'
birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text
birdTrackLine c = try $ do
char c
when (c == '<') $ notFollowedBy letter
anyLine
emailBlockQuoteStart :: PandocMonad m => MarkdownParser m Char
emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ')
emailBlockQuote :: PandocMonad m => MarkdownParser m [Text]
emailBlockQuote = try $ do
emailBlockQuoteStart
let emailLine = manyChar $ nonEndline <|> try
(endline >> notFollowedBy emailBlockQuoteStart >>
return '\n')
let emailSep = try (newline >> emailBlockQuoteStart)
first <- emailLine
rest <- many $ try $ emailSep >> emailLine
let raw = first:rest
newline <|> (eof >> return '\n')
optional blanklines
return raw
blockQuote :: PandocMonad m => MarkdownParser m (F Blocks)
blockQuote = do
raw <- emailBlockQuote
contents <- parseFromString' parseBlocks $ T.intercalate "\n" raw <> "\n\n"
return $ B.blockQuote <$> contents
bulletListStart :: PandocMonad m => MarkdownParser m ()
bulletListStart = try $ do
optional newline
skipNonindentSpaces
notFollowedBy' (() <$ hrule)
satisfy isBulletListMarker
gobbleSpaces 1 <|> () <$ lookAhead newline
try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar) <|> return ()
orderedListStart :: PandocMonad m
=> Maybe (ListNumberStyle, ListNumberDelim)
-> MarkdownParser m (Int, ListNumberStyle, ListNumberDelim)
orderedListStart mbstydelim = try $ do
optional newline
skipNonindentSpaces
notFollowedBy $ string "p." >> spaceChar >> digit
(do guardDisabled Ext_fancy_lists
start <- many1Char digit >>= safeRead
char '.'
gobbleSpaces 1 <|> () <$ lookAhead newline
optional $ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar)
return (start, DefaultStyle, DefaultDelim))
<|>
(do (num, style, delim) <- maybe
anyOrderedListMarker
(\(sty,delim) -> (\start -> (start,sty,delim)) <$>
orderedListMarker sty delim)
mbstydelim
gobbleSpaces 1 <|> () <$ lookAhead newline
when (delim == Period && (style == UpperAlpha ||
(style == UpperRoman &&
num `elem` [1, 5, 10, 50, 100, 500, 1000]))) $
() <$ lookAhead (newline <|> spaceChar)
optional $ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar)
return (num, style, delim))
listStart :: PandocMonad m => MarkdownParser m ()
listStart = bulletListStart <|> Control.Monad.void (orderedListStart Nothing)
listLine :: PandocMonad m => Int -> MarkdownParser m Text
listLine continuationIndent = try $ do
notFollowedBy' (do gobbleSpaces continuationIndent
skipMany spaceChar
listStart)
notFollowedByHtmlCloser
notFollowedByDivCloser
optional (() <$ gobbleSpaces continuationIndent)
listLineCommon
listLineCommon :: PandocMonad m => MarkdownParser m Text
listLineCommon = T.concat <$> manyTill
( many1Char (satisfy $ \c -> c `notElem` ['\n', '<', '`'])
<|> fmap snd (withRaw code)
<|> fmap snd (htmlTag isCommentTag)
<|> countChar 1 anyChar
) newline
rawListItem :: PandocMonad m
=> Bool
-> MarkdownParser m a
-> MarkdownParser m (Text, Int)
rawListItem fourSpaceRule start = try $ do
pos1 <- getPosition
start
pos2 <- getPosition
let continuationIndent = if fourSpaceRule
then 4
else sourceColumn pos2 - sourceColumn pos1
first <- listLineCommon
rest <- many (do notFollowedBy listStart
notFollowedBy (() <$ codeBlockFenced)
notFollowedBy blankline
listLine continuationIndent)
blanks <- manyChar blankline
let result = T.unlines (first:rest) <> blanks
return (result, continuationIndent)
listContinuation :: PandocMonad m => Int -> MarkdownParser m Text
listContinuation continuationIndent = try $ do
x <- try $ do
notFollowedBy blankline
notFollowedByHtmlCloser
notFollowedByDivCloser
gobbleSpaces continuationIndent
anyLineNewline
xs <- many $ try $ do
notFollowedBy blankline
notFollowedByHtmlCloser
notFollowedByDivCloser
gobbleSpaces continuationIndent <|> notFollowedBy' listStart
anyLineNewline
blanks <- manyChar blankline
return $ T.concat (x:xs) <> blanks
blanklines' :: PandocMonad m => MarkdownParser m Text
blanklines' = blanklines <|> try checkDivCloser
where checkDivCloser = do
guardEnabled Ext_fenced_divs
divLevel <- stateFencedDivLevel <$> getState
guard (divLevel >= 1)
lookAhead divFenceEnd
return ""
notFollowedByDivCloser :: PandocMonad m => MarkdownParser m ()
notFollowedByDivCloser =
guardDisabled Ext_fenced_divs <|>
do divLevel <- stateFencedDivLevel <$> getState
guard (divLevel < 1) <|> notFollowedBy divFenceEnd
notFollowedByHtmlCloser :: PandocMonad m => MarkdownParser m ()
notFollowedByHtmlCloser = do
inHtmlBlock <- stateInHtmlBlock <$> getState
case inHtmlBlock of
Just t -> notFollowedBy' $ htmlTag (~== TagClose t)
Nothing -> return ()
listItem :: PandocMonad m
=> Bool
-> MarkdownParser m a
-> MarkdownParser m (F Blocks)
listItem fourSpaceRule start = try $ do
state <- getState
let oldContext = stateParserContext state
setState $ state {stateParserContext = ListItemState}
(first, continuationIndent) <- rawListItem fourSpaceRule start
continuations <- many (listContinuation continuationIndent)
let raw = T.concat (first:continuations)
contents <- parseFromString' parseBlocks raw
updateState (\st -> st {stateParserContext = oldContext})
exts <- getOption readerExtensions
return $ B.fromList . taskListItemFromAscii exts . B.toList <$> contents
orderedList :: PandocMonad m => MarkdownParser m (F Blocks)
orderedList = try $ do
(start, style, delim) <- lookAhead (orderedListStart Nothing)
unless (style `elem` [DefaultStyle, Decimal, Example] &&
delim `elem` [DefaultDelim, Period]) $
guardEnabled Ext_fancy_lists
when (style == Example) $ guardEnabled Ext_example_lists
fourSpaceRule <- (True <$ guardEnabled Ext_four_space_rule)
<|> return (style == Example)
items <- fmap sequence $ many1 $ listItem fourSpaceRule
(orderedListStart (Just (style, delim)))
start' <- if style == Example
then return start
else (start <$ guardEnabled Ext_startnum) <|> return 1
return $ B.orderedListWith (start', style, delim) <$> fmap compactify items
bulletList :: PandocMonad m => MarkdownParser m (F Blocks)
bulletList = do
fourSpaceRule <- (True <$ guardEnabled Ext_four_space_rule)
<|> return False
items <- fmap sequence $ many1 $ listItem fourSpaceRule bulletListStart
return $ B.bulletList <$> fmap compactify items
defListMarker :: PandocMonad m => MarkdownParser m ()
defListMarker = do
sps <- nonindentSpaces
char ':' <|> char '~'
tabStop <- getOption readerTabStop
let remaining = tabStop - (T.length sps + 1)
if remaining > 0
then try (count remaining (char ' ')) <|> string "\t" <|> many1 spaceChar
else mzero
return ()
definitionListItem :: PandocMonad m => Bool -> MarkdownParser m (F (Inlines, [Blocks]))
definitionListItem compact = try $ do
rawLine' <- anyLine
raw <- many1 $ defRawBlock compact
term <- parseFromString' (trimInlinesF <$> inlines) rawLine'
contents <- mapM (parseFromString' parseBlocks . (<> "\n")) raw
optional blanklines
return $ liftM2 (,) term (sequence contents)
defRawBlock :: PandocMonad m => Bool -> MarkdownParser m Text
defRawBlock compact = try $ do
hasBlank <- option False $ blankline >> return True
defListMarker
firstline <- anyLineNewline
let dline = try
( do notFollowedBy blankline
notFollowedByHtmlCloser
notFollowedByDivCloser
if compact
then () <$ indentSpaces
else (() <$ indentSpaces)
<|> notFollowedBy defListMarker
anyLine )
rawlines <- many dline
cont <- fmap T.concat $ many $ try $ do
trailing <- option "" blanklines
ln <- indentSpaces >> notFollowedBy blankline >> anyLine
lns <- many dline
return $ trailing <> T.unlines (ln:lns)
return $ trimr (firstline <> T.unlines rawlines <> cont) <>
if hasBlank || not (T.null cont) then "\n\n" else ""
definitionList :: PandocMonad m => MarkdownParser m (F Blocks)
definitionList = try $ do
lookAhead (anyLine >>
optional (blankline >> notFollowedBy (Control.Monad.void table)) >>
defListMarker)
compactDefinitionList <|> normalDefinitionList
compactDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks)
compactDefinitionList = do
guardEnabled Ext_compact_definition_lists
items <- fmap sequence $ many1 $ definitionListItem True
return $ B.definitionList <$> fmap compactifyDL items
normalDefinitionList :: PandocMonad m => MarkdownParser m (F Blocks)
normalDefinitionList = do
guardEnabled Ext_definition_lists
items <- fmap sequence $ many1 $ definitionListItem False
return $ B.definitionList <$> items
para :: PandocMonad m => MarkdownParser m (F Blocks)
para = try $ do
exts <- getOption readerExtensions
let implicitFigures x
| extensionEnabled Ext_implicit_figures exts = do
x' <- x
case B.toList x' of
[Image attr alt (src,tit)]
| not (null alt) ->
return $ B.singleton
$ Image attr alt (src, "fig:" <> tit)
_ -> return x'
| otherwise = x
result <- implicitFigures . trimInlinesF <$> inlines1
option (B.plain <$> result)
$ try $ do
newline
(blanklines >> return mempty)
<|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote)
<|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
<|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
<|> (guardEnabled Ext_lists_without_preceding_blankline >>
notFollowedBy' inList >>
() <$ lookAhead listStart)
<|> do guardEnabled Ext_native_divs
inHtmlBlock <- stateInHtmlBlock <$> getState
case inHtmlBlock of
Just "div" -> () <$
lookAhead (htmlTag (~== TagClose ("div" :: Text)))
_ -> mzero
<|> do guardEnabled Ext_fenced_divs
divLevel <- stateFencedDivLevel <$> getState
if divLevel > 0
then lookAhead divFenceEnd
else mzero
return $ B.para <$> result
plain :: PandocMonad m => MarkdownParser m (F Blocks)
plain = fmap B.plain . trimInlinesF <$> inlines1
htmlElement :: PandocMonad m => MarkdownParser m Text
htmlElement = rawVerbatimBlock
<|> strictHtmlBlock
<|> fmap snd (htmlTag isBlockTag)
htmlBlock :: PandocMonad m => MarkdownParser m (F Blocks)
htmlBlock = do
guardEnabled Ext_raw_html
try (do
(TagOpen _ attrs) <- lookAhead $ fst <$> htmlTag isBlockTag
(return . B.rawBlock "html") <$> rawVerbatimBlock
<|> (do guardEnabled Ext_markdown_attribute
oldMarkdownAttribute <- stateMarkdownAttribute <$> getState
markdownAttribute <-
case lookup "markdown" attrs of
Just "0" -> False <$ updateState (\st -> st{
stateMarkdownAttribute = False })
Just _ -> True <$ updateState (\st -> st{
stateMarkdownAttribute = True })
Nothing -> return oldMarkdownAttribute
res <- if markdownAttribute
then rawHtmlBlocks
else htmlBlock'
updateState $ \st -> st{ stateMarkdownAttribute =
oldMarkdownAttribute }
return res)
<|> (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks))
<|> htmlBlock'
htmlBlock' :: PandocMonad m => MarkdownParser m (F Blocks)
htmlBlock' = try $ do
first <- htmlElement
skipMany spaceChar
optional blanklines
return $ if T.null first
then mempty
else return $ B.rawBlock "html" first
strictHtmlBlock :: PandocMonad m => MarkdownParser m Text
strictHtmlBlock = htmlInBalanced (not . isInlineTag)
rawVerbatimBlock :: PandocMonad m => MarkdownParser m Text
rawVerbatimBlock = htmlInBalanced isVerbTag
where isVerbTag (TagOpen "pre" _) = True
isVerbTag (TagOpen "style" _) = True
isVerbTag (TagOpen "script" _) = True
isVerbTag (TagOpen "textarea" _) = True
isVerbTag _ = False
rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks)
rawTeXBlock = do
guardEnabled Ext_raw_tex
result <- (B.rawBlock "tex" . trim . T.concat <$>
many1 ((<>) <$> rawConTeXtEnvironment <*> spnl'))
<|> (B.rawBlock "tex" . trim . T.concat <$>
many1 ((<>) <$> rawLaTeXBlock <*> spnl'))
return $ case B.toList result of
[RawBlock _ cs]
| T.all (`elem` [' ','\t','\n']) cs -> return mempty
_ -> return result
rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks)
rawHtmlBlocks = do
(TagOpen tagtype _, raw) <- htmlTag isBlockTag
skipMany spaceChar
indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0
oldInHtmlBlock <- stateInHtmlBlock <$> getState
updateState $ \st -> st{ stateInHtmlBlock = Just tagtype }
let closer = htmlTag (~== TagClose tagtype)
let block' = try $ do
gobbleAtMostSpaces indentlevel
notFollowedBy' closer
block
contents <- mconcat <$> many block'
result <-
try
(do gobbleAtMostSpaces indentlevel
(_, rawcloser) <- closer
return (return (B.rawBlock "html" $ stripMarkdownAttribute raw) <>
contents <>
return (B.rawBlock "html" rawcloser)))
<|> return (return (B.rawBlock "html" raw) <> contents)
updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
return result
stripMarkdownAttribute :: Text -> Text
stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s
where filterAttrib (TagOpen t as) = TagOpen t
[(k,v) | (k,v) <- as, k /= "markdown"]
filterAttrib x = x
lineBlock :: PandocMonad m => MarkdownParser m (F Blocks)
lineBlock = try $ do
guardEnabled Ext_line_blocks
lines' <- lineBlockLines >>=
mapM (parseFromString' (trimInlinesF <$> inlines))
return $ B.lineBlock <$> sequence lines'
dashedLine :: PandocMonad m
=> Char
-> ParserT Text st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many spaceChar
let lengthDashes = length dashes
lengthSp = length sp
return (lengthDashes, lengthDashes + lengthSp)
simpleTableHeader :: PandocMonad m
=> Bool
-> MarkdownParser m (F [Blocks], [Alignment], [Int])
simpleTableHeader headless = try $ do
rawContent <- if headless
then return ""
else anyLine
initSp <- nonindentSpaces
dashes <- many1 (dashedLine '-')
newline
let (lengths, lines') = unzip dashes
let indices = scanl (+) (T.length initSp) lines'
rawHeads <- fmap (tail . splitTextByIndices (init indices)) $
if headless
then lookAhead anyLine
else return rawContent
let aligns = zipWith alignType (map (: []) rawHeads) lengths
let rawHeads' = if headless
then replicate (length dashes) ""
else rawHeads
heads <- fmap sequence
$
mapM (parseFromString' (mconcat <$> many plain).trim) rawHeads'
return (heads, aligns, indices)
alignType :: [Text]
-> Int
-> Alignment
alignType [] _ = AlignDefault
alignType strLst len =
let nonempties = filter (not . T.null) $ map trimr strLst
(leftSpace, rightSpace) =
case sortOn T.length nonempties of
(x:_) -> (T.head x `elem` [' ', '\t'], T.length x < len)
[] -> (False, False)
in case (leftSpace, rightSpace) of
(True, False) -> AlignRight
(False, True) -> AlignLeft
(True, True) -> AlignCenter
(False, False) -> AlignDefault
tableFooter :: PandocMonad m => MarkdownParser m Text
tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines'
tableSep :: PandocMonad m => MarkdownParser m Char
tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n'
rawTableLine :: PandocMonad m
=> [Int]
-> MarkdownParser m [Text]
rawTableLine indices = do
notFollowedBy' (blanklines' <|> tableFooter)
line <- take1WhileP (/='\n') <* newline
return $ map trim $ tail $
splitTextByIndices (init indices) line
tableLine :: PandocMonad m
=> [Int]
-> MarkdownParser m (F [Blocks])
tableLine indices = rawTableLine indices >>=
fmap sequence . mapM (parseFromString' (mconcat <$> many plain))
multilineRow :: PandocMonad m
=> [Int]
-> MarkdownParser m (F [Blocks])
multilineRow indices = do
colLines <- many1 (rawTableLine indices)
let cols = map T.unlines $ transpose colLines
fmap sequence $ mapM (parseFromString' (mconcat <$> many plain)) cols
tableCaption :: PandocMonad m => MarkdownParser m (F Inlines)
tableCaption = try $ do
guardEnabled Ext_table_captions
skipNonindentSpaces
(string ":" <* notFollowedBy (satisfy isPunctuation)) <|> string "Table:"
trimInlinesF <$> inlines1 <* blanklines
simpleTable :: PandocMonad m
=> Bool
-> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
simpleTable headless = do
(aligns, _widths, heads', lines') <-
tableWith (simpleTableHeader headless) tableLine
(return ())
(if headless then tableFooter else tableFooter <|> blanklines')
return (aligns, replicate (length aligns) 0, heads', lines')
multilineTable :: PandocMonad m
=> Bool
-> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
multilineTable headless =
tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter
multilineTableHeader :: PandocMonad m
=> Bool
-> MarkdownParser m (F [Blocks], [Alignment], [Int])
multilineTableHeader headless = try $ do
unless headless $
tableSep >> notFollowedBy blankline
rawContent <- if headless
then return $ repeat ""
else many1 $ notFollowedBy tableSep >> anyLine
initSp <- nonindentSpaces
dashes <- many1 (dashedLine '-')
newline
let (lengths, lines') = unzip dashes
let indices = scanl (+) (T.length initSp) lines'
let indices' = case reverse indices of
[] -> []
(x:xs) -> reverse (x+1:xs)
rawHeadsList <- if headless
then fmap (map (:[]) . tail .
splitTextByIndices (init indices')) $ lookAhead anyLine
else return $ transpose $ map
(tail . splitTextByIndices (init indices'))
rawContent
let aligns = zipWith alignType rawHeadsList lengths
let rawHeads = if headless
then replicate (length dashes) ""
else map (T.unlines . map trim) rawHeadsList
heads <- fmap sequence $
mapM (parseFromString' (mconcat <$> many plain).trim) rawHeads
return (heads, aligns, indices')
gridTable :: PandocMonad m => Bool
-> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
gridTable headless = gridTableWith' parseBlocks headless
pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int])
pipeBreak = try $ do
nonindentSpaces
openPipe <- (True <$ char '|') <|> return False
first <- pipeTableHeaderPart
rest <- many $ sepPipe *> pipeTableHeaderPart
guard $ not (null rest && not openPipe)
optional (char '|')
blankline
return $ unzip (first:rest)
pipeTable :: PandocMonad m => MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
pipeTable = try $ do
nonindentSpaces
lookAhead nonspaceChar
(heads,(aligns, seplengths)) <- (,) <$> pipeTableRow <*> pipeBreak
let heads' = take (length aligns) <$> heads
lines' <- many pipeTableRow
let lines'' = map (take (length aligns) <$>) lines'
let maxlength = maximum $
map (\x -> T.length . stringify $ runF x def) (heads' : lines'')
numColumns <- getOption readerColumns
let widths = if maxlength > numColumns
then map (\len ->
fromIntegral len / fromIntegral (sum seplengths))
seplengths
else replicate (length aligns) 0.0
return (aligns, widths, heads', sequence lines'')
sepPipe :: PandocMonad m => MarkdownParser m ()
sepPipe = try $ do
char '|' <|> char '+'
notFollowedBy blankline
pipeTableRow :: PandocMonad m => MarkdownParser m (F [Blocks])
pipeTableRow = try $ do
scanForPipe
skipMany spaceChar
openPipe <- (True <$ char '|') <|> return False
let chunk = void (code <|> math <|> rawHtmlInline <|> escapedChar <|> rawLaTeXInline')
<|> void (noneOf "|\n\r")
let cellContents = ((trim . snd) <$> withRaw (many chunk)) >>=
parseFromString' pipeTableCell
cells <- cellContents `sepEndBy1` char '|'
guard $ not (length cells == 1 && not openPipe)
blankline
return $ sequence cells
pipeTableCell :: PandocMonad m => MarkdownParser m (F Blocks)
pipeTableCell =
(do result <- inlines1
return $ B.plain <$> result)
<|> return mempty
pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int)
pipeTableHeaderPart = try $ do
skipMany spaceChar
left <- optionMaybe (char ':')
pipe <- many1 (char '-')
right <- optionMaybe (char ':')
skipMany spaceChar
let len = length pipe + maybe 0 (const 1) left + maybe 0 (const 1) right
return
(case (left,right) of
(Nothing,Nothing) -> AlignDefault
(Just _,Nothing) -> AlignLeft
(Nothing,Just _) -> AlignRight
(Just _,Just _) -> AlignCenter, len)
scanForPipe :: PandocMonad m => ParserT Text st m ()
scanForPipe = do
inp <- getInput
case T.break (\c -> c == '\n' || c == '|') inp of
(_, T.uncons -> Just ('|', _)) -> return ()
_ -> mzero
tableWith :: PandocMonad m
=> MarkdownParser m (F [Blocks], [Alignment], [Int])
-> ([Int] -> MarkdownParser m (F [Blocks]))
-> MarkdownParser m sep
-> MarkdownParser m end
-> MarkdownParser m ([Alignment], [Double], F [Blocks], F [[Blocks]])
tableWith headerParser rowParser lineParser footerParser = try $ do
(heads, aligns, indices) <- headerParser
lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser
footerParser
numColumns <- getOption readerColumns
let widths = if null indices
then replicate (length aligns) 0.0
else widthsFromIndices numColumns indices
return (aligns, widths, heads, lines')
table :: PandocMonad m => MarkdownParser m (F Blocks)
table = try $ do
frontCaption <- option Nothing (Just <$> tableCaption)
(aligns, widths, heads, lns) <-
try (guardEnabled Ext_pipe_tables >> scanForPipe >> pipeTable) <|>
try (guardEnabled Ext_multiline_tables >>
multilineTable False) <|>
try (guardEnabled Ext_simple_tables >>
(simpleTable True <|> simpleTable False)) <|>
try (guardEnabled Ext_multiline_tables >>
multilineTable True) <|>
try (guardEnabled Ext_grid_tables >>
(gridTable False <|> gridTable True)) <?> "table"
optional blanklines
caption <- case frontCaption of
Nothing -> option (return mempty) tableCaption
Just c -> return c
let totalWidth = sum widths
let widths' = if totalWidth < 1
then widths
else map (/ totalWidth) widths
return $ do
caption' <- caption
heads' <- heads
lns' <- lns
return $ B.table caption' (zip aligns widths') heads' lns'
inlines :: PandocMonad m => MarkdownParser m (F Inlines)
inlines = mconcat <$> many inline
inlines1 :: PandocMonad m => MarkdownParser m (F Inlines)
inlines1 = mconcat <$> many1 inline
inline :: PandocMonad m => MarkdownParser m (F Inlines)
inline = choice [ whitespace
, bareURL
, str
, endline
, code
, strongOrEmph
, note
, cite
, bracketedSpan
, link
, image
, math
, strikeout
, subscript
, superscript
, inlineNote
, autoLink
, spanHtml
, rawHtmlInline
, escapedNewline
, escapedChar
, rawLaTeXInline'
, exampleRef
, smart
, return . B.singleton <$> charRef
, emoji
, symbol
, ltSign
] <?> "inline"
escapedChar' :: PandocMonad m => MarkdownParser m Char
escapedChar' = try $ do
char '\\'
(guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum))
<|> (guardEnabled Ext_angle_brackets_escapable >>
oneOf "\\`*_{}[]()>#+-.!~\"<>")
<|> oneOf "\\`*_{}[]()>#+-.!~\""
escapedNewline :: PandocMonad m => MarkdownParser m (F Inlines)
escapedNewline = try $ do
guardEnabled Ext_escaped_line_breaks
char '\\'
lookAhead (char '\n')
return $ return B.linebreak
escapedChar :: PandocMonad m => MarkdownParser m (F Inlines)
escapedChar = do
result <- escapedChar'
case result of
' ' -> return $ return $ B.str "\160"
_ -> return $ return $ B.str $ T.singleton result
ltSign :: PandocMonad m => MarkdownParser m (F Inlines)
ltSign = do
guardDisabled Ext_raw_html
<|> (notFollowedByHtmlCloser >> notFollowedBy' (htmlTag isBlockTag))
char '<'
return $ return $ B.str "<"
exampleRef :: PandocMonad m => MarkdownParser m (F Inlines)
exampleRef = try $ do
guardEnabled Ext_example_lists
char '@'
lab <- many1Char (alphaNum <|> oneOf "-_")
return $ do
st <- askF
return $ case M.lookup lab (stateExamples st) of
Just n -> B.str $ tshow n
Nothing -> B.str $ "@" <> lab
symbol :: PandocMonad m => MarkdownParser m (F Inlines)
symbol = do
result <- noneOf "<\\\n\t "
<|> try (do lookAhead $ char '\\'
notFollowedBy' (() <$ rawTeXBlock)
char '\\')
return $ return $ B.str $ T.singleton result
code :: PandocMonad m => MarkdownParser m (F Inlines)
code = try $ do
starts <- many1 (char '`')
skipSpaces
result <- (trim . T.concat) <$>
manyTill (notFollowedBy (inList >> listStart) >>
(many1Char (noneOf "`\n") <|> many1Char (char '`') <|>
(char '\n' >> notFollowedBy' blankline >> return " ")))
(try (skipSpaces >> count (length starts) (char '`') >>
notFollowedBy (char '`')))
rawattr <-
(Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute))
<|>
(Right <$> option ("",[],[])
(try (guardEnabled Ext_inline_code_attributes >> attributes)))
return $ return $
case rawattr of
Left syn -> B.rawInline syn result
Right attr -> B.codeWith attr result
math :: PandocMonad m => MarkdownParser m (F Inlines)
math = (return . B.displayMath <$> (mathDisplay >>= applyMacros))
<|> (return . B.math <$> (mathInline >>= applyMacros)) <+?>
(guardEnabled Ext_smart *> (return <$> apostrophe)
<* notFollowedBy (space <|> satisfy isPunctuation))
enclosure :: PandocMonad m
=> Char
-> MarkdownParser m (F Inlines)
enclosure c = do
guardDisabled Ext_intraword_underscores
<|> guard (c == '*')
<|> (guard =<< notAfterString)
cs <- many1Char (char c)
(return (B.str cs) <>) <$> whitespace
<|>
case T.length cs of
3 -> three c
2 -> two c mempty
1 -> one c mempty
_ -> return (return $ B.str cs)
ender :: PandocMonad m => Char -> Int -> MarkdownParser m ()
ender c n = try $ do
count n (char c)
guard (c == '*')
<|> guardDisabled Ext_intraword_underscores
<|> notFollowedBy alphaNum
three :: PandocMonad m => Char -> MarkdownParser m (F Inlines)
three c = do
contents <- mconcat <$> many (notFollowedBy (ender c 1) >> inline)
(ender c 3 >> updateLastStrPos >> return ((B.strong . B.emph) <$> contents))
<|> (ender c 2 >> updateLastStrPos >> one c (B.strong <$> contents))
<|> (ender c 1 >> updateLastStrPos >> two c (B.emph <$> contents))
<|> return (return (B.str $ T.pack [c,c,c]) <> contents)
two :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines)
two c prefix' = do
contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline)
(ender c 2 >> updateLastStrPos >>
return (B.strong <$> (prefix' <> contents)))
<|> return (return (B.str $ T.pack [c,c]) <> (prefix' <> contents))
one :: PandocMonad m => Char -> F Inlines -> MarkdownParser m (F Inlines)
one c prefix' = do
contents <- mconcat <$> many ( (notFollowedBy (ender c 1) >> inline)
<|> try (string [c,c] >>
notFollowedBy (ender c 1) >>
two c mempty) )
(ender c 1 >> updateLastStrPos >> return (B.emph <$> (prefix' <> contents)))
<|> return (return (B.str $ T.singleton c) <> (prefix' <> contents))
strongOrEmph :: PandocMonad m => MarkdownParser m (F Inlines)
strongOrEmph = enclosure '*' <|> enclosure '_'
inlinesBetween :: PandocMonad m
=> (Show b)
=> MarkdownParser m a
-> MarkdownParser m b
-> MarkdownParser m (F Inlines)
inlinesBetween start end =
(trimInlinesF . mconcat) <$> try (start >> many1Till inner end)
where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline)
innerSpace = try $ whitespace <* notFollowedBy' end
strikeout :: PandocMonad m => MarkdownParser m (F Inlines)
strikeout = fmap B.strikeout <$>
(guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd)
where strikeStart = string "~~" >> lookAhead nonspaceChar
>> notFollowedBy (char '~')
strikeEnd = try $ string "~~"
superscript :: PandocMonad m => MarkdownParser m (F Inlines)
superscript = fmap B.superscript <$> try (do
guardEnabled Ext_superscript
char '^'
mconcat <$> many1Till (do notFollowedBy spaceChar
notFollowedBy newline
inline) (char '^'))
subscript :: PandocMonad m => MarkdownParser m (F Inlines)
subscript = fmap B.subscript <$> try (do
guardEnabled Ext_subscript
char '~'
mconcat <$> many1Till (do notFollowedBy spaceChar
notFollowedBy newline
inline) (char '~'))
whitespace :: PandocMonad m => MarkdownParser m (F Inlines)
whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
regsp = skipMany spaceChar >> return B.space
nonEndline :: PandocMonad m => ParserT Text st m Char
nonEndline = satisfy (/='\n')
str :: PandocMonad m => MarkdownParser m (F Inlines)
str = do
result <- mconcat <$> many1
( take1WhileP isAlphaNum
<|> "." <$ try (char '.' <* notFollowedBy (char '.')) )
updateLastStrPos
(do guardEnabled Ext_smart
abbrevs <- getOption readerAbbreviations
if not (T.null result) && T.last result == '.' && result `Set.member` abbrevs
then try (do ils <- whitespace
return $ do
ils' <- ils
case B.toList ils' of
[Space] ->
return (B.str result <> B.str "\160")
_ -> return (B.str result <> ils'))
<|> return (return (B.str result))
else return (return (B.str result)))
<|> return (return (B.str result))
endline :: PandocMonad m => MarkdownParser m (F Inlines)
endline = try $ do
newline
notFollowedBy blankline
getState >>= guard . stateAllowLineBreaks
notFollowedBy (inList >> listStart)
guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart
guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
guardEnabled Ext_blank_before_header <|> (notFollowedBy . char =<< atxChar)
guardDisabled Ext_backtick_code_blocks <|>
notFollowedBy (() <$ (lookAhead (char '`') >> codeBlockFenced))
notFollowedByHtmlCloser
notFollowedByDivCloser
(eof >> return mempty)
<|> (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak))
<|> (guardEnabled Ext_ignore_line_breaks >> return mempty)
<|> (skipMany spaceChar >> return (return B.softbreak))
reference :: PandocMonad m => MarkdownParser m (F Inlines, Text)
reference = do
guardDisabled Ext_footnotes <|> notFollowedBy' (string "[^")
guardDisabled Ext_citations <|> notFollowedBy' (string "[@")
withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
parenthesizedChars :: PandocMonad m => MarkdownParser m Text
parenthesizedChars = do
result <- charsInBalanced '(' ')' litChar
return $ "(" <> result <> ")"
source :: PandocMonad m => MarkdownParser m (Text, Text)
source = do
char '('
skipSpaces
let urlChunk =
try parenthesizedChars
<|> (notFollowedBy (oneOf " )") >> countChar 1 litChar)
<|> try (many1Char spaceChar <* notFollowedBy (oneOf "\"')"))
let sourceURL = (T.unwords . T.words . T.concat) <$> many urlChunk
let betweenAngles = try $
char '<' >> manyTillChar litChar (char '>')
src <- try betweenAngles <|> sourceURL
tit <- option "" $ try $ spnl >> linkTitle
skipSpaces
char ')'
return (escapeURI $ trimr src, tit)
linkTitle :: PandocMonad m => MarkdownParser m Text
linkTitle = quotedTitle '"' <|> quotedTitle '\''
link :: PandocMonad m => MarkdownParser m (F Inlines)
link = try $ do
st <- getState
guard $ stateAllowLinks st
setState $ st{ stateAllowLinks = False }
(lab,raw) <- reference
setState $ st{ stateAllowLinks = True }
regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw)
bracketedSpan :: PandocMonad m => MarkdownParser m (F Inlines)
bracketedSpan = try $ do
guardEnabled Ext_bracketed_spans
(lab,_) <- reference
attr <- attributes
return $ if isSmallCaps attr
then B.smallcaps <$> lab
else B.spanWith attr <$> lab
isSmallCaps :: Attr -> Bool
isSmallCaps ("",["smallcaps"],[]) = True
isSmallCaps ("",[],kvs) =
case lookup "style" kvs of
Just s -> T.toLower (T.filter (`notElem` [' ', '\t', ';']) s) ==
"font-variant:small-caps"
Nothing -> False
isSmallCaps _ = False
regLink :: PandocMonad m
=> (Attr -> Text -> Text -> Inlines -> Inlines)
-> F Inlines
-> MarkdownParser m (F Inlines)
regLink constructor lab = try $ do
(src, tit) <- source
attr <- option nullAttr $
guardEnabled Ext_link_attributes >> attributes
return $ constructor attr src tit <$> lab
referenceLink :: PandocMonad m
=> (Attr -> Text -> Text -> Inlines -> Inlines)
-> (F Inlines, Text)
-> MarkdownParser m (F Inlines)
referenceLink constructor (lab, raw) = do
sp <- (True <$ lookAhead (char ' ')) <|> return False
(_,raw') <- option (mempty, "") $
lookAhead (try (do guardEnabled Ext_citations
guardDisabled Ext_spaced_reference_links <|> spnl
normalCite
return (mempty, "")))
<|>
try ((guardDisabled Ext_spaced_reference_links <|> spnl) >> reference)
when (raw' == "") $ guardEnabled Ext_shortcut_reference_links
let labIsRef = raw' == "" || raw' == "[]"
let key = toKey $ if labIsRef then raw else raw'
parsedRaw <- parseFromString' inlines raw'
fallback <- parseFromString' inlines $ dropBrackets raw
implicitHeaderRefs <- option False $
True <$ guardEnabled Ext_implicit_header_references
let makeFallback = do
parsedRaw' <- parsedRaw
fallback' <- fallback
return $ B.str "[" <> fallback' <> B.str "]" <>
(if sp && not (T.null raw) then B.space else mempty) <>
parsedRaw'
return $ do
keys <- asksF stateKeys
case M.lookup key keys of
Nothing ->
if implicitHeaderRefs
then do
headerKeys <- asksF stateHeaderKeys
case M.lookup key headerKeys of
Just ((src, tit), _) -> constructor nullAttr src tit <$> lab
Nothing -> makeFallback
else makeFallback
Just ((src,tit), attr) -> constructor attr src tit <$> lab
dropBrackets :: Text -> Text
dropBrackets = dropRB . dropLB
where dropRB (T.unsnoc -> Just (xs,']')) = xs
dropRB xs = xs
dropLB (T.uncons -> Just ('[',xs)) = xs
dropLB xs = xs
bareURL :: PandocMonad m => MarkdownParser m (F Inlines)
bareURL = try $ do
guardEnabled Ext_autolink_bare_uris
getState >>= guard . stateAllowLinks
(cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
notFollowedBy $ try $ spaces >> htmlTag (~== TagClose ("a" :: Text))
return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig)
autoLink :: PandocMonad m => MarkdownParser m (F Inlines)
autoLink = try $ do
getState >>= guard . stateAllowLinks
char '<'
(cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
extra <- fromEntities <$> manyTillChar nonspaceChar (char '>')
attr <- option ("", [cls], []) $ try $
guardEnabled Ext_link_attributes >> attributes
return $ return $ B.linkWith attr (src <> escapeURI extra) ""
(B.str $ orig <> extra)
image :: PandocMonad m => MarkdownParser m (F Inlines)
image = try $ do
char '!'
(lab,raw) <- reference
defaultExt <- getOption readerDefaultImageExtension
let constructor attr' src = case takeExtension (T.unpack src) of
"" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src)
$ T.unpack defaultExt)
_ -> B.imageWith attr' src
regLink constructor lab <|> referenceLink constructor (lab,raw)
note :: PandocMonad m => MarkdownParser m (F Inlines)
note = try $ do
guardEnabled Ext_footnotes
ref <- noteMarker
updateState $ \st -> st{ stateNoteRefs = Set.insert ref (stateNoteRefs st) }
return $ do
notes <- asksF stateNotes'
case M.lookup ref notes of
Nothing -> return $ B.str $ "[^" <> ref <> "]"
Just (_pos, contents) -> do
st <- askF
let contents' = runF contents st{ stateNotes' = M.empty }
return $ B.note contents'
inlineNote :: PandocMonad m => MarkdownParser m (F Inlines)
inlineNote = try $ do
guardEnabled Ext_inline_notes
char '^'
contents <- inlinesInBalancedBrackets
return $ B.note . B.para <$> contents
rawLaTeXInline' :: PandocMonad m => MarkdownParser m (F Inlines)
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
notFollowedBy' rawConTeXtEnvironment
s <- rawLaTeXInline
return $ return $ B.rawInline "tex" s
rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text
rawConTeXtEnvironment = try $ do
string "\\start"
completion <- inBrackets (letter <|> digit <|> spaceChar)
<|> many1Char letter
contents <- manyTill (rawConTeXtEnvironment <|> countChar 1 anyChar)
(try $ string "\\stop" >> textStr completion)
return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion
inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text
inBrackets parser = do
char '['
contents <- manyChar parser
char ']'
return $ "[" <> contents <> "]"
spanHtml :: PandocMonad m => MarkdownParser m (F Inlines)
spanHtml = try $ do
guardEnabled Ext_native_spans
(TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) [])
contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text)))
let ident = fromMaybe "" $ lookup "id" attrs
let classes = maybe [] T.words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
return $ if isSmallCaps (ident, classes, keyvals)
then B.smallcaps <$> contents
else B.spanWith (ident, classes, keyvals) <$> contents
divHtml :: PandocMonad m => MarkdownParser m (F Blocks)
divHtml = try $ do
guardEnabled Ext_native_divs
(TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) [])
oldInHtmlBlock <- stateInHtmlBlock <$> getState
updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
bls <- option "" (blankline >> option "" blanklines)
contents <- mconcat <$>
many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block)
closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text)))
if closed
then do
updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
let ident = fromMaybe "" $ lookup "id" attrs
let classes = maybe [] T.words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
return $ B.divWith (ident, classes, keyvals) <$> contents
else
return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents
divFenced :: PandocMonad m => MarkdownParser m (F Blocks)
divFenced = try $ do
guardEnabled Ext_fenced_divs
string ":::"
skipMany (char ':')
skipMany spaceChar
attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar)
skipMany spaceChar
skipMany (char ':')
blankline
updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st + 1 }
bs <- mconcat <$> manyTill block divFenceEnd
updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st - 1 }
return $ B.divWith attribs <$> bs
divFenceEnd :: PandocMonad m => MarkdownParser m ()
divFenceEnd = try $ do
string ":::"
skipMany (char ':')
blanklines
return ()
rawHtmlInline :: PandocMonad m => MarkdownParser m (F Inlines)
rawHtmlInline = do
guardEnabled Ext_raw_html
inHtmlBlock <- stateInHtmlBlock <$> getState
let isCloseBlockTag t = case inHtmlBlock of
Just t' -> t ~== TagClose t'
Nothing -> False
mdInHtml <- option False $
( guardEnabled Ext_markdown_in_html_blocks
<|> guardEnabled Ext_markdown_attribute
) >> return True
(_,result) <- htmlTag $ if mdInHtml
then (\x -> isInlineTag x &&
not (isCloseBlockTag x))
else not . isTextTag
return $ return $ B.rawInline "html" result
emojiChars :: [Char]
emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-']
emoji :: PandocMonad m => MarkdownParser m (F Inlines)
emoji = try $ do
guardEnabled Ext_emoji
char ':'
emojikey <- many1Char (oneOf emojiChars)
char ':'
case emojiToInline emojikey of
Just i -> return (return $ B.singleton i)
Nothing -> mzero
cite :: PandocMonad m => MarkdownParser m (F Inlines)
cite = do
guardEnabled Ext_citations
textualCite
<|> do (cs, raw) <- withRaw normalCite
return $ flip B.cite (B.text raw) <$> cs
textualCite :: PandocMonad m => MarkdownParser m (F Inlines)
textualCite = try $ do
(suppressAuthor, key) <- citeKey
let first = Citation{ citationId = key
, citationPrefix = []
, citationSuffix = []
, citationMode = if suppressAuthor
then SuppressAuthor
else AuthorInText
, citationNoteNum = 0
, citationHash = 0
}
mbrest <- option Nothing $ try $ spnl >> Just <$> withRaw normalCite
case mbrest of
Just (rest, raw) ->
return $ (flip B.cite (B.text $ "@" <> key <> " " <> raw) . (first:))
<$> rest
Nothing ->
(do
(cs, raw) <- withRaw $ bareloc first
let (spaces',raw') = T.span isSpace raw
spc | T.null spaces' = mempty
| otherwise = B.space
lab <- parseFromString' inlines $ dropBrackets raw'
fallback <- referenceLink B.linkWith (lab,raw')
return $ do
fallback' <- fallback
cs' <- cs
return $
case B.toList fallback' of
Link{}:_ -> B.cite [first] (B.str $ "@" <> key) <> spc <> fallback'
_ -> B.cite cs' (B.text $ "@" <> key <> " " <> raw))
<|> return (do st <- askF
return $ case M.lookup key (stateExamples st) of
Just n -> B.str $ tshow n
_ -> B.cite [first] $ B.str $ "@" <> key)
bareloc :: PandocMonad m => Citation -> MarkdownParser m (F [Citation])
bareloc c = try $ do
spnl
char '['
notFollowedBy $ char '^'
suff <- suffix
rest <- option (return []) $ try $ char ';' >> citeList
spnl
char ']'
notFollowedBy $ oneOf "[("
return $ do
suff' <- suff
rest' <- rest
return $ c{ citationSuffix = B.toList suff' } : rest'
normalCite :: PandocMonad m => MarkdownParser m (F [Citation])
normalCite = try $ do
char '['
spnl
citations <- citeList
spnl
char ']'
return citations
suffix :: PandocMonad m => MarkdownParser m (F Inlines)
suffix = try $ do
hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
spnl
rest <- trimInlinesF . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline)
return $ if hasSpace
then (B.space <>) <$> rest
else rest
prefix :: PandocMonad m => MarkdownParser m (F Inlines)
prefix = trimInlinesF . mconcat <$>
manyTill inline (char ']' <|> fmap (const ']') (lookAhead citeKey))
citeList :: PandocMonad m => MarkdownParser m (F [Citation])
citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl)
citation :: PandocMonad m => MarkdownParser m (F Citation)
citation = try $ do
pref <- prefix
(suppress_author, key) <- citeKey
suff <- suffix
return $ do
x <- pref
y <- suff
return Citation{ citationId = key
, citationPrefix = B.toList x
, citationSuffix = B.toList y
, citationMode = if suppress_author
then SuppressAuthor
else NormalCitation
, citationNoteNum = 0
, citationHash = 0
}
smart :: PandocMonad m => MarkdownParser m (F Inlines)
smart = do
guardEnabled Ext_smart
doubleQuoted <|> singleQuoted <|>
choice (map (return <$>) [apostrophe, dash, ellipses])
singleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
singleQuoted = try $ do
singleQuoteStart
withQuoteContext InSingleQuote $
fmap B.singleQuoted . trimInlinesF . mconcat <$>
many1Till inline singleQuoteEnd
doubleQuoted :: PandocMonad m => MarkdownParser m (F Inlines)
doubleQuoted = try $ do
doubleQuoteStart
withQuoteContext InDoubleQuote $
fmap B.doubleQuoted . trimInlinesF . mconcat <$>
many1Till inline doubleQuoteEnd