{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
module Text.Pandoc.Readers.Textile ( readTextile) where
import Control.Monad (guard, liftM)
import Control.Monad.Except (throwError)
import Data.Char (digitToInt, isUpper)
import Data.List (intersperse, transpose)
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup (Tag (..), fromAttrib)
import Text.HTML.TagSoup.Match
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
import Text.Pandoc.CSS
import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
import Text.Pandoc.Shared (crFilter, trim, underlineSpan, tshow)
readTextile :: PandocMonad m
=> ReaderOptions
-> Text
-> m Pandoc
readTextile opts s = do
parsed <- readWithM parseTextile def{ stateOptions = opts }
(crFilter s <> "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc
parseTextile = do
many blankline
startPos <- getPosition
let firstPassParser = noteBlock <|> lineClump
manyTill firstPassParser eof >>= setInput . T.concat
setPosition startPos
st' <- getState
let reversedNotes = stateNotes st'
updateState $ \s -> s { stateNotes = reverse reversedNotes }
Pandoc nullMeta . B.toList <$> parseBlocks
noteMarker :: PandocMonad m => ParserT Text ParserState m Text
noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.')
noteBlock :: PandocMonad m => ParserT Text ParserState m Text
noteBlock = try $ do
startPos <- getPosition
ref <- noteMarker
optional blankline
contents <- T.unlines <$> many1Till anyLine (blanklines <|> noteBlock)
endPos <- getPosition
let newnote = (ref, contents <> "\n")
st <- getState
let oldnotes = stateNotes st
updateState $ \s -> s { stateNotes = newnote : oldnotes }
return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks
parseBlocks = mconcat <$> manyTill block eof
blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks]
blockParsers = [ codeBlock
, header
, blockQuote
, hrule
, commentBlock
, anyList
, rawHtmlBlock
, rawLaTeXBlock'
, table
, maybeExplicitBlock "p" para
, mempty <$ blanklines
]
block :: PandocMonad m => ParserT Text ParserState m Blocks
block = do
res <- choice blockParsers <?> "block"
trace (T.take 60 $ tshow $ B.toList res)
return res
commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks
commentBlock = try $ do
string "###."
manyTill anyLine blanklines
return mempty
codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks
codeBlock = codeBlockBc <|> codeBlockPre
codeBlockBc :: PandocMonad m => ParserT Text ParserState m Blocks
codeBlockBc = try $ do
string "bc."
extended <- option False (True <$ char '.')
char ' '
let starts = ["p", "table", "bq", "bc", "h1", "h2", "h3",
"h4", "h5", "h6", "pre", "###", "notextile"]
let ender = choice $ map explicitBlockStart starts
contents <- if extended
then do
f <- anyLine
rest <- many (notFollowedBy ender *> anyLine)
return (f:rest)
else manyTill anyLine blanklines
return $ B.codeBlock (trimTrailingNewlines (T.unlines contents))
trimTrailingNewlines :: Text -> Text
trimTrailingNewlines = T.dropWhileEnd (=='\n')
codeBlockPre :: PandocMonad m => ParserT Text ParserState m Blocks
codeBlockPre = try $ do
(t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre")))
let result'' = case T.uncons result' of
Just ('\n', xs) -> xs
_ -> result'
let result''' = case T.unsnoc result'' of
Just (xs, '\n') -> xs
_ -> result''
let classes = T.words $ fromAttrib "class" t
let ident = fromAttrib "id" t
let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
return $ B.codeBlockWith (ident,classes,kvs) result'''
header :: PandocMonad m => ParserT Text ParserState m Blocks
header = try $ do
char 'h'
level <- digitToInt <$> oneOf "123456"
attr <- attributes
char '.'
lookAhead whitespace
name <- trimInlines . mconcat <$> many inline
attr' <- registerHeader attr name
return $ B.headerWith attr' level name
blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks
blockQuote = try $ do
string "bq" >> attributes >> char '.' >> whitespace
B.blockQuote <$> para
hrule :: PandocMonad m => ParserT Text st m Blocks
hrule = try $ do
skipSpaces
start <- oneOf "-*"
count 2 (skipSpaces >> char start)
skipMany (spaceChar <|> char start)
newline
optional blanklines
return B.horizontalRule
anyList :: PandocMonad m => ParserT Text ParserState m Blocks
anyList = try $ anyListAtDepth 1 <* blanklines
anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
anyListAtDepth depth = choice [ bulletListAtDepth depth,
orderedListAtDepth depth,
definitionList ]
bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth)
bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
bulletListItemAtDepth = genericListItemAtDepth '*'
orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
orderedListAtDepth depth = try $ do
items <- many1 (orderedListItemAtDepth depth)
return $ B.orderedList items
orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
orderedListItemAtDepth = genericListItemAtDepth '#'
genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks
genericListItemAtDepth c depth = try $ do
count depth (char c) >> attributes >> whitespace
contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|>
try (newline >> codeBlockPre))
newline
sublist <- option mempty (anyListAtDepth (depth + 1))
return $ contents <> sublist
definitionList :: PandocMonad m => ParserT Text ParserState m Blocks
definitionList = try $ B.definitionList <$> many1 definitionListItem
listStart :: PandocMonad m => ParserT Text ParserState m ()
listStart = genericListStart '*'
<|> () <$ genericListStart '#'
<|> () <$ definitionListStart
genericListStart :: PandocMonad m => Char -> ParserT Text st m ()
genericListStart c = () <$ try (many1 (char c) >> whitespace)
basicDLStart :: PandocMonad m => ParserT Text ParserState m ()
basicDLStart = do
char '-'
whitespace
notFollowedBy newline
definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines
definitionListStart = try $ do
basicDLStart
trimInlines . mconcat <$>
many1Till inline
( try (newline *> lookAhead basicDLStart)
<|> try (lookAhead (() <$ string ":="))
)
definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks])
definitionListItem = try $ do
term <- (mconcat . intersperse B.linebreak) <$> many1 definitionListStart
def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef)
return (term, def')
where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
inlineDef = liftM (\d -> [B.plain d])
$ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline
multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
multilineDef = try $ do
optional whitespace >> newline
s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline))
ds <- parseFromString' parseBlocks (s <> "\n\n")
return [ds]
rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks
rawHtmlBlock = try $ do
skipMany spaceChar
(_,b) <- htmlTag isBlockTag
optional blanklines
return $ B.rawBlock "html" b
rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks
rawLaTeXBlock' = do
guardEnabled Ext_raw_tex
B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
para :: PandocMonad m => ParserT Text ParserState m Blocks
para = B.para . trimInlines . mconcat <$> many1 inline
toAlignment :: Char -> Alignment
toAlignment '<' = AlignLeft
toAlignment '>' = AlignRight
toAlignment '=' = AlignCenter
toAlignment _ = AlignDefault
cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment)
cellAttributes = try $ do
isHeader <- option False (True <$ char '_')
optional $ try $ oneOf "/\\" >> many1 digit
alignment <- option AlignDefault $ toAlignment <$> oneOf "<>="
_ <- attributes
char '.'
return (isHeader, alignment)
tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks)
tableCell = try $ do
char '|'
(isHeader, alignment) <- option (False, AlignDefault) cellAttributes
notFollowedBy blankline
raw <- trim . T.pack <$>
many (noneOf "|\n" <|> try (char '\n' <* notFollowedBy blankline))
content <- mconcat <$> parseFromString' (many inline) raw
return ((isHeader, alignment), B.plain content)
tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)]
tableRow = try $ do
optional $ try $ do
_ <- attributes
char '.'
many1 spaceChar
many1 tableCell <* char '|' <* blankline
table :: PandocMonad m => ParserT Text ParserState m Blocks
table = try $ do
caption <- option mempty $ try $ do
string "table"
_ <- attributes
char '.'
rawcapt <- trim <$> anyLine
parseFromString' (mconcat <$> many inline) rawcapt
rawrows <- many1 $ skipMany ignorableRow >> tableRow
skipMany ignorableRow
blanklines
let (headers, rows) = case rawrows of
(toprow:rest) | any (fst . fst) toprow ->
(toprow, rest)
_ -> (mempty, rawrows)
let nbOfCols = maximum $ map length (headers:rows)
let aligns = map minimum $ transpose $ map (map (snd . fst)) (headers:rows)
return $ B.table caption
(zip aligns (replicate nbOfCols 0.0))
(map snd headers)
(map (map snd) rows)
ignorableRow :: PandocMonad m => ParserT Text ParserState m ()
ignorableRow = try $ do
char '|'
oneOf ":^-~"
_ <- attributes
char '.'
_ <- anyLine
return ()
explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m ()
explicitBlockStart name = try $ do
string (T.unpack name)
attributes
char '.'
optional whitespace
optional endline
maybeExplicitBlock :: PandocMonad m
=> Text
-> ParserT Text ParserState m Blocks
-> ParserT Text ParserState m Blocks
maybeExplicitBlock name blk = try $ do
optional $ explicitBlockStart name
blk
inline :: PandocMonad m => ParserT Text ParserState m Inlines
inline = choice inlineParsers <?> "inline"
inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines]
inlineParsers = [ str
, whitespace
, endline
, code
, escapedInline
, inlineMarkup
, groupedInlineMarkup
, rawHtmlInline
, rawLaTeXInline'
, note
, link
, image
, mark
, (B.str . T.singleton) <$> characterReference
, smartPunctuation inline
, symbol
]
inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
, simpleInline (string "**") B.strong
, simpleInline (string "__") B.emph
, simpleInline (char '*') B.strong
, simpleInline (char '_') B.emph
, simpleInline (char '+') underlineSpan
, simpleInline (char '-' <* notFollowedBy (char '-')) B.strikeout
, simpleInline (char '^') B.superscript
, simpleInline (char '~') B.subscript
, simpleInline (char '%') id
]
mark :: PandocMonad m => ParserT Text st m Inlines
mark = try $ char '(' >> (try tm <|> try reg <|> copy)
reg :: PandocMonad m => ParserT Text st m Inlines
reg = do
oneOf "Rr"
char ')'
return $ B.str "\174"
tm :: PandocMonad m => ParserT Text st m Inlines
tm = do
oneOf "Tt"
oneOf "Mm"
char ')'
return $ B.str "\8482"
copy :: PandocMonad m => ParserT Text st m Inlines
copy = do
oneOf "Cc"
char ')'
return $ B.str "\169"
note :: PandocMonad m => ParserT Text ParserState m Inlines
note = try $ do
ref <- char '[' *> many1 digit <* char ']'
notes <- stateNotes <$> getState
case lookup (T.pack ref) notes of
Nothing -> Prelude.fail "note not found"
Just raw -> B.note <$> parseFromString' parseBlocks raw
markupChars :: [Char]
markupChars = "\\*#_@~-+^|%=[]&"
stringBreakers :: [Char]
stringBreakers = " \t\n\r.,\"'?!;:<>«»„“”‚‘’()[]"
wordBoundaries :: [Char]
wordBoundaries = markupChars <> stringBreakers
hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text
hyphenedWords = do
x <- wordChunk
xs <- many (try $ char '-' >> wordChunk)
return $ T.intercalate "-" (x:xs)
wordChunk :: PandocMonad m => ParserT Text ParserState m Text
wordChunk = try $ do
hd <- noneOf wordBoundaries
tl <- many ( noneOf wordBoundaries <|>
try (notFollowedBy' note *> oneOf markupChars
<* lookAhead (noneOf wordBoundaries) ) )
return $ T.pack $ hd:tl
str :: PandocMonad m => ParserT Text ParserState m Inlines
str = do
baseStr <- hyphenedWords
fullStr <- option baseStr $ try $ do
guard $ T.all isUpper baseStr
acro <- T.pack <$> enclosed (char '(') (char ')') anyChar'
return $ T.concat [baseStr, " (", acro, ")"]
updateLastStrPos
return $ B.str fullStr
whitespace :: PandocMonad m => ParserT Text st m Inlines
whitespace = many1 spaceChar >> return B.space <?> "whitespace"
endline :: PandocMonad m => ParserT Text ParserState m Inlines
endline = try $ do
newline
notFollowedBy blankline
notFollowedBy listStart
notFollowedBy rawHtmlBlock
return B.linebreak
rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines
rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
B.rawInline "latex" <$> rawLaTeXInline
link :: PandocMonad m => ParserT Text ParserState m Inlines
link = try $ do
bracketed <- (True <$ char '[') <|> return False
char '"' *> notFollowedBy (oneOf " \t\n\r")
attr <- attributes
name <- trimInlines . mconcat <$>
withQuoteContext InDoubleQuote (many1Till inline (char '"'))
char ':'
let stop = if bracketed
then char ']'
else lookAhead $ space <|> eof' <|>
try (oneOf "!.,;:" *>
(space <|> newline <|> eof'))
url <- T.pack <$> many1Till nonspaceChar stop
let name' = if B.toList name == [Str "$"] then B.str url else name
return $ if attr == nullAttr
then B.link url "" name'
else B.spanWith attr $ B.link url "" name'
image :: PandocMonad m => ParserT Text ParserState m Inlines
image = try $ do
char '!' >> notFollowedBy space
(ident, cls, kvs) <- attributes
let attr = case lookup "style" kvs of
Just stls -> (ident, cls, pickStylesToKVs ["width", "height"] stls)
Nothing -> (ident, cls, kvs)
src <- T.pack <$> many1 (noneOf " \t\n\r!(")
alt <- fmap T.pack $ option "" $ try $ char '(' *> manyTill anyChar (char ')')
char '!'
return $ B.imageWith attr src alt (B.str alt)
escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines
escapedInline = escapedEqs <|> escapedTag
escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines
escapedEqs = B.str . T.pack <$>
try (string "==" *> manyTill anyChar' (try $ string "=="))
escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines
escapedTag = B.str . T.pack <$>
try (string "<notextile>" *>
manyTill anyChar' (try $ string "</notextile>"))
symbol :: PandocMonad m => ParserT Text ParserState m Inlines
symbol = B.str . T.singleton <$> (notFollowedBy newline *>
notFollowedBy rawHtmlBlock *>
oneOf wordBoundaries)
code :: PandocMonad m => ParserT Text ParserState m Inlines
code = code1 <|> code2
anyChar' :: PandocMonad m => ParserT Text ParserState m Char
anyChar' =
satisfy (/='\n') <|>
try (char '\n' <* notFollowedBy blankline)
code1 :: PandocMonad m => ParserT Text ParserState m Inlines
code1 = B.code . T.pack <$> surrounded (char '@') anyChar'
code2 :: PandocMonad m => ParserT Text ParserState m Inlines
code2 = do
htmlTag (tagOpen (=="tt") null)
B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
attributes :: PandocMonad m => ParserT Text ParserState m Attr
attributes = foldl (flip ($)) ("",[],[]) <$>
try (do special <- option id specialAttribute
attrs <- many attribute
return (special : attrs))
specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
specialAttribute = do
alignStr <- ("center" <$ char '=') <|>
("justify" <$ try (string "<>")) <|>
("right" <$ char '>') <|>
("left" <$ char '<')
notFollowedBy spaceChar
return $ addStyle $ T.pack $ "text-align:" ++ alignStr
attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
attribute = try $
(classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar
classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
classIdAttr = try $ do
char '('
ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')')
case reverse ws of
[]
-> return $ \(_,_,keyvals) -> ("",[],keyvals)
((T.uncons -> Just ('#', ident')):classes')
-> return $ \(_,_,keyvals) -> (ident',classes',keyvals)
classes'
-> return $ \(_,_,keyvals) -> ("",classes',keyvals)
styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
styleAttr = do
style <- try $ enclosed (char '{') (char '}') anyChar'
return $ addStyle $ T.pack style
addStyle :: Text -> Attr -> Attr
addStyle style (id',classes,keyvals) =
(id',classes,keyvals')
where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals]
langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
langAttr = do
lang <- try $ enclosed (char '[') (char ']') alphaNum
return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals)
surrounded :: (PandocMonad m, Show t)
=> ParserT Text st m t
-> ParserT Text st m a
-> ParserT Text st m [a]
surrounded border =
enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
simpleInline :: PandocMonad m
=> ParserT Text ParserState m t
-> (Inlines -> Inlines)
-> ParserT Text ParserState m Inlines
simpleInline border construct = try $ do
notAfterString
border *> notFollowedBy (oneOf " \t\n\r")
attr <- attributes
body <- trimInlines . mconcat <$>
withQuoteContext InSingleQuote
(manyTill (notFollowedBy newline >> inline)
(try border <* notFollowedBy alphaNum))
return $ construct $
if attr == nullAttr
then body
else B.spanWith attr body
groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
groupedInlineMarkup = try $ do
char '['
sp1 <- option mempty $ B.space <$ whitespace
result <- withQuoteContext InSingleQuote inlineMarkup
sp2 <- option mempty $ B.space <$ whitespace
char ']'
return $ sp1 <> result <> sp2
eof' :: Monad m => ParserT Text s m Char
eof' = '\n' <$ eof