{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}
module Text.Pandoc.Readers.Docx
( readDocx
) where
import Prelude
import Codec.Archive.Zip
import Control.Monad.Reader
import Control.Monad.State.Strict
import qualified Data.ByteString.Lazy as B
import Data.Default (Default)
import Data.List (delete, intersect)
import qualified Data.Map as M
import Data.Maybe (isJust, fromMaybe)
import Data.Sequence (ViewL (..), viewl)
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import Text.Pandoc.Builder
import Text.Pandoc.MediaBag (MediaBag)
import Text.Pandoc.Options
import Text.Pandoc.Readers.Docx.Combine
import Text.Pandoc.Readers.Docx.Lists
import Text.Pandoc.Readers.Docx.Parse
import Text.Pandoc.Shared
import Text.Pandoc.Walk
import Text.TeXMath (writeTeX)
import Control.Monad.Except (throwError)
import Text.Pandoc.Class (PandocMonad)
import qualified Text.Pandoc.Class as P
import Text.Pandoc.Error
import Text.Pandoc.Logging
readDocx :: PandocMonad m
=> ReaderOptions
-> B.ByteString
-> m Pandoc
readDocx opts bytes
| Right archive <- toArchiveOrFail bytes
, Right (docx, parserWarnings) <- archiveToDocxWithWarnings archive = do
mapM_ (P.report . DocxParserWarning) parserWarnings
(meta, blks) <- docxToOutput opts docx
return $ Pandoc meta blks
readDocx _ _ =
throwError $ PandocSomeError "couldn't parse docx file"
data DState = DState { docxAnchorMap :: M.Map String String
, docxAnchorSet :: Set.Set String
, docxImmedPrevAnchor :: Maybe String
, docxMediaBag :: MediaBag
, docxDropCap :: Inlines
, docxListState :: M.Map (String, String) Integer
, docxPrevPara :: Inlines
}
instance Default DState where
def = DState { docxAnchorMap = M.empty
, docxAnchorSet = mempty
, docxImmedPrevAnchor = Nothing
, docxMediaBag = mempty
, docxDropCap = mempty
, docxListState = M.empty
, docxPrevPara = mempty
}
data DEnv = DEnv { docxOptions :: ReaderOptions
, docxInHeaderBlock :: Bool
}
instance Default DEnv where
def = DEnv def False
type DocxContext m = ReaderT DEnv (StateT DState m)
evalDocxContext :: PandocMonad m => DocxContext m a -> DEnv -> DState -> m a
evalDocxContext ctx env st = flip evalStateT st $flip runReaderT env ctx
spansToKeep :: [String]
spansToKeep = []
divsToKeep :: [String]
divsToKeep = ["list-item", "Definition", "DefinitionTerm"]
metaStyles :: M.Map String String
metaStyles = M.fromList [ ("Title", "title")
, ("Subtitle", "subtitle")
, ("Author", "author")
, ("Date", "date")
, ("Abstract", "abstract")]
sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart])
sepBodyParts = span (\bp -> isMetaPar bp || isEmptyPar bp)
isMetaPar :: BodyPart -> Bool
isMetaPar (Paragraph pPr _) =
not $ null $ intersect (pStyle pPr) (M.keys metaStyles)
isMetaPar _ = False
isEmptyPar :: BodyPart -> Bool
isEmptyPar (Paragraph _ parParts) =
all isEmptyParPart parParts
where
isEmptyParPart (PlainRun (Run _ runElems)) = all isEmptyElem runElems
isEmptyParPart _ = False
isEmptyElem (TextRun s) = trim s == ""
isEmptyElem _ = True
isEmptyPar _ = False
bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String MetaValue)
bodyPartsToMeta' [] = return M.empty
bodyPartsToMeta' (bp : bps)
| (Paragraph pPr parParts) <- bp
, (c : _)<- (pStyle pPr) `intersect` (M.keys metaStyles)
, (Just metaField) <- M.lookup c metaStyles = do
inlines <- smushInlines <$> mapM parPartToInlines parParts
remaining <- bodyPartsToMeta' bps
let
f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils']
f (MetaInlines ils) (MetaBlocks blks) = MetaBlocks (Para ils : blks)
f m (MetaList mv) = MetaList (m : mv)
f m n = MetaList [m, n]
return $ M.insertWith f metaField (MetaInlines (toList inlines)) remaining
bodyPartsToMeta' (_ : bps) = bodyPartsToMeta' bps
bodyPartsToMeta :: PandocMonad m => [BodyPart] -> DocxContext m Meta
bodyPartsToMeta bps = do
mp <- bodyPartsToMeta' bps
let mp' =
case M.lookup "author" mp of
Just mv -> M.insert "author" (fixAuthors mv) mp
Nothing -> mp
return $ Meta mp'
fixAuthors :: MetaValue -> MetaValue
fixAuthors (MetaBlocks blks) =
MetaList $ map g $ filter f blks
where f (Para _) = True
f _ = False
g (Para ils) = MetaInlines ils
g _ = MetaInlines []
fixAuthors mv = mv
codeStyles :: [String]
codeStyles = ["VerbatimChar"]
codeDivs :: [String]
codeDivs = ["SourceCode"]
runElemToInlines :: RunElem -> Inlines
runElemToInlines (TextRun s) = text s
runElemToInlines LnBrk = linebreak
runElemToInlines Tab = space
runElemToInlines SoftHyphen = text "\xad"
runElemToInlines NoBreakHyphen = text "\x2011"
runElemToString :: RunElem -> String
runElemToString (TextRun s) = s
runElemToString LnBrk = ['\n']
runElemToString Tab = ['\t']
runElemToString SoftHyphen = ['\xad']
runElemToString NoBreakHyphen = ['\x2011']
runToString :: Run -> String
runToString (Run _ runElems) = concatMap runElemToString runElems
runToString _ = ""
parPartToString :: ParPart -> String
parPartToString (PlainRun run) = runToString run
parPartToString (InternalHyperLink _ runs) = concatMap runToString runs
parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs
parPartToString _ = ""
blacklistedCharStyles :: [String]
blacklistedCharStyles = ["Hyperlink"]
resolveDependentRunStyle :: PandocMonad m => RunStyle -> DocxContext m RunStyle
resolveDependentRunStyle rPr
| Just (s, _) <- rStyle rPr, s `elem` blacklistedCharStyles =
return rPr
| Just (_, cs) <- rStyle rPr = do
opts <- asks docxOptions
if isEnabled Ext_styles opts
then return rPr
else do rPr' <- resolveDependentRunStyle cs
return $
RunStyle { isBold = case isBold rPr of
Just bool -> Just bool
Nothing -> isBold rPr'
, isItalic = case isItalic rPr of
Just bool -> Just bool
Nothing -> isItalic rPr'
, isSmallCaps = case isSmallCaps rPr of
Just bool -> Just bool
Nothing -> isSmallCaps rPr'
, isStrike = case isStrike rPr of
Just bool -> Just bool
Nothing -> isStrike rPr'
, rVertAlign = case rVertAlign rPr of
Just valign -> Just valign
Nothing -> rVertAlign rPr'
, rUnderline = case rUnderline rPr of
Just ulstyle -> Just ulstyle
Nothing -> rUnderline rPr'
, rStyle = rStyle rPr }
| otherwise = return rPr
runStyleToTransform :: PandocMonad m => RunStyle -> DocxContext m (Inlines -> Inlines)
runStyleToTransform rPr
| Just (s, _) <- rStyle rPr
, s `elem` spansToKeep = do
transform <- runStyleToTransform rPr{rStyle = Nothing}
return $ spanWith ("", [s], []) . transform
| Just (s, _) <- rStyle rPr = do
opts <- asks docxOptions
let extraInfo = if isEnabled Ext_styles opts
then spanWith ("", [], [("custom-style", s)])
else id
transform <- runStyleToTransform rPr{rStyle = Nothing}
return $ extraInfo . transform
| Just True <- isItalic rPr = do
transform <- runStyleToTransform rPr{isItalic = Nothing}
return $ emph . transform
| Just True <- isBold rPr = do
transform <- runStyleToTransform rPr{isBold = Nothing}
return $ strong . transform
| Just True <- isSmallCaps rPr = do
transform <- runStyleToTransform rPr{isSmallCaps = Nothing}
return $ smallcaps . transform
| Just True <- isStrike rPr = do
transform <- runStyleToTransform rPr{isStrike = Nothing}
return $ strikeout . transform
| Just SupScrpt <- rVertAlign rPr = do
transform <- runStyleToTransform rPr{rVertAlign = Nothing}
return $ superscript . transform
| Just SubScrpt <- rVertAlign rPr = do
transform <- runStyleToTransform rPr{rVertAlign = Nothing}
return $ subscript . transform
| Just "single" <- rUnderline rPr = do
transform <- runStyleToTransform rPr{rUnderline = Nothing}
return $ underlineSpan . transform
| otherwise = return id
runToInlines :: PandocMonad m => Run -> DocxContext m Inlines
runToInlines (Run rs runElems)
| Just (s, _) <- rStyle rs
, s `elem` codeStyles = do
rPr <- resolveDependentRunStyle rs
let codeString = code $ concatMap runElemToString runElems
return $ case rVertAlign rPr of
Just SupScrpt -> superscript codeString
Just SubScrpt -> subscript codeString
_ -> codeString
| otherwise = do
rPr <- resolveDependentRunStyle rs
let ils = smushInlines (map runElemToInlines runElems)
transform <- runStyleToTransform rPr
return $ transform ils
runToInlines (Footnote bps) = do
blksList <- smushBlocks <$> mapM bodyPartToBlocks bps
return $ note blksList
runToInlines (Endnote bps) = do
blksList <- smushBlocks <$> mapM bodyPartToBlocks bps
return $ note blksList
runToInlines (InlineDrawing fp title alt bs ext) = do
(lift . lift) $ P.insertMedia fp Nothing bs
return $ imageWith (extentToAttr ext) fp title $ text alt
runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]"
extentToAttr :: Extent -> Attr
extentToAttr (Just (w, h)) =
("", [], [("width", showDim w), ("height", showDim h)] )
where
showDim d = show (d / 914400) ++ "in"
extentToAttr _ = nullAttr
blocksToInlinesWarn :: PandocMonad m => String -> Blocks -> DocxContext m Inlines
blocksToInlinesWarn cmtId blks = do
let blkList = toList blks
notParaOrPlain :: Block -> Bool
notParaOrPlain (Para _) = False
notParaOrPlain (Plain _) = False
notParaOrPlain _ = True
unless ( not (any notParaOrPlain blkList)) $
lift $ P.report $ DocxParserWarning $
"Docx comment " ++ cmtId ++ " will not retain formatting"
return $ blocksToInlines' blkList
parPartToInlines :: PandocMonad m => ParPart -> DocxContext m Inlines
parPartToInlines parPart =
case parPart of
(BookMark _ anchor) | anchor `notElem` dummyAnchors -> do
inHdrBool <- asks docxInHeaderBlock
ils <- parPartToInlines' parPart
immedPrevAnchor <- gets docxImmedPrevAnchor
unless (isJust immedPrevAnchor || inHdrBool)
(modify $ \s -> s{ docxImmedPrevAnchor = Just anchor})
return ils
_ -> do
ils <- parPartToInlines' parPart
modify $ \s -> s{ docxImmedPrevAnchor = Nothing}
return ils
parPartToInlines' :: PandocMonad m => ParPart -> DocxContext m Inlines
parPartToInlines' (PlainRun r) = runToInlines r
parPartToInlines' (ChangedRuns (TrackedChange Insertion (ChangeInfo _ author date)) runs) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AcceptChanges -> smushInlines <$> mapM runToInlines runs
RejectChanges -> return mempty
AllChanges -> do
ils <- smushInlines <$> mapM runToInlines runs
let attr = ("", ["insertion"], [("author", author), ("date", date)])
return $ spanWith attr ils
parPartToInlines' (ChangedRuns (TrackedChange Deletion (ChangeInfo _ author date)) runs) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AcceptChanges -> return mempty
RejectChanges -> smushInlines <$> mapM runToInlines runs
AllChanges -> do
ils <- smushInlines <$> mapM runToInlines runs
let attr = ("", ["deletion"], [("author", author), ("date", date)])
return $ spanWith attr ils
parPartToInlines' (CommentStart cmtId author date bodyParts) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AllChanges -> do
blks <- smushBlocks <$> mapM bodyPartToBlocks bodyParts
ils <- blocksToInlinesWarn cmtId blks
let attr = ("", ["comment-start"], [("id", cmtId), ("author", author), ("date", date)])
return $ spanWith attr ils
_ -> return mempty
parPartToInlines' (CommentEnd cmtId) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AllChanges -> do
let attr = ("", ["comment-end"], [("id", cmtId)])
return $ spanWith attr mempty
_ -> return mempty
parPartToInlines' (BookMark _ anchor) | anchor `elem` dummyAnchors =
return mempty
parPartToInlines' (BookMark _ anchor) =
do
inHdrBool <- asks docxInHeaderBlock
anchorMap <- gets docxAnchorMap
immedPrevAnchor <- gets docxImmedPrevAnchor
case immedPrevAnchor of
Just prevAnchor -> do
unless inHdrBool
(modify $ \s -> s { docxAnchorMap = M.insert anchor prevAnchor anchorMap})
return mempty
Nothing -> do
exts <- readerExtensions <$> asks docxOptions
let newAnchor =
if not inHdrBool && anchor `elem` M.elems anchorMap
then uniqueIdent exts [Str anchor]
(Set.fromList $ M.elems anchorMap)
else anchor
unless inHdrBool
(modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
return $ spanWith (newAnchor, ["anchor"], []) mempty
parPartToInlines' (Drawing fp title alt bs ext) = do
(lift . lift) $ P.insertMedia fp Nothing bs
return $ imageWith (extentToAttr ext) fp title $ text alt
parPartToInlines' Chart =
return $ spanWith ("", ["chart"], []) $ text "[CHART]"
parPartToInlines' (InternalHyperLink anchor runs) = do
ils <- smushInlines <$> mapM runToInlines runs
return $ link ('#' : anchor) "" ils
parPartToInlines' (ExternalHyperLink target runs) = do
ils <- smushInlines <$> mapM runToInlines runs
return $ link target "" ils
parPartToInlines' (PlainOMath exps) =
return $ math $ writeTeX exps
parPartToInlines' (Field info runs) =
case info of
HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url runs
UnknownField -> smushInlines <$> mapM runToInlines runs
parPartToInlines' NullParPart = return mempty
isAnchorSpan :: Inline -> Bool
isAnchorSpan (Span (_, classes, kvs) _) =
classes == ["anchor"] &&
null kvs
isAnchorSpan _ = False
dummyAnchors :: [String]
dummyAnchors = ["_GoBack"]
makeHeaderAnchor :: PandocMonad m => Blocks -> DocxContext m Blocks
makeHeaderAnchor bs = traverse makeHeaderAnchor' bs
makeHeaderAnchor' :: PandocMonad m => Block -> DocxContext m Block
makeHeaderAnchor' (Header n (ident, classes, kvs) ils)
| (c:_) <- filter isAnchorSpan ils
, (Span (anchIdent, ["anchor"], _) cIls) <- c = do
hdrIDMap <- gets docxAnchorMap
exts <- readerExtensions <$> asks docxOptions
let newIdent = if null ident
then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap)
else ident
newIls = concatMap f ils where f il | il == c = cIls
| otherwise = [il]
modify $ \s -> s {docxAnchorMap = M.insert anchIdent newIdent hdrIDMap}
makeHeaderAnchor' $ Header n (newIdent, classes, kvs) newIls
makeHeaderAnchor' (Header n (ident, classes, kvs) ils) =
do
hdrIDMap <- gets docxAnchorMap
exts <- readerExtensions <$> asks docxOptions
let newIdent = if null ident
then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap)
else ident
modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap}
return $ Header n (newIdent, classes, kvs) ils
makeHeaderAnchor' blk = return blk
singleParaToPlain :: Blocks -> Blocks
singleParaToPlain blks
| (Para ils :< seeq) <- viewl $ unMany blks
, Seq.null seeq =
singleton $ Plain ils
singleParaToPlain blks = blks
cellToBlocks :: PandocMonad m => Cell -> DocxContext m Blocks
cellToBlocks (Cell bps) = do
blks <- smushBlocks <$> mapM bodyPartToBlocks bps
return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
rowToBlocksList :: PandocMonad m => Row -> DocxContext m [Blocks]
rowToBlocksList (Row cells) = do
blksList <- mapM cellToBlocks cells
return $ map singleParaToPlain blksList
trimSps :: Inlines -> Inlines
trimSps (Many ils) = Many $ Seq.dropWhileL isSp $Seq.dropWhileR isSp ils
where isSp Space = True
isSp SoftBreak = True
isSp LineBreak = True
isSp _ = False
parStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks)
parStyleToTransform pPr
| (c:cs) <- pStyle pPr
, c `elem` divsToKeep = do
let pPr' = pPr { pStyle = cs }
transform <- parStyleToTransform pPr'
return $ divWith ("", [c], []) . transform
| (c:cs) <- pStyle pPr,
c `elem` listParagraphDivs = do
let pPr' = pPr { pStyle = cs, indentation = Nothing}
transform <- parStyleToTransform pPr'
return $ divWith ("", [c], []) . transform
| (c:cs) <- pStyle pPr
, Just True <- pBlockQuote pPr = do
opts <- asks docxOptions
let pPr' = pPr { pStyle = cs }
transform <- parStyleToTransform pPr'
let extraInfo = if isEnabled Ext_styles opts
then divWith ("", [], [("custom-style", c)])
else id
return $ extraInfo . blockQuote . transform
| (c:cs) <- pStyle pPr = do
opts <- asks docxOptions
let pPr' = pPr { pStyle = cs}
transform <- parStyleToTransform pPr'
let extraInfo = if isEnabled Ext_styles opts
then divWith ("", [], [("custom-style", c)])
else id
return $ extraInfo . transform
| null (pStyle pPr)
, Just left <- indentation pPr >>= leftParIndent
, Just hang <- indentation pPr >>= hangingParIndent = do
let pPr' = pPr { indentation = Nothing }
transform <- parStyleToTransform pPr'
return $ case (left - hang) > 0 of
True -> blockQuote . transform
False -> transform
| null (pStyle pPr),
Just left <- indentation pPr >>= leftParIndent = do
let pPr' = pPr { indentation = Nothing }
transform <- parStyleToTransform pPr'
return $ case left > 0 of
True -> blockQuote . transform
False -> transform
parStyleToTransform _ = return id
bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks
bodyPartToBlocks (Paragraph pPr parparts)
| not $ null $ codeDivs `intersect` (pStyle pPr) = do
transform <- parStyleToTransform pPr
return $
transform $
codeBlock $
concatMap parPartToString parparts
| Just (style, n) <- pHeading pPr = do
ils <-local (\s-> s{docxInHeaderBlock=True})
(smushInlines <$> mapM parPartToInlines parparts)
makeHeaderAnchor $
headerWith ("", delete style (pStyle pPr), []) n ils
| otherwise = do
ils <- (trimSps . smushInlines) <$> mapM parPartToInlines parparts
prevParaIls <- gets docxPrevPara
dropIls <- gets docxDropCap
let ils' = dropIls <> ils
if dropCap pPr
then do modify $ \s -> s { docxDropCap = ils' }
return mempty
else do modify $ \s -> s { docxDropCap = mempty }
let ils'' = prevParaIls <>
(if isNull prevParaIls then mempty else space) <>
ils'
opts <- asks docxOptions
case () of
_ | isNull ils'' && not (isEnabled Ext_empty_paragraphs opts) ->
return mempty
_ | Just (TrackedChange Insertion _) <- pChange pPr
, AcceptChanges <- readerTrackChanges opts -> do
modify $ \s -> s {docxPrevPara = mempty}
transform <- parStyleToTransform pPr
return $ transform $ para ils''
_ | Just (TrackedChange Insertion _) <- pChange pPr
, RejectChanges <- readerTrackChanges opts -> do
modify $ \s -> s {docxPrevPara = ils''}
return mempty
_ | Just (TrackedChange Insertion cInfo) <- pChange pPr
, AllChanges <- readerTrackChanges opts
, ChangeInfo _ cAuthor cDate <- cInfo -> do
let attr = ("", ["paragraph-insertion"], [("author", cAuthor), ("date", cDate)])
insertMark = spanWith attr mempty
transform <- parStyleToTransform pPr
return $ transform $
para $ ils'' <> insertMark
_ | Just (TrackedChange Deletion _) <- pChange pPr
, AcceptChanges <- readerTrackChanges opts -> do
modify $ \s -> s {docxPrevPara = ils''}
return mempty
_ | Just (TrackedChange Deletion _) <- pChange pPr
, RejectChanges <- readerTrackChanges opts -> do
modify $ \s -> s {docxPrevPara = mempty}
transform <- parStyleToTransform pPr
return $ transform $ para ils''
_ | Just (TrackedChange Deletion cInfo) <- pChange pPr
, AllChanges <- readerTrackChanges opts
, ChangeInfo _ cAuthor cDate <- cInfo -> do
let attr = ("", ["paragraph-deletion"], [("author", cAuthor), ("date", cDate)])
insertMark = spanWith attr mempty
transform <- parStyleToTransform pPr
return $ transform $
para $ ils'' <> insertMark
_ | otherwise -> do
modify $ \s -> s {docxPrevPara = mempty}
transform <- parStyleToTransform pPr
return $ transform $ para ils''
bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
listState <- gets docxListState
let startFromState = M.lookup (numId, lvl) listState
Level _ fmt txt startFromLevelInfo = levelInfo
start = case startFromState of
Just n -> n + 1
Nothing -> fromMaybe 1 startFromLevelInfo
kvs = [ ("level", lvl)
, ("num-id", numId)
, ("format", fmt)
, ("text", txt)
, ("start", show start)
]
modify $ \st -> st{ docxListState = M.insert (numId, lvl) start listState}
blks <- bodyPartToBlocks (Paragraph pPr parparts)
return $ divWith ("", ["list-item"], kvs) blks
bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
let pPr' = pPr {pStyle = "ListParagraph": pStyle pPr}
in
bodyPartToBlocks $ Paragraph pPr' parparts
bodyPartToBlocks (Tbl _ _ _ []) =
return $ para mempty
bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do
let caption = text cap
(hdr, rows) = case firstRowFormatting look of
True | null rs -> (Nothing, [r])
| otherwise -> (Just r, rs)
False -> (Nothing, r:rs)
cells <- mapM rowToBlocksList rows
let width = maybe 0 maximum $ nonEmpty $ map rowLength parts
nonEmpty [] = Nothing
nonEmpty l = Just l
rowLength :: Row -> Int
rowLength (Row c) = length c
let cells' = map (\row -> take width (row ++ repeat mempty)) cells
hdrCells <- case hdr of
Just r' -> rowToBlocksList r'
Nothing -> return $ replicate width mempty
let alignments = replicate width AlignDefault
widths = replicate width 0 :: [Double]
return $ table caption (zip alignments widths) hdrCells cells'
bodyPartToBlocks (OMathPara e) =
return $ para $ displayMath (writeTeX e)
rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline
rewriteLink' l@(Link attr ils ('#':target, title)) = do
anchorMap <- gets docxAnchorMap
case M.lookup target anchorMap of
Just newTarget -> do
modify $ \s -> s{docxAnchorSet = Set.insert newTarget (docxAnchorSet s)}
return $ Link attr ils ('#':newTarget, title)
Nothing -> do
modify $ \s -> s{docxAnchorSet = Set.insert target (docxAnchorSet s)}
return l
rewriteLink' il = return il
rewriteLinks :: PandocMonad m => [Block] -> DocxContext m [Block]
rewriteLinks = mapM (walkM rewriteLink')
removeOrphanAnchors'' :: PandocMonad m => Inline -> DocxContext m [Inline]
removeOrphanAnchors'' s@(Span (ident, classes, _) ils)
| "anchor" `elem` classes = do
anchorSet <- gets docxAnchorSet
return $ if ident `Set.member` anchorSet
then [s]
else ils
removeOrphanAnchors'' il = return [il]
removeOrphanAnchors' :: PandocMonad m => [Inline] -> DocxContext m [Inline]
removeOrphanAnchors' ils = liftM concat $ mapM removeOrphanAnchors'' ils
removeOrphanAnchors :: PandocMonad m => [Block] -> DocxContext m [Block]
removeOrphanAnchors = mapM (walkM removeOrphanAnchors')
bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block])
bodyToOutput (Body bps) = do
let (metabps, blkbps) = sepBodyParts bps
meta <- bodyPartsToMeta metabps
blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps
blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
blks'' <- removeOrphanAnchors blks'
return (meta, blks'')
docxToOutput :: PandocMonad m
=> ReaderOptions
-> Docx
-> m (Meta, [Block])
docxToOutput opts (Docx (Document _ body)) =
let dEnv = def { docxOptions = opts} in
evalDocxContext (bodyToOutput body) dEnv def