{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ViewPatterns #-}
module Text.Pandoc.Readers.Docx
( readDocx
) where
import Codec.Archive.Zip
import Control.Monad.Reader
import Control.Monad.State.Strict
import qualified Data.ByteString.Lazy as B
import Data.Default (Default)
import Data.List (delete, intersect)
import Data.Char (isSpace)
import qualified Data.Map as M
import qualified Data.Text as T
import Data.Maybe (isJust, fromMaybe)
import Data.Sequence (ViewL (..), viewl)
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import Text.Pandoc.Builder as Pandoc
import Text.Pandoc.MediaBag (MediaBag)
import Text.Pandoc.Options
import Text.Pandoc.Readers.Docx.Combine
import Text.Pandoc.Readers.Docx.Lists
import Text.Pandoc.Readers.Docx.Parse as Docx
import Text.Pandoc.Shared
import Text.Pandoc.Walk
import Text.TeXMath (writeTeX)
import Control.Monad.Except (throwError)
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import qualified Text.Pandoc.Class.PandocMonad as P
import Text.Pandoc.Error
import Text.Pandoc.Logging
readDocx :: PandocMonad m
=> ReaderOptions
-> B.ByteString
-> m Pandoc
readDocx opts bytes = do
case toArchiveOrFail bytes of
Right archive -> do
case archiveToDocxWithWarnings archive of
Right (docx, parserWarnings) -> do
mapM_ (P.report . DocxParserWarning) parserWarnings
(meta, blks) <- docxToOutput opts docx
return $ Pandoc meta blks
Left docxerr -> throwError $ PandocSomeError $
"couldn't parse docx file: " <> T.pack (show docxerr)
Left err -> throwError $ PandocSomeError $
"couldn't unpack docx container: " <> T.pack err
data DState = DState { docxAnchorMap :: M.Map T.Text T.Text
, docxAnchorSet :: Set.Set T.Text
, docxImmedPrevAnchor :: Maybe T.Text
, docxMediaBag :: MediaBag
, docxDropCap :: Inlines
, docxListState :: M.Map (T.Text, T.Text) Integer
, docxPrevPara :: Inlines
}
instance Default DState where
def = DState { docxAnchorMap = M.empty
, docxAnchorSet = mempty
, docxImmedPrevAnchor = Nothing
, docxMediaBag = mempty
, docxDropCap = mempty
, docxListState = M.empty
, docxPrevPara = mempty
}
data DEnv = DEnv { docxOptions :: ReaderOptions
, docxInHeaderBlock :: Bool
, docxInBidi :: Bool
}
instance Default DEnv where
def = DEnv def False False
type DocxContext m = ReaderT DEnv (StateT DState m)
evalDocxContext :: PandocMonad m => DocxContext m a -> DEnv -> DState -> m a
evalDocxContext ctx env st = flip evalStateT st $ runReaderT ctx env
spansToKeep :: [CharStyleName]
spansToKeep = []
divsToKeep :: [ParaStyleName]
divsToKeep = ["Definition", "Definition Term"]
metaStyles :: M.Map ParaStyleName T.Text
metaStyles = M.fromList [ ("Title", "title")
, ("Subtitle", "subtitle")
, ("Author", "author")
, ("Date", "date")
, ("Abstract", "abstract")]
sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart])
sepBodyParts = span (\bp -> isMetaPar bp || isEmptyPar bp)
isMetaPar :: BodyPart -> Bool
isMetaPar (Paragraph pPr _) =
not $ null $ intersect (getStyleNames $ pStyle pPr) (M.keys metaStyles)
isMetaPar _ = False
isEmptyPar :: BodyPart -> Bool
isEmptyPar (Paragraph _ parParts) =
all isEmptyParPart parParts
where
isEmptyParPart (PlainRun (Run _ runElems)) = all isEmptyElem runElems
isEmptyParPart _ = False
isEmptyElem (TextRun s) = trim s == ""
isEmptyElem _ = True
isEmptyPar _ = False
bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map T.Text MetaValue)
bodyPartsToMeta' [] = return M.empty
bodyPartsToMeta' (bp : bps)
| (Paragraph pPr parParts) <- bp
, (c : _)<- getStyleNames (pStyle pPr) `intersect` M.keys metaStyles
, (Just metaField) <- M.lookup c metaStyles = do
inlines <- smushInlines <$> mapM parPartToInlines parParts
remaining <- bodyPartsToMeta' bps
let
f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils']
f (MetaInlines ils) (MetaBlocks blks) = MetaBlocks (Para ils : blks)
f m (MetaList mv) = MetaList (m : mv)
f m n = MetaList [m, n]
return $ M.insertWith f metaField (MetaInlines (toList inlines)) remaining
bodyPartsToMeta' (_ : bps) = bodyPartsToMeta' bps
bodyPartsToMeta :: PandocMonad m => [BodyPart] -> DocxContext m Meta
bodyPartsToMeta bps = do
mp <- bodyPartsToMeta' bps
let mp' =
case M.lookup "author" mp of
Just mv -> M.insert "author" (fixAuthors mv) mp
Nothing -> mp
return $ Meta mp'
fixAuthors :: MetaValue -> MetaValue
fixAuthors (MetaBlocks blks) = MetaList [MetaInlines ils | Para ils <- blks]
fixAuthors mv = mv
isInheritedFromStyles :: (Eq (StyleName s), HasStyleName s, HasParentStyle s) => [StyleName s] -> s -> Bool
isInheritedFromStyles names sty
| getStyleName sty `elem` names = True
| Just psty <- getParentStyle sty = isInheritedFromStyles names psty
| otherwise = False
hasStylesInheritedFrom :: [ParaStyleName] -> ParagraphStyle -> Bool
hasStylesInheritedFrom ns s = any (isInheritedFromStyles ns) $ pStyle s
removeStyleNamed :: ParaStyleName -> ParagraphStyle -> ParagraphStyle
removeStyleNamed sn ps = ps{pStyle = filter (\psd -> getStyleName psd /= sn) $ pStyle ps}
isCodeCharStyle :: CharStyle -> Bool
isCodeCharStyle = isInheritedFromStyles ["Verbatim Char"]
isCodeDiv :: ParagraphStyle -> Bool
isCodeDiv = hasStylesInheritedFrom ["Source Code"]
isBlockQuote :: ParStyle -> Bool
isBlockQuote =
isInheritedFromStyles [
"Quote", "Block Text", "Block Quote", "Block Quotation"
]
runElemToInlines :: RunElem -> Inlines
runElemToInlines (TextRun s) = text s
runElemToInlines LnBrk = linebreak
runElemToInlines Tab = space
runElemToInlines SoftHyphen = text "\xad"
runElemToInlines NoBreakHyphen = text "\x2011"
runElemToText :: RunElem -> T.Text
runElemToText (TextRun s) = s
runElemToText LnBrk = T.singleton '\n'
runElemToText Tab = T.singleton '\t'
runElemToText SoftHyphen = T.singleton '\xad'
runElemToText NoBreakHyphen = T.singleton '\x2011'
runToText :: Run -> T.Text
runToText (Run _ runElems) = T.concat $ map runElemToText runElems
runToText _ = ""
parPartToText :: ParPart -> T.Text
parPartToText (PlainRun run) = runToText run
parPartToText (InternalHyperLink _ runs) = T.concat $ map runToText runs
parPartToText (ExternalHyperLink _ runs) = T.concat $ map runToText runs
parPartToText _ = ""
blacklistedCharStyles :: [CharStyleName]
blacklistedCharStyles = ["Hyperlink"]
resolveDependentRunStyle :: PandocMonad m => RunStyle -> DocxContext m RunStyle
resolveDependentRunStyle rPr
| Just s <- rParentStyle rPr
, getStyleName s `notElem` blacklistedCharStyles = do
opts <- asks docxOptions
if isEnabled Ext_styles opts
then return rPr
else leftBiasedMergeRunStyle rPr <$> resolveDependentRunStyle (cStyleData s)
| otherwise = return rPr
runStyleToTransform :: PandocMonad m => RunStyle -> DocxContext m (Inlines -> Inlines)
runStyleToTransform rPr
| Just sn <- getStyleName <$> rParentStyle rPr
, sn `elem` spansToKeep = do
transform <- runStyleToTransform rPr{rParentStyle = Nothing}
return $ spanWith ("", [normalizeToClassName sn], []) . transform
| Just s <- rParentStyle rPr = do
ei <- extraInfo spanWith s
transform <- runStyleToTransform rPr{rParentStyle = Nothing}
return $ ei . transform
| Just True <- isItalic rPr = do
transform <- runStyleToTransform rPr{isItalic = Nothing}
return $ emph . transform
| Just True <- isBold rPr = do
transform <- runStyleToTransform rPr{isBold = Nothing}
return $ strong . transform
| Just True <- isSmallCaps rPr = do
transform <- runStyleToTransform rPr{isSmallCaps = Nothing}
return $ smallcaps . transform
| Just True <- isStrike rPr = do
transform <- runStyleToTransform rPr{isStrike = Nothing}
return $ strikeout . transform
| Just True <- isRTL rPr = do
transform <- runStyleToTransform rPr{isRTL = Nothing}
return $ spanWith ("",[],[("dir","rtl")]) . transform
| Just False <- isRTL rPr = do
transform <- runStyleToTransform rPr{isRTL = Nothing}
inBidi <- asks docxInBidi
return $ if inBidi
then spanWith ("",[],[("dir","ltr")]) . transform
else transform
| Just SupScrpt <- rVertAlign rPr = do
transform <- runStyleToTransform rPr{rVertAlign = Nothing}
return $ superscript . transform
| Just SubScrpt <- rVertAlign rPr = do
transform <- runStyleToTransform rPr{rVertAlign = Nothing}
return $ subscript . transform
| Just "single" <- rUnderline rPr = do
transform <- runStyleToTransform rPr{rUnderline = Nothing}
return $ Pandoc.underline . transform
| otherwise = return id
runToInlines :: PandocMonad m => Run -> DocxContext m Inlines
runToInlines (Run rs runElems)
| maybe False isCodeCharStyle $ rParentStyle rs = do
rPr <- resolveDependentRunStyle rs
let codeString = code $ T.concat $ map runElemToText runElems
return $ case rVertAlign rPr of
Just SupScrpt -> superscript codeString
Just SubScrpt -> subscript codeString
_ -> codeString
| otherwise = do
rPr <- resolveDependentRunStyle rs
let ils = smushInlines (map runElemToInlines runElems)
transform <- runStyleToTransform rPr
return $ transform ils
runToInlines (Footnote bps) = note . smushBlocks <$> mapM bodyPartToBlocks bps
runToInlines (Endnote bps) = note . smushBlocks <$> mapM bodyPartToBlocks bps
runToInlines (InlineDrawing fp title alt bs ext) = do
(lift . lift) $ P.insertMedia fp Nothing bs
return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt
runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]"
extentToAttr :: Extent -> Attr
extentToAttr (Just (w, h)) =
("", [], [("width", showDim w), ("height", showDim h)] )
where
showDim d = tshow (d / 914400) <> "in"
extentToAttr _ = nullAttr
blocksToInlinesWarn :: PandocMonad m => T.Text -> Blocks -> DocxContext m Inlines
blocksToInlinesWarn cmtId blks = do
let paraOrPlain :: Block -> Bool
paraOrPlain (Para _) = True
paraOrPlain (Plain _) = True
paraOrPlain _ = False
unless (all paraOrPlain blks) $
lift $ P.report $ DocxParserWarning $
"Docx comment " <> cmtId <> " will not retain formatting"
return $ blocksToInlines' (toList blks)
parPartToInlines :: PandocMonad m => ParPart -> DocxContext m Inlines
parPartToInlines parPart =
case parPart of
(BookMark _ anchor) | anchor `notElem` dummyAnchors -> do
inHdrBool <- asks docxInHeaderBlock
ils <- parPartToInlines' parPart
immedPrevAnchor <- gets docxImmedPrevAnchor
unless (isJust immedPrevAnchor || inHdrBool)
(modify $ \s -> s{ docxImmedPrevAnchor = Just anchor})
return ils
_ -> do
ils <- parPartToInlines' parPart
modify $ \s -> s{ docxImmedPrevAnchor = Nothing}
return ils
parPartToInlines' :: PandocMonad m => ParPart -> DocxContext m Inlines
parPartToInlines' (PlainRun r) = runToInlines r
parPartToInlines' (ChangedRuns (TrackedChange Insertion (ChangeInfo _ author date)) runs) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AcceptChanges -> smushInlines <$> mapM runToInlines runs
RejectChanges -> return mempty
AllChanges -> do
ils <- smushInlines <$> mapM runToInlines runs
let attr = ("", ["insertion"], [("author", author), ("date", date)])
return $ spanWith attr ils
parPartToInlines' (ChangedRuns (TrackedChange Deletion (ChangeInfo _ author date)) runs) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AcceptChanges -> return mempty
RejectChanges -> smushInlines <$> mapM runToInlines runs
AllChanges -> do
ils <- smushInlines <$> mapM runToInlines runs
let attr = ("", ["deletion"], [("author", author), ("date", date)])
return $ spanWith attr ils
parPartToInlines' (CommentStart cmtId author date bodyParts) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AllChanges -> do
blks <- smushBlocks <$> mapM bodyPartToBlocks bodyParts
ils <- blocksToInlinesWarn cmtId blks
let attr = ("", ["comment-start"], [("id", cmtId), ("author", author), ("date", date)])
return $ spanWith attr ils
_ -> return mempty
parPartToInlines' (CommentEnd cmtId) = do
opts <- asks docxOptions
case readerTrackChanges opts of
AllChanges -> do
let attr = ("", ["comment-end"], [("id", cmtId)])
return $ spanWith attr mempty
_ -> return mempty
parPartToInlines' (BookMark _ anchor) | anchor `elem` dummyAnchors =
return mempty
parPartToInlines' (BookMark _ anchor) =
do
inHdrBool <- asks docxInHeaderBlock
anchorMap <- gets docxAnchorMap
immedPrevAnchor <- gets docxImmedPrevAnchor
case immedPrevAnchor of
Just prevAnchor -> do
unless inHdrBool
(modify $ \s -> s { docxAnchorMap = M.insert anchor prevAnchor anchorMap})
return mempty
Nothing -> do
exts <- readerExtensions <$> asks docxOptions
let newAnchor =
if not inHdrBool && anchor `elem` M.elems anchorMap
then uniqueIdent exts [Str anchor]
(Set.fromList $ M.elems anchorMap)
else anchor
unless inHdrBool
(modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
return $ spanWith (newAnchor, ["anchor"], []) mempty
parPartToInlines' (Drawing fp title alt bs ext) = do
(lift . lift) $ P.insertMedia fp Nothing bs
return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt
parPartToInlines' Chart =
return $ spanWith ("", ["chart"], []) $ text "[CHART]"
parPartToInlines' (InternalHyperLink anchor runs) = do
ils <- smushInlines <$> mapM runToInlines runs
return $ link ("#" <> anchor) "" ils
parPartToInlines' (ExternalHyperLink target runs) = do
ils <- smushInlines <$> mapM runToInlines runs
return $ link target "" ils
parPartToInlines' (PlainOMath exps) =
return $ math $ writeTeX exps
parPartToInlines' (Field info runs) =
case info of
HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url runs
UnknownField -> smushInlines <$> mapM runToInlines runs
parPartToInlines' NullParPart = return mempty
isAnchorSpan :: Inline -> Bool
isAnchorSpan (Span (_, ["anchor"], []) _) = True
isAnchorSpan _ = False
dummyAnchors :: [T.Text]
dummyAnchors = ["_GoBack"]
makeHeaderAnchor :: PandocMonad m => Blocks -> DocxContext m Blocks
makeHeaderAnchor bs = traverse makeHeaderAnchor' bs
makeHeaderAnchor' :: PandocMonad m => Block -> DocxContext m Block
makeHeaderAnchor' (Header n (ident, classes, kvs) ils)
| (c:_) <- filter isAnchorSpan ils
, (Span (anchIdent, ["anchor"], _) cIls) <- c = do
hdrIDMap <- gets docxAnchorMap
exts <- readerExtensions <$> asks docxOptions
let newIdent = if T.null ident
then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap)
else ident
newIls = concatMap f ils where f il | il == c = cIls
| otherwise = [il]
modify $ \s -> s {docxAnchorMap = M.insert anchIdent newIdent hdrIDMap}
makeHeaderAnchor' $ Header n (newIdent, classes, kvs) newIls
makeHeaderAnchor' (Header n (ident, classes, kvs) ils) =
do
hdrIDMap <- gets docxAnchorMap
exts <- readerExtensions <$> asks docxOptions
let newIdent = if T.null ident
then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap)
else ident
modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap}
return $ Header n (newIdent, classes, kvs) ils
makeHeaderAnchor' blk = return blk
singleParaToPlain :: Blocks -> Blocks
singleParaToPlain blks
| (Para ils :< seeq) <- viewl $ unMany blks
, Seq.null seeq =
singleton $ Plain ils
singleParaToPlain blks = blks
cellToBlocks :: PandocMonad m => Docx.Cell -> DocxContext m Blocks
cellToBlocks (Docx.Cell bps) = do
blks <- smushBlocks <$> mapM bodyPartToBlocks bps
return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks
rowToBlocksList :: PandocMonad m => Docx.Row -> DocxContext m [Blocks]
rowToBlocksList (Docx.Row cells) = do
blksList <- mapM cellToBlocks cells
return $ map singleParaToPlain blksList
trimSps :: Inlines -> Inlines
trimSps (Many ils) = Many $ Seq.dropWhileL isSp $Seq.dropWhileR isSp ils
where isSp Space = True
isSp SoftBreak = True
isSp LineBreak = True
isSp _ = False
extraInfo :: (Eq (StyleName a), PandocMonad m, HasStyleName a)
=> (Attr -> i -> i) -> a -> DocxContext m (i -> i)
extraInfo f s = do
opts <- asks docxOptions
return $ if isEnabled Ext_styles opts
then f ("", [], [("custom-style", fromStyleName $ getStyleName s)])
else id
parStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks)
parStyleToTransform pPr = case pStyle pPr of
c@(getStyleName -> styleName):cs
| styleName `elem` divsToKeep -> do
let pPr' = pPr { pStyle = cs }
transform <- parStyleToTransform pPr'
return $ divWith ("", [normalizeToClassName styleName], []) . transform
| styleName `elem` listParagraphStyles -> do
let pPr' = pPr { pStyle = cs, indentation = Nothing}
transform <- parStyleToTransform pPr'
return $ divWith ("", [normalizeToClassName styleName], []) . transform
| otherwise -> do
let pPr' = pPr { pStyle = cs }
transform <- parStyleToTransform pPr'
ei <- extraInfo divWith c
return $ ei . (if isBlockQuote c then blockQuote else id) . transform
[]
| Just left <- indentation pPr >>= leftParIndent -> do
let pPr' = pPr { indentation = Nothing }
hang = fromMaybe 0 $ indentation pPr >>= hangingParIndent
transform <- parStyleToTransform pPr'
return $ if (left - hang) > 0
then blockQuote . transform
else transform
| otherwise -> return id
normalizeToClassName :: (FromStyleName a) => a -> T.Text
normalizeToClassName = T.map go . fromStyleName
where go c | isSpace c = '-'
| otherwise = c
bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks
bodyPartToBlocks (Paragraph pPr parparts)
| Just True <- pBidi pPr = do
let pPr' = pPr { pBidi = Nothing }
local (\s -> s{ docxInBidi = True })
(bodyPartToBlocks (Paragraph pPr' parparts))
| isCodeDiv pPr = do
transform <- parStyleToTransform pPr
return $
transform $
codeBlock $
T.concat $
map parPartToText parparts
| Just (style, n) <- pHeading pPr = do
ils <-local (\s-> s{docxInHeaderBlock=True})
(smushInlines <$> mapM parPartToInlines parparts)
makeHeaderAnchor $
headerWith ("", map normalizeToClassName . delete style $ getStyleNames (pStyle pPr), []) n ils
| otherwise = do
ils <- trimSps . smushInlines <$> mapM parPartToInlines parparts
prevParaIls <- gets docxPrevPara
dropIls <- gets docxDropCap
let ils' = dropIls <> ils
let (paraOrPlain, pPr')
| hasStylesInheritedFrom ["Compact"] pPr = (plain, removeStyleNamed "Compact" pPr)
| otherwise = (para, pPr)
if dropCap pPr'
then do modify $ \s -> s { docxDropCap = ils' }
return mempty
else do modify $ \s -> s { docxDropCap = mempty }
let ils'' = (if isNull prevParaIls then mempty
else prevParaIls <> space) <> ils'
handleInsertion = do
modify $ \s -> s {docxPrevPara = mempty}
transform <- parStyleToTransform pPr'
return $ transform $ paraOrPlain ils''
opts <- asks docxOptions
case (pChange pPr', readerTrackChanges opts) of
_ | isNull ils'', not (isEnabled Ext_empty_paragraphs opts) ->
return mempty
(Just (TrackedChange Insertion _), AcceptChanges) ->
handleInsertion
(Just (TrackedChange Insertion _), RejectChanges) -> do
modify $ \s -> s {docxPrevPara = ils''}
return mempty
(Just (TrackedChange Insertion (ChangeInfo _ cAuthor cDate))
, AllChanges) -> do
let attr = ("", ["paragraph-insertion"], [("author", cAuthor), ("date", cDate)])
insertMark = spanWith attr mempty
transform <- parStyleToTransform pPr'
return $ transform $
paraOrPlain $ ils'' <> insertMark
(Just (TrackedChange Deletion _), AcceptChanges) -> do
modify $ \s -> s {docxPrevPara = ils''}
return mempty
(Just (TrackedChange Deletion _), RejectChanges) ->
handleInsertion
(Just (TrackedChange Deletion (ChangeInfo _ cAuthor cDate))
, AllChanges) -> do
let attr = ("", ["paragraph-deletion"], [("author", cAuthor), ("date", cDate)])
insertMark = spanWith attr mempty
transform <- parStyleToTransform pPr'
return $ transform $
paraOrPlain $ ils'' <> insertMark
_ -> handleInsertion
bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
listState <- gets docxListState
let startFromState = M.lookup (numId, lvl) listState
Level _ fmt txt startFromLevelInfo = levelInfo
start = case startFromState of
Just n -> n + 1
Nothing -> fromMaybe 1 startFromLevelInfo
kvs = [ ("level", lvl)
, ("num-id", numId)
, ("format", fmt)
, ("text", txt)
, ("start", tshow start)
]
modify $ \st -> st{ docxListState =
let notExpired (_, lvl') _ = lvl' <= lvl
in M.insert (numId, lvl) start (M.filterWithKey notExpired listState) }
blks <- bodyPartToBlocks (Paragraph pPr parparts)
return $ divWith ("", ["list-item"], kvs) blks
bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
let pPr' = pPr {pStyle = constructBogusParStyleData "list-paragraph": pStyle pPr}
in
bodyPartToBlocks $ Paragraph pPr' parparts
bodyPartToBlocks (Tbl _ _ _ []) =
return $ para mempty
bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do
let cap' = simpleCaption $ plain $ text cap
(hdr, rows) = case firstRowFormatting look of
True | null rs -> (Nothing, [r])
| otherwise -> (Just r, rs)
False -> (Nothing, r:rs)
cells <- mapM rowToBlocksList rows
let width = maybe 0 maximum $ nonEmpty $ map rowLength parts
nonEmpty [] = Nothing
nonEmpty l = Just l
rowLength :: Docx.Row -> Int
rowLength (Docx.Row c) = length c
let toRow = Pandoc.Row nullAttr . map simpleCell
toHeaderRow l = if null l then [] else [toRow l]
let cells' = map (\row -> toRow $ take width (row ++ repeat mempty)) cells
hdrCells <- case hdr of
Just r' -> toHeaderRow <$> rowToBlocksList r'
Nothing -> return []
let alignments = replicate width AlignDefault
widths = replicate width ColWidthDefault
return $ table cap'
(zip alignments widths)
(TableHead nullAttr hdrCells)
[TableBody nullAttr 0 [] cells']
(TableFoot nullAttr [])
bodyPartToBlocks (OMathPara e) =
return $ para $ displayMath (writeTeX e)
rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline
rewriteLink' l@(Link attr ils (T.uncons -> Just ('#',target), title)) = do
anchorMap <- gets docxAnchorMap
case M.lookup target anchorMap of
Just newTarget -> do
modify $ \s -> s{docxAnchorSet = Set.insert newTarget (docxAnchorSet s)}
return $ Link attr ils ("#" <> newTarget, title)
Nothing -> do
modify $ \s -> s{docxAnchorSet = Set.insert target (docxAnchorSet s)}
return l
rewriteLink' il = return il
rewriteLinks :: PandocMonad m => [Block] -> DocxContext m [Block]
rewriteLinks = mapM (walkM rewriteLink')
removeOrphanAnchors'' :: PandocMonad m => Inline -> DocxContext m [Inline]
removeOrphanAnchors'' s@(Span (ident, classes, _) ils)
| "anchor" `elem` classes = do
anchorSet <- gets docxAnchorSet
return $ if ident `Set.member` anchorSet
then [s]
else ils
removeOrphanAnchors'' il = return [il]
removeOrphanAnchors' :: PandocMonad m => [Inline] -> DocxContext m [Inline]
removeOrphanAnchors' ils = liftM concat $ mapM removeOrphanAnchors'' ils
removeOrphanAnchors :: PandocMonad m => [Block] -> DocxContext m [Block]
removeOrphanAnchors = mapM (walkM removeOrphanAnchors')
bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block])
bodyToOutput (Body bps) = do
let (metabps, blkbps) = sepBodyParts bps
meta <- bodyPartsToMeta metabps
blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps
blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
blks'' <- removeOrphanAnchors blks'
return (meta, blks'')
docxToOutput :: PandocMonad m
=> ReaderOptions
-> Docx
-> m (Meta, [Block])
docxToOutput opts (Docx (Document _ body)) =
let dEnv = def { docxOptions = opts} in
evalDocxContext (bodyToOutput body) dEnv def