{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.Pandoc.Writers.FB2 (writeFB2) where
import Control.Monad (zipWithM)
import Control.Monad.Except (catchError)
import Control.Monad.State.Strict (StateT, evalStateT, get, gets, lift, liftM, modify)
import Data.ByteString.Base64 (encode)
import Data.Char (isAscii, isControl, isSpace)
import Data.Either (lefts, rights)
import Data.List (intercalate)
import Data.Text (Text, pack)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Network.HTTP (urlEncode)
import Text.XML.Light
import qualified Text.XML.Light as X
import qualified Text.XML.Light.Cursor as XC
import qualified Text.XML.Light.Input as XI
import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
import qualified Text.Pandoc.Class.PandocMonad as P
import Text.Pandoc.Definition
import Text.Pandoc.Logging
import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def)
import Text.Pandoc.Shared (capitalize, isURI, orderedListMarkers,
makeSections, tshow, stringify)
import Text.Pandoc.Writers.Shared (lookupMetaString, toLegacyTable)
data FbRenderState = FbRenderState
{ footnotes :: [ (Int, Text, [Content]) ]
, imagesToFetch :: [ (Text, Text) ]
, parentListMarker :: Text
, writerOptions :: WriterOptions
} deriving (Show)
type FBM m = StateT FbRenderState m
newFB :: FbRenderState
newFB = FbRenderState { footnotes = [], imagesToFetch = []
, parentListMarker = ""
, writerOptions = def }
data ImageMode = NormalImage | InlineImage deriving (Eq)
instance Show ImageMode where
show NormalImage = "imageType"
show InlineImage = "inlineImageType"
writeFB2 :: PandocMonad m
=> WriterOptions
-> Pandoc
-> m Text
writeFB2 opts doc = flip evalStateT newFB $ pandocToFB2 opts doc
pandocToFB2 :: PandocMonad m
=> WriterOptions
-> Pandoc
-> FBM m Text
pandocToFB2 opts (Pandoc meta blocks) = do
modify (\s -> s { writerOptions = opts })
desc <- description meta
title <- cMapM toXml . docTitle $ meta
secs <- renderSections 1 blocks
let body = el "body" $ el "title" (el "p" title) : secs
notes <- renderFootnotes
(imgs,missing) <- fmap imagesToFetch get >>= \s -> lift (fetchImages s)
let body' = replaceImagesWithAlt missing body
let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs)
return $ pack $ xml_head ++ showContent fb2_xml ++ "\n"
where
xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
fb2_attrs =
let xmlns = "http://www.gribuser.ru/xml/fictionbook/2.0"
xlink = "http://www.w3.org/1999/xlink"
in [ uattr "xmlns" xmlns
, attr ("xmlns", "l") xlink ]
description :: PandocMonad m => Meta -> FBM m Content
description meta' = do
let genre = case lookupMetaString "genre" meta' of
"" -> el "genre" ("unrecognised" :: String)
s -> el "genre" (T.unpack s)
bt <- booktitle meta'
let as = authors meta'
dd <- docdate meta'
annotation <- case lookupMeta "abstract" meta' of
Just (MetaBlocks bs) -> list . el "annotation" <$> cMapM blockToXml (map unPlain bs)
_ -> pure mempty
let lang = case lookupMeta "lang" meta' of
Just (MetaInlines [Str s]) -> [el "lang" $ iso639 s]
Just (MetaString s) -> [el "lang" $ iso639 s]
_ -> []
where iso639 = T.unpack . T.takeWhile (/= '-')
let coverimage url = do
let img = Image nullAttr mempty (url, "")
im <- insertImage InlineImage img
return [el "coverpage" im]
coverpage <- case lookupMeta "cover-image" meta' of
Just (MetaInlines ils) -> coverimage (stringify ils)
Just (MetaString s) -> coverimage s
_ -> return []
return $ el "description"
[ el "title-info" (genre :
(as ++ bt ++ annotation ++ dd ++ coverpage ++ lang))
, el "document-info" [el "program-used" ("pandoc" :: String)]
]
booktitle :: PandocMonad m => Meta -> FBM m [Content]
booktitle meta' = do
t <- cMapM toXml . docTitle $ meta'
return $ [el "book-title" t | not (null t)]
authors :: Meta -> [Content]
authors meta' = cMap author (docAuthors meta')
author :: [Inline] -> [Content]
author ss =
let ws = words . cMap plain $ ss
email = el "email" <$> take 1 (filter ('@' `elem`) ws)
ws' = filter ('@' `notElem`) ws
names = case ws' of
[nickname] -> [ el "nickname" nickname ]
[fname, lname] -> [ el "first-name" fname
, el "last-name" lname ]
(fname:rest) -> [ el "first-name" fname
, el "middle-name" (concat . init $ rest)
, el "last-name" (last rest) ]
[] -> []
in list $ el "author" (names ++ email)
docdate :: PandocMonad m => Meta -> FBM m [Content]
docdate meta' = do
let ss = docDate meta'
d <- cMapM toXml ss
return $ [el "date" d | not (null d)]
renderSections :: PandocMonad m => Int -> [Block] -> FBM m [Content]
renderSections level blocks = do
let blocks' = makeSections False Nothing blocks
let isSection (Div (_,"section":_,_) (Header{}:_)) = True
isSection _ = False
let (initialBlocks, secs) = break isSection blocks'
let blocks'' = if null initialBlocks
then blocks'
else Div ("",["section"],[])
(Header 1 nullAttr mempty : initialBlocks) : secs
cMapM (renderSection level) blocks''
renderSection :: PandocMonad m => Int -> Block -> FBM m [Content]
renderSection lvl (Div (id',"section":_,_) (Header _ _ title : xs)) = do
title' <- if null title
then return []
else list . el "title" <$> formatTitle title
content <- cMapM (renderSection (lvl + 1)) xs
let sectionContent = if T.null id'
then el "section" (title' ++ content)
else el "section" ([uattr "id" id'], title' ++ content)
return [sectionContent]
renderSection _ b = blockToXml b
formatTitle :: PandocMonad m => [Inline] -> FBM m [Content]
formatTitle inlines =
cMapM (blockToXml . Para) $ split (== LineBreak) inlines
split :: (a -> Bool) -> [a] -> [[a]]
split _ [] = []
split cond xs = let (b,a) = break cond xs
in (b:split cond (drop 1 a))
isLineBreak :: Inline -> Bool
isLineBreak LineBreak = True
isLineBreak _ = False
renderFootnotes :: PandocMonad m => FBM m [Content]
renderFootnotes = do
fns <- footnotes `liftM` get
if null fns
then return []
else return . list $
el "body" ([uattr "name" "notes"], map renderFN (reverse fns))
where
renderFN (n, idstr, cs) =
let fn_texts = el "title" (el "p" (show n)) : cs
in el "section" ([uattr "id" idstr], fn_texts)
fetchImages :: PandocMonad m => [(Text,Text)] -> m ([Content],[Text])
fetchImages links = do
imgs <- mapM (uncurry fetchImage) links
return (rights imgs, lefts imgs)
fetchImage :: PandocMonad m => Text -> Text -> m (Either Text Content)
fetchImage href link = do
mbimg <-
case (isURI link, readDataURI link) of
(True, Just (mime,_,True,base64)) ->
let mime' = T.toLower mime
in if mime' == "image/png" || mime' == "image/jpeg"
then return (Just (mime',base64))
else return Nothing
(True, Just _) -> return Nothing
_ ->
catchError (do (bs, mbmime) <- P.fetchItem link
case mbmime of
Nothing -> do
report $ CouldNotDetermineMimeType link
return Nothing
Just mime -> return $ Just (mime,
TE.decodeUtf8 $ encode bs))
(\e ->
do report $ CouldNotFetchResource link (tshow e)
return Nothing)
case mbimg of
Just (imgtype, imgdata) ->
return . Right $ el "binary"
( [uattr "id" href
, uattr "content-type" imgtype]
, txt imgdata )
_ -> return (Left ("#" <> href))
readDataURI :: Text
-> Maybe (Text,Text,Bool,Text)
readDataURI uri =
case T.stripPrefix "data:" uri of
Nothing -> Nothing
Just rest ->
let meta = T.takeWhile (/= ',') rest
uridata = T.drop (T.length meta + 1) rest
parts = T.split (== ';') meta
(mime,cs,enc)=foldr upd ("text/plain","US-ASCII",False) parts
in Just (mime,cs,enc,uridata)
where
upd str m@(mime,cs,enc)
| isMimeType str = (str,cs,enc)
| Just str' <- T.stripPrefix "charset=" str = (mime,str',enc)
| str == "base64" = (mime,cs,True)
| otherwise = m
isMimeType :: Text -> Bool
isMimeType s =
case T.split (=='/') s of
[mtype,msubtype] ->
(T.toLower mtype `elem` types
|| "x-" `T.isPrefixOf` T.toLower mtype)
&& T.all valid mtype
&& T.all valid msubtype
_ -> False
where
types = ["text","image","audio","video","application","message","multipart"]
valid c = isAscii c && not (isControl c) && not (isSpace c) &&
c `notElem` ("()<>@,;:\\\"/[]?=" :: String)
footnoteID :: Int -> Text
footnoteID i = "n" <> tshow i
mkitem :: PandocMonad m => Text -> [Block] -> FBM m [Content]
mkitem mrk bs = do
pmrk <- gets parentListMarker
let nmrk = pmrk <> mrk <> " "
modify (\s -> s { parentListMarker = nmrk})
item <- cMapM blockToXml $ plainToPara $ indentBlocks nmrk bs
modify (\s -> s { parentListMarker = pmrk })
return item
blockToXml :: PandocMonad m => Block -> FBM m [Content]
blockToXml (Plain ss) = cMapM toXml ss
blockToXml (Para [Math DisplayMath formula]) = insertMath NormalImage formula
blockToXml (Para [Image atr alt (src,tgt)])
| Just tit <- T.stripPrefix "fig:" tgt
= insertImage NormalImage (Image atr alt (src,tit))
blockToXml (Para ss) = list . el "p" <$> cMapM toXml ss
blockToXml (CodeBlock _ s) = return . spaceBeforeAfter .
map (el "p" . el "code" . T.unpack) . T.lines $ s
blockToXml (RawBlock f str) =
if f == Format "fb2"
then return $ XI.parseXML str
else return []
blockToXml (Div _ bs) = cMapM blockToXml bs
blockToXml (BlockQuote bs) = list . el "cite" <$> cMapM blockToXml bs
blockToXml (LineBlock lns) =
list . el "poem" <$> mapM stanza (split null lns)
where
v xs = el "v" <$> cMapM toXml xs
stanza xs = el "stanza" <$> mapM v xs
blockToXml (OrderedList a bss) =
concat <$> zipWithM mkitem markers bss
where
markers = orderedListMarkers a
blockToXml (BulletList bss) =
cMapM (mkitem "•") bss
blockToXml (DefinitionList defs) =
cMapM mkdef defs
where
mkdef (term, bss) = do
items <- cMapM (cMapM blockToXml . plainToPara . indentBlocks (T.replicate 4 " ")) bss
t <- wrap "strong" term
return (el "p" t : items)
blockToXml h@Header{} = do
report $ BlockNotRendered h
return []
blockToXml HorizontalRule = return [ el "empty-line" () ]
blockToXml (Table _ blkCapt specs thead tbody tfoot) = do
let (caption, aligns, _, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot
hd <- if null headers then pure [] else (:[]) <$> mkrow "th" headers aligns
bd <- mapM (\r -> mkrow "td" r aligns) rows
c <- el "emphasis" <$> cMapM toXml caption
return [el "table" (hd <> bd), el "p" c]
where
mkrow :: PandocMonad m => String -> [[Block]] -> [Alignment] -> FBM m Content
mkrow tag cells aligns' =
el "tr" <$> mapM (mkcell tag) (zip cells aligns')
mkcell :: PandocMonad m => String -> ([Block], Alignment) -> FBM m Content
mkcell tag (cell, align) = do
cblocks <- cMapM blockToXml cell
return $ el tag ([align_attr align], cblocks)
align_attr a = Attr (QName "align" Nothing Nothing) (align_str a)
align_str AlignLeft = "left"
align_str AlignCenter = "center"
align_str AlignRight = "right"
align_str AlignDefault = "left"
blockToXml Null = return []
plainToPara :: [Block] -> [Block]
plainToPara [] = []
plainToPara (Plain inlines : rest) =
Para inlines : plainToPara rest
plainToPara (Para inlines : rest) =
Para inlines : HorizontalRule : plainToPara rest
plainToPara (p:rest) = p : plainToPara rest
unPlain :: Block -> Block
unPlain (Plain inlines) = Para inlines
unPlain x = x
indentPrefix :: Text -> Block -> Block
indentPrefix spacer = indentBlock
where
indentBlock (Plain ins) = Plain (Str spacer:ins)
indentBlock (Para ins) = Para (Str spacer:ins)
indentBlock (CodeBlock a s) =
let s' = T.unlines . map (spacer<>) . T.lines $ s
in CodeBlock a s'
indentBlock (BlockQuote bs) = BlockQuote (map indent bs)
indentBlock (Header l attr' ins) = Header l attr' (indentLines ins)
indentBlock everythingElse = everythingElse
indentLines :: [Inline] -> [Inline]
indentLines ins = let lns = split isLineBreak ins :: [[Inline]]
in intercalate [LineBreak] $ map (Str spacer:) lns
indent :: Block -> Block
indent = indentPrefix spacer
where
spacer :: Text
spacer = T.replicate 4 " "
indentBlocks :: Text -> [Block] -> [Block]
indentBlocks _ [] = []
indentBlocks prefix (x:xs) = indentPrefix prefix x : map (indentPrefix $ T.replicate (T.length prefix) " ") xs
toXml :: PandocMonad m => Inline -> FBM m [Content]
toXml (Str s) = return [txt s]
toXml (Span _ ils) = cMapM toXml ils
toXml (Emph ss) = list `liftM` wrap "emphasis" ss
toXml (Underline ss) = list `liftM` wrap "underline" ss
toXml (Strong ss) = list `liftM` wrap "strong" ss
toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss
toXml (Superscript ss) = list `liftM` wrap "sup" ss
toXml (Subscript ss) = list `liftM` wrap "sub" ss
toXml (SmallCaps ss) = cMapM toXml $ capitalize ss
toXml (Quoted SingleQuote ss) = do
inner <- cMapM toXml ss
return $ [txt "‘"] ++ inner ++ [txt "’"]
toXml (Quoted DoubleQuote ss) = do
inner <- cMapM toXml ss
return $ [txt "“"] ++ inner ++ [txt "”"]
toXml (Cite _ ss) = cMapM toXml ss
toXml (Code _ s) = return [el "code" $ T.unpack s]
toXml Space = return [txt " "]
toXml SoftBreak = return [txt "\n"]
toXml LineBreak = return [txt "\n"]
toXml (Math _ formula) = insertMath InlineImage formula
toXml il@(RawInline _ _) = do
report $ InlineNotRendered il
return []
toXml (Link _ text (url,_)) = do
ln_text <- cMapM toXml text
return [ el "a" ( [ attr ("l","href") url ], ln_text) ]
toXml img@Image{} = insertImage InlineImage img
toXml (Note bs) = do
fns <- footnotes `liftM` get
let n = 1 + length fns
let fn_id = footnoteID n
fn_desc <- cMapM blockToXml bs
modify (\s -> s { footnotes = (n, fn_id, fn_desc) : fns })
let fn_ref = txt $ "[" <> tshow n <> "]"
return . list $ el "a" ( [ attr ("l","href") ("#" <> fn_id)
, uattr "type" "note" ]
, fn_ref )
insertMath :: PandocMonad m => ImageMode -> Text -> FBM m [Content]
insertMath immode formula = do
htmlMath <- fmap (writerHTMLMathMethod . writerOptions) get
case htmlMath of
WebTeX url -> do
let alt = [Code nullAttr formula]
let imgurl = url <> T.pack (urlEncode $ T.unpack formula)
let img = Image nullAttr alt (imgurl, "")
insertImage immode img
_ -> return [el "code" $ T.unpack formula]
insertImage :: PandocMonad m => ImageMode -> Inline -> FBM m [Content]
insertImage immode (Image _ alt (url,ttl)) = do
images <- imagesToFetch `liftM` get
let n = 1 + length images
let fname = "image" <> tshow n
modify (\s -> s { imagesToFetch = (fname, url) : images })
let ttlattr = case (immode, T.null ttl) of
(NormalImage, False) -> [ uattr "title" ttl ]
_ -> []
return . list $
el "image" $
[ attr ("l","href") ("#" <> fname)
, attr ("l","type") (tshow immode)
, uattr "alt" (T.pack $ cMap plain alt) ]
++ ttlattr
insertImage _ _ = error "unexpected inline instead of image"
replaceImagesWithAlt :: [Text] -> Content -> Content
replaceImagesWithAlt missingHrefs body =
let cur = XC.fromContent body
cur' = replaceAll cur
in XC.toTree . XC.root $ cur'
where
replaceAll :: XC.Cursor -> XC.Cursor
replaceAll c =
let n = XC.current c
c' = if isImage n && isMissing n
then XC.modifyContent replaceNode c
else c
in case XC.nextDF c' of
(Just cnext) -> replaceAll cnext
Nothing -> c'
isImage :: Content -> Bool
isImage (Elem e) = elName e == uname "image"
isImage _ = False
isMissing (Elem img@Element{}) =
let imgAttrs = elAttribs img
badAttrs = map (attr ("l","href")) missingHrefs
in any (`elem` imgAttrs) badAttrs
isMissing _ = False
replaceNode :: Content -> Content
replaceNode n@(Elem img@Element{}) =
let attrs = elAttribs img
alt = getAttrVal attrs (uname "alt")
imtype = getAttrVal attrs (qname "l" "type")
in case (alt, imtype) of
(Just alt', Just imtype') ->
if imtype' == show NormalImage
then el "p" alt'
else txt $ T.pack alt'
(Just alt', Nothing) -> txt $ T.pack alt'
_ -> n
replaceNode n = n
getAttrVal :: [X.Attr] -> QName -> Maybe String
getAttrVal attrs name =
case filter ((name ==) . attrKey) attrs of
(a:_) -> Just (attrVal a)
_ -> Nothing
wrap :: PandocMonad m => String -> [Inline] -> FBM m Content
wrap tagname inlines = el tagname `liftM` cMapM toXml inlines
list :: a -> [a]
list = (:[])
plain :: Inline -> String
plain (Str s) = T.unpack s
plain (Emph ss) = cMap plain ss
plain (Underline ss) = cMap plain ss
plain (Span _ ss) = cMap plain ss
plain (Strong ss) = cMap plain ss
plain (Strikeout ss) = cMap plain ss
plain (Superscript ss) = cMap plain ss
plain (Subscript ss) = cMap plain ss
plain (SmallCaps ss) = cMap plain ss
plain (Quoted _ ss) = cMap plain ss
plain (Cite _ ss) = cMap plain ss
plain (Code _ s) = T.unpack s
plain Space = " "
plain SoftBreak = " "
plain LineBreak = "\n"
plain (Math _ s) = T.unpack s
plain (RawInline _ _) = ""
plain (Link _ text (url,_)) = concat (map plain text ++ [" <", T.unpack url, ">"])
plain (Image _ alt _) = cMap plain alt
plain (Note _) = ""
el :: (Node t)
=> String
-> t
-> Content
el name cs = Elem $ unode name cs
spaceBeforeAfter :: [Content] -> [Content]
spaceBeforeAfter cs =
let emptyline = el "empty-line" ()
in [emptyline] ++ cs ++ [emptyline]
txt :: Text -> Content
txt s = Text $ CData CDataText (T.unpack s) Nothing
uattr :: String -> Text -> Text.XML.Light.Attr
uattr name = Attr (uname name) . T.unpack
attr :: (String, String) -> Text -> Text.XML.Light.Attr
attr (ns, name) = Attr (qname ns name) . T.unpack
uname :: String -> QName
uname name = QName name Nothing Nothing
qname :: String -> String -> QName
qname ns name = QName name Nothing (Just ns)
cMap :: (a -> [b]) -> [a] -> [b]
cMap = concatMap
cMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b]
cMapM f xs = concat `liftM` mapM f xs