{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Text.Pandoc.Readers.Ipynb ( readIpynb )
where
import Data.Char (isDigit)
import Data.Maybe (fromMaybe)
import Data.Digest.Pure.SHA (sha1, showDigest)
import Text.Pandoc.Options
import qualified Data.Scientific as Scientific
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Logging
import Text.Pandoc.Definition
import Data.Ipynb as Ipynb
import Text.Pandoc.Class.PandocMonad
import Text.Pandoc.MIME (extensionFromMimeType)
import Text.Pandoc.Shared (tshow)
import Text.Pandoc.UTF8
import Text.Pandoc.Walk (walk)
import Text.Pandoc.Error
import Data.Text (Text)
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.ByteString.Lazy as BL
import Data.Aeson as Aeson
import Control.Monad.Except (throwError)
import Text.Pandoc.Readers.Markdown (readMarkdown)
import qualified Text.Pandoc.UTF8 as UTF8
readIpynb :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readIpynb opts t = do
let src = BL.fromStrict (TE.encodeUtf8 t)
case eitherDecode src of
Right (notebook4 :: Notebook NbV4) -> notebookToPandoc opts notebook4
Left _ ->
case eitherDecode src of
Right (notebook3 :: Notebook NbV3) -> notebookToPandoc opts notebook3
Left err -> throwError $ PandocIpynbDecodingError $ T.pack err
notebookToPandoc :: PandocMonad m
=> ReaderOptions -> Notebook a -> m Pandoc
notebookToPandoc opts notebook = do
let cells = notebookCells notebook
let (fmt,fmtminor) = notebookFormat notebook
let m = M.insert "nbformat" (MetaString $ tshow fmt) $
M.insert "nbformat_minor" (MetaString $ tshow fmtminor) $
jsonMetaToMeta (notebookMetadata notebook)
let lang = case M.lookup "kernelspec" m of
Just (MetaMap ks) ->
case M.lookup "language" ks of
Just (MetaString l) -> l
_ -> "python"
_ -> "python"
bs <- mconcat <$> mapM (cellToBlocks opts lang) cells
let Pandoc _ blocks = B.doc bs
return $ Pandoc (Meta $ M.insert "jupyter" (MetaMap m) mempty) blocks
cellToBlocks :: PandocMonad m
=> ReaderOptions -> Text -> Ipynb.Cell a -> m B.Blocks
cellToBlocks opts lang c = do
let Source ts = cellSource c
let source = mconcat ts
let kvs = jsonMetaToPairs (cellMetadata c)
let attachments = maybe mempty M.toList $ cellAttachments c
mapM_ addAttachment attachments
case cellType c of
Ipynb.Markdown -> do
Pandoc _ bs <- walk fixImage <$> readMarkdown opts source
return $ B.divWith ("",["cell","markdown"],kvs)
$ B.fromList bs
Ipynb.Heading lev -> do
Pandoc _ bs <- readMarkdown opts
(T.replicate lev "#" <> " " <> source)
return $ B.divWith ("",["cell","markdown"],kvs)
$ B.fromList bs
Ipynb.Raw -> do
let format = fromMaybe "ipynb" $ lookup "format" kvs
let format' =
case format of
"text/html" -> "html"
"text/latex" -> "latex"
"application/pdf" -> "latex"
"text/markdown" -> "markdown"
"text/x-rsrt" -> "rst"
_ -> format
return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format' source
Ipynb.Code{ codeOutputs = outputs, codeExecutionCount = ec } -> do
outputBlocks <- mconcat <$> mapM outputToBlock outputs
let kvs' = maybe kvs (\x -> ("execution_count", tshow x):kvs) ec
return $ B.divWith ("",["cell","code"],kvs') $
B.codeBlockWith ("",[lang],[]) source
<> outputBlocks
fixImage :: Inline -> Inline
fixImage (Image attr lab (src,tit))
| "attachment:" `T.isPrefixOf` src = Image attr lab (T.drop 11 src, tit)
fixImage x = x
addAttachment :: PandocMonad m => (Text, MimeBundle) -> m ()
addAttachment (fname, mimeBundle) = do
let fp = T.unpack fname
case M.toList (unMimeBundle mimeBundle) of
(mimeType, BinaryData bs):_ ->
insertMedia fp (Just mimeType) (BL.fromStrict bs)
(mimeType, TextualData t):_ ->
insertMedia fp (Just mimeType)
(BL.fromStrict $ TE.encodeUtf8 t)
(mimeType, JsonData v):_ ->
insertMedia fp (Just mimeType) (encode v)
[] -> report $ CouldNotFetchResource fname "no attachment"
outputToBlock :: PandocMonad m => Output a -> m B.Blocks
outputToBlock Stream{ streamName = sName,
streamText = Source text } =
return $ B.divWith ("",["output","stream",sName],[])
$ B.codeBlock $ T.concat text
outputToBlock DisplayData{ displayData = data',
displayMetadata = metadata' } =
B.divWith ("",["output", "display_data"],[]) <$>
handleData metadata' data'
outputToBlock ExecuteResult{ executeCount = ec,
executeData = data',
executeMetadata = metadata' } =
B.divWith ("",["output", "execute_result"],[("execution_count",tshow ec)])
<$> handleData metadata' data'
outputToBlock Err{ errName = ename,
errValue = evalue,
errTraceback = traceback } =
return $ B.divWith ("",["output","error"],
[("ename",ename),
("evalue",evalue)])
$ B.codeBlock $ T.unlines traceback
handleData :: PandocMonad m
=> JSONMeta -> MimeBundle -> m B.Blocks
handleData metadata (MimeBundle mb) =
mconcat <$> mapM dataBlock (M.toList mb)
where
dataBlock :: PandocMonad m => (MimeType, MimeData) -> m B.Blocks
dataBlock (mt, d)
| "image/" `T.isPrefixOf` mt || mt == "application/pdf"
= do
let meta = case M.lookup mt metadata of
Just v@Object{} ->
case fromJSON v of
Success m' -> m'
Error _ -> mempty
_ -> mempty
let metaPairs = jsonMetaToPairs meta
let bl = case d of
BinaryData bs -> BL.fromStrict bs
TextualData t -> BL.fromStrict $ UTF8.fromText t
JsonData v -> encode v
let fname = T.pack (showDigest (sha1 bl)) <>
case extensionFromMimeType mt of
Nothing -> ""
Just ext -> "." <> ext
insertMedia (T.unpack fname) (Just mt) bl
return $ B.para $ B.imageWith ("",[],metaPairs) fname "" mempty
dataBlock ("text/html", TextualData t)
= return $ B.rawBlock "html" t
dataBlock ("text/latex", TextualData t)
= return $ B.rawBlock "latex" t
dataBlock ("text/plain", TextualData t) =
return $ B.codeBlock t
dataBlock (_, JsonData v) =
return $ B.codeBlockWith ("",["json"],[]) $ T.pack $ toStringLazy $ encode v
dataBlock _ = return mempty
jsonMetaToMeta :: JSONMeta -> M.Map Text MetaValue
jsonMetaToMeta = M.map valueToMetaValue
where
valueToMetaValue :: Value -> MetaValue
valueToMetaValue x@Object{} =
case fromJSON x of
Error s -> MetaString $ T.pack s
Success jm' -> MetaMap $ jsonMetaToMeta jm'
valueToMetaValue x@Array{} =
case fromJSON x of
Error s -> MetaString $ T.pack s
Success xs -> MetaList $ map valueToMetaValue xs
valueToMetaValue (Bool b) = MetaBool b
valueToMetaValue (String t) = MetaString t
valueToMetaValue (Number n)
| Scientific.isInteger n = MetaString (tshow (floor n :: Integer))
| otherwise = MetaString (tshow n)
valueToMetaValue Aeson.Null = MetaString ""
jsonMetaToPairs :: JSONMeta -> [(Text, Text)]
jsonMetaToPairs = M.toList . M.map
(\case
String t
| not (T.all isDigit t)
, t /= "true"
, t /= "false"
-> t
x -> T.pack $ UTF8.toStringLazy $ Aeson.encode x)