module Text.Pandoc.Shared (
splitBy,
splitByIndices,
splitStringByIndices,
substitute,
ordNub,
backslashEscapes,
escapeStringUsing,
stripTrailingNewlines,
trim,
triml,
trimr,
stripFirstAndLast,
camelCaseToHyphenated,
toRomanNumeral,
escapeURI,
tabFilter,
normalizeDate,
orderedListMarkers,
normalizeSpaces,
extractSpaces,
normalize,
normalizeInlines,
normalizeBlocks,
removeFormatting,
stringify,
capitalize,
compactify,
compactify',
compactify'DL,
Element (..),
hierarchicalize,
uniqueIdent,
isHeaderBlock,
headerShift,
isTightList,
addMetaField,
makeMeta,
renderTags',
inDirectory,
getDefaultReferenceDocx,
getDefaultReferenceODT,
readDataFile,
readDataFileUTF8,
fetchItem,
fetchItem',
openURL,
collapseFilePath,
err,
warn,
mapLeft,
hush,
blocksToInlines,
safeRead,
withTempDir,
pandocVersion
) where
import Text.Pandoc.Definition
import Text.Pandoc.Walk
import Text.Pandoc.MediaBag (MediaBag, lookupMedia)
import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.UTF8 as UTF8
import System.Environment (getProgName)
import System.Exit (exitWith, ExitCode(..))
import Data.Char ( toLower, isLower, isUpper, isAlpha,
isLetter, isDigit, isSpace )
import Data.List ( find, stripPrefix, intercalate )
import Data.Version ( showVersion )
import qualified Data.Map as M
import Network.URI ( escapeURIString, nonStrictRelativeTo,
unEscapeString, parseURIReference, isAllowedInURI,
parseURI, URI(..) )
import qualified Data.Set as Set
import System.Directory
import System.FilePath (splitDirectories, isPathSeparator)
import qualified System.FilePath.Posix as Posix
import Text.Pandoc.MIME (MimeType, getMimeType)
import System.FilePath ( (</>), takeExtension, dropExtension)
import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
import qualified Control.Exception as E
import Control.Monad (msum, unless, MonadPlus(..))
import Text.Pandoc.Pretty (charWidth)
import Text.Pandoc.Compat.Time
import Data.Time.Clock.POSIX
import System.IO (stderr)
import System.IO.Temp
import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
renderOptions)
import Text.Pandoc.Compat.Monoid ((<>))
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as B8
import Data.ByteString.Base64 (decodeLenient)
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
import qualified Data.Text as T (toUpper, pack, unpack)
import Data.ByteString.Lazy (toChunks, fromChunks)
import qualified Data.ByteString.Lazy as BL
import Paths_pandoc (version)
import Codec.Archive.Zip
#ifdef EMBED_DATA_FILES
import Text.Pandoc.Data (dataFiles)
#else
import Paths_pandoc (getDataFileName)
#endif
#ifdef HTTP_CLIENT
import Network.HTTP.Client (httpLbs, responseBody, responseHeaders,
Request(port,host))
#if MIN_VERSION_http_client(0,4,30)
import Network.HTTP.Client (parseRequest)
#else
import Network.HTTP.Client (parseUrl)
#endif
#if MIN_VERSION_http_client(0,4,18)
import Network.HTTP.Client (newManager)
#else
import Network.HTTP.Client (withManager)
#endif
import Network.HTTP.Client.Internal (addProxy)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import System.Environment (getEnv)
import Network.HTTP.Types.Header ( hContentType)
import Network (withSocketsDo)
#else
import Network.URI (parseURI)
import Network.HTTP (findHeader, rspBody,
RequestMethod(..), HeaderName(..), mkRequest)
import Network.Browser (browse, setAllowRedirects, setOutHandler, request)
#endif
pandocVersion :: String
pandocVersion = showVersion version
splitBy :: (a -> Bool) -> [a] -> [[a]]
splitBy _ [] = []
splitBy isSep lst =
let (first, rest) = break isSep lst
rest' = dropWhile isSep rest
in first:(splitBy isSep rest')
splitByIndices :: [Int] -> [a] -> [[a]]
splitByIndices [] lst = [lst]
splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y x) xs) rest)
where (first, rest) = splitAt x lst
splitStringByIndices :: [Int] -> [Char] -> [[Char]]
splitStringByIndices [] lst = [lst]
splitStringByIndices (x:xs) lst =
let (first, rest) = splitAt' x lst in
first : (splitStringByIndices (map (\y -> y x) xs) rest)
splitAt' :: Int -> [Char] -> ([Char],[Char])
splitAt' _ [] = ([],[])
splitAt' n xs | n <= 0 = ([],xs)
splitAt' n (x:xs) = (x:ys,zs)
where (ys,zs) = splitAt' (n charWidth x) xs
substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
substitute _ _ [] = []
substitute [] _ xs = xs
substitute target replacement lst@(x:xs) =
case stripPrefix target lst of
Just lst' -> replacement ++ substitute target replacement lst'
Nothing -> x : substitute target replacement xs
ordNub :: (Ord a) => [a] -> [a]
ordNub l = go Set.empty l
where
go _ [] = []
go s (x:xs) = if x `Set.member` s then go s xs
else x : go (Set.insert x s) xs
backslashEscapes :: [Char]
-> [(Char, String)]
backslashEscapes = map (\ch -> (ch, ['\\',ch]))
escapeStringUsing :: [(Char, String)] -> String -> String
escapeStringUsing _ [] = ""
escapeStringUsing escapeTable (x:xs) =
case (lookup x escapeTable) of
Just str -> str ++ rest
Nothing -> x:rest
where rest = escapeStringUsing escapeTable xs
stripTrailingNewlines :: String -> String
stripTrailingNewlines = reverse . dropWhile (== '\n') . reverse
trim :: String -> String
trim = triml . trimr
triml :: String -> String
triml = dropWhile (`elem` " \r\n\t")
trimr :: String -> String
trimr = reverse . triml . reverse
stripFirstAndLast :: String -> String
stripFirstAndLast str =
drop 1 $ take ((length str) 1) str
camelCaseToHyphenated :: String -> String
camelCaseToHyphenated [] = ""
camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b =
a:'-':(toLower b):(camelCaseToHyphenated rest)
camelCaseToHyphenated (a:rest) = (toLower a):(camelCaseToHyphenated rest)
toRomanNumeral :: Int -> String
toRomanNumeral x =
if x >= 4000 || x < 0
then "?"
else case x of
_ | x >= 1000 -> "M" ++ toRomanNumeral (x 1000)
_ | x >= 900 -> "CM" ++ toRomanNumeral (x 900)
_ | x >= 500 -> "D" ++ toRomanNumeral (x 500)
_ | x >= 400 -> "CD" ++ toRomanNumeral (x 400)
_ | x >= 100 -> "C" ++ toRomanNumeral (x 100)
_ | x >= 90 -> "XC" ++ toRomanNumeral (x 90)
_ | x >= 50 -> "L" ++ toRomanNumeral (x 50)
_ | x >= 40 -> "XL" ++ toRomanNumeral (x 40)
_ | x >= 10 -> "X" ++ toRomanNumeral (x 10)
_ | x == 9 -> "IX"
_ | x >= 5 -> "V" ++ toRomanNumeral (x 5)
_ | x == 4 -> "IV"
_ | x >= 1 -> "I" ++ toRomanNumeral (x 1)
_ -> ""
escapeURI :: String -> String
escapeURI = escapeURIString (not . needsEscaping)
where needsEscaping c = isSpace c || c `elem`
['<','>','|','"','{','}','[',']','^', '`']
tabFilter :: Int
-> String
-> String
tabFilter tabStop =
let go _ [] = ""
go _ ('\n':xs) = '\n' : go tabStop xs
go _ ('\r':'\n':xs) = '\n' : go tabStop xs
go _ ('\r':xs) = '\n' : go tabStop xs
go spsToNextStop ('\t':xs) =
if tabStop == 0
then '\t' : go tabStop xs
else replicate spsToNextStop ' ' ++ go tabStop xs
go 1 (x:xs) =
x : go tabStop xs
go spsToNextStop (x:xs) =
x : go (spsToNextStop 1) xs
in go tabStop
normalizeDate :: String -> Maybe String
normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
(msum $ map (\fs -> parsetimeWith fs s >>= rejectBadYear) formats :: Maybe Day)
where rejectBadYear day = case toGregorian day of
(y, _, _) | y >= 1601 && y <= 9999 -> Just day
_ -> Nothing
parsetimeWith =
#if MIN_VERSION_time(1,5,0)
parseTimeM True defaultTimeLocale
#else
parseTime defaultTimeLocale
#endif
formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
"%d %B %Y", "%b. %d, %Y", "%B %d, %Y",
"%Y%m%d", "%Y%m", "%Y"]
orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String]
orderedListMarkers (start, numstyle, numdelim) =
let singleton c = [c]
nums = case numstyle of
DefaultStyle -> map show [start..]
Example -> map show [start..]
Decimal -> map show [start..]
UpperAlpha -> drop (start 1) $ cycle $
map singleton ['A'..'Z']
LowerAlpha -> drop (start 1) $ cycle $
map singleton ['a'..'z']
UpperRoman -> map toRomanNumeral [start..]
LowerRoman -> map (map toLower . toRomanNumeral) [start..]
inDelim str = case numdelim of
DefaultDelim -> str ++ "."
Period -> str ++ "."
OneParen -> str ++ ")"
TwoParens -> "(" ++ str ++ ")"
in map inDelim nums
normalizeSpaces :: [Inline] -> [Inline]
normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty
where cleanup [] = []
cleanup (Space:rest) = case dropWhile isSpaceOrEmpty rest of
[] -> []
(x:xs) -> Space : x : cleanup xs
cleanup ((Str ""):rest) = cleanup rest
cleanup (x:rest) = x : cleanup rest
isSpaceOrEmpty :: Inline -> Bool
isSpaceOrEmpty Space = True
isSpaceOrEmpty (Str "") = True
isSpaceOrEmpty _ = False
extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines
extractSpaces f is =
let contents = B.unMany is
left = case viewl contents of
(Space :< _) -> B.space
(SoftBreak :< _) -> B.softbreak
_ -> mempty
right = case viewr contents of
(_ :> Space) -> B.space
(_ :> SoftBreak) -> B.softbreak
_ -> mempty in
(left <> f (B.trimInlines . B.Many $ contents) <> right)
normalize :: Pandoc -> Pandoc
normalize (Pandoc (Meta meta) blocks) =
Pandoc (Meta $ M.map go meta) (normalizeBlocks blocks)
where go (MetaInlines xs) = MetaInlines $ normalizeInlines xs
go (MetaBlocks xs) = MetaBlocks $ normalizeBlocks xs
go (MetaList ms) = MetaList $ map go ms
go (MetaMap m) = MetaMap $ M.map go m
go x = x
normalizeBlocks :: [Block] -> [Block]
normalizeBlocks (Null : xs) = normalizeBlocks xs
normalizeBlocks (Div attr bs : xs) =
Div attr (normalizeBlocks bs) : normalizeBlocks xs
normalizeBlocks (BlockQuote bs : xs) =
case normalizeBlocks bs of
[] -> normalizeBlocks xs
bs' -> BlockQuote bs' : normalizeBlocks xs
normalizeBlocks (BulletList [] : xs) = normalizeBlocks xs
normalizeBlocks (BulletList items : xs) =
BulletList (map normalizeBlocks items) : normalizeBlocks xs
normalizeBlocks (OrderedList _ [] : xs) = normalizeBlocks xs
normalizeBlocks (OrderedList attr items : xs) =
OrderedList attr (map normalizeBlocks items) : normalizeBlocks xs
normalizeBlocks (DefinitionList [] : xs) = normalizeBlocks xs
normalizeBlocks (DefinitionList items : xs) =
DefinitionList (map go items) : normalizeBlocks xs
where go (ils, bs) = (normalizeInlines ils, map normalizeBlocks bs)
normalizeBlocks (RawBlock _ "" : xs) = normalizeBlocks xs
normalizeBlocks (RawBlock f x : xs) =
case normalizeBlocks xs of
(RawBlock f' x' : rest) | f' == f ->
RawBlock f (x ++ ('\n':x')) : rest
rest -> RawBlock f x : rest
normalizeBlocks (Para ils : xs) =
case normalizeInlines ils of
[] -> normalizeBlocks xs
ils' -> Para ils' : normalizeBlocks xs
normalizeBlocks (Plain ils : xs) =
case normalizeInlines ils of
[] -> normalizeBlocks xs
ils' -> Plain ils' : normalizeBlocks xs
normalizeBlocks (Header lev attr ils : xs) =
Header lev attr (normalizeInlines ils) : normalizeBlocks xs
normalizeBlocks (Table capt aligns widths hdrs rows : xs) =
Table (normalizeInlines capt) aligns widths
(map normalizeBlocks hdrs) (map (map normalizeBlocks) rows)
: normalizeBlocks xs
normalizeBlocks (x:xs) = x : normalizeBlocks xs
normalizeBlocks [] = []
normalizeInlines :: [Inline] -> [Inline]
normalizeInlines (Str x : ys) =
case concat (x : map fromStr strs) of
"" -> rest
n -> Str n : rest
where
(strs, rest) = span isStr $ normalizeInlines ys
isStr (Str _) = True
isStr _ = False
fromStr (Str z) = z
fromStr _ = error "normalizeInlines - fromStr - not a Str"
normalizeInlines (Space : SoftBreak : ys) =
SoftBreak : normalizeInlines ys
normalizeInlines (Space : ys) =
if null rest
then []
else Space : rest
where isSp Space = True
isSp _ = False
rest = dropWhile isSp $ normalizeInlines ys
normalizeInlines (Emph xs : zs) =
case normalizeInlines zs of
(Emph ys : rest) -> normalizeInlines $
Emph (normalizeInlines $ xs ++ ys) : rest
rest -> case normalizeInlines xs of
[] -> rest
xs' -> Emph xs' : rest
normalizeInlines (Strong xs : zs) =
case normalizeInlines zs of
(Strong ys : rest) -> normalizeInlines $
Strong (normalizeInlines $ xs ++ ys) : rest
rest -> case normalizeInlines xs of
[] -> rest
xs' -> Strong xs' : rest
normalizeInlines (Subscript xs : zs) =
case normalizeInlines zs of
(Subscript ys : rest) -> normalizeInlines $
Subscript (normalizeInlines $ xs ++ ys) : rest
rest -> case normalizeInlines xs of
[] -> rest
xs' -> Subscript xs' : rest
normalizeInlines (Superscript xs : zs) =
case normalizeInlines zs of
(Superscript ys : rest) -> normalizeInlines $
Superscript (normalizeInlines $ xs ++ ys) : rest
rest -> case normalizeInlines xs of
[] -> rest
xs' -> Superscript xs' : rest
normalizeInlines (SmallCaps xs : zs) =
case normalizeInlines zs of
(SmallCaps ys : rest) -> normalizeInlines $
SmallCaps (normalizeInlines $ xs ++ ys) : rest
rest -> case normalizeInlines xs of
[] -> rest
xs' -> SmallCaps xs' : rest
normalizeInlines (Strikeout xs : zs) =
case normalizeInlines zs of
(Strikeout ys : rest) -> normalizeInlines $
Strikeout (normalizeInlines $ xs ++ ys) : rest
rest -> case normalizeInlines xs of
[] -> rest
xs' -> Strikeout xs' : rest
normalizeInlines (RawInline _ [] : ys) = normalizeInlines ys
normalizeInlines (RawInline f xs : zs) =
case normalizeInlines zs of
(RawInline f' ys : rest) | f == f' -> normalizeInlines $
RawInline f (xs ++ ys) : rest
rest -> RawInline f xs : rest
normalizeInlines (Code _ "" : ys) = normalizeInlines ys
normalizeInlines (Code attr xs : zs) =
case normalizeInlines zs of
(Code attr' ys : rest) | attr == attr' -> normalizeInlines $
Code attr (xs ++ ys) : rest
rest -> Code attr xs : rest
normalizeInlines (Span attr xs : zs) =
case normalizeInlines zs of
(Span attr' ys : rest) | attr == attr' -> normalizeInlines $
Span attr (normalizeInlines $ xs ++ ys) : rest
rest -> Span attr (normalizeInlines xs) : rest
normalizeInlines (Note bs : ys) = Note (normalizeBlocks bs) :
normalizeInlines ys
normalizeInlines (Quoted qt ils : ys) =
Quoted qt (normalizeInlines ils) : normalizeInlines ys
normalizeInlines (Link attr ils t : ys) =
Link attr (normalizeInlines ils) t : normalizeInlines ys
normalizeInlines (Image attr ils t : ys) =
Image attr (normalizeInlines ils) t : normalizeInlines ys
normalizeInlines (Cite cs ils : ys) =
Cite cs (normalizeInlines ils) : normalizeInlines ys
normalizeInlines (x : xs) = x : normalizeInlines xs
normalizeInlines [] = []
removeFormatting :: Walkable Inline a => a -> [Inline]
removeFormatting = query go . walk deNote
where go :: Inline -> [Inline]
go (Str xs) = [Str xs]
go Space = [Space]
go SoftBreak = [SoftBreak]
go (Code _ x) = [Str x]
go (Math _ x) = [Str x]
go LineBreak = [Space]
go _ = []
deNote (Note _) = Str ""
deNote x = x
stringify :: Walkable Inline a => a -> String
stringify = query go . walk deNote
where go :: Inline -> [Char]
go Space = " "
go SoftBreak = " "
go (Str x) = x
go (Code _ x) = x
go (Math _ x) = x
go (RawInline (Format "html") ('<':'b':'r':_)) = " "
go LineBreak = " "
go _ = ""
deNote (Note _) = Str ""
deNote x = x
capitalize :: Walkable Inline a => a -> a
capitalize = walk go
where go :: Inline -> Inline
go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s)
go x = x
compactify :: [[Block]]
-> [[Block]]
compactify [] = []
compactify items =
case (init items, last items) of
(_,[]) -> items
(others, final) ->
case last final of
Para a -> case (filter isPara $ concat items) of
[_] -> others ++ [init final ++ [Plain a]]
_ -> items
_ -> items
compactify' :: [Blocks]
-> [Blocks]
compactify' [] = []
compactify' items =
let (others, final) = (init items, last items)
in case reverse (B.toList final) of
(Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of
[_] -> others ++ [B.fromList (reverse $ Plain a : xs)]
_ -> items
_ -> items
compactify'DL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])]
compactify'DL items =
let defs = concatMap snd items
in case reverse (concatMap B.toList defs) of
(Para x:xs)
| not (any isPara xs) ->
let (t,ds) = last items
lastDef = B.toList $ last ds
ds' = init ds ++
if null lastDef
then [B.fromList lastDef]
else [B.fromList $ init lastDef ++ [Plain x]]
in init items ++ [(t, ds')]
| otherwise -> items
_ -> items
isPara :: Block -> Bool
isPara (Para _) = True
isPara _ = False
data Element = Blk Block
| Sec Int [Int] Attr [Inline] [Element]
deriving (Eq, Read, Show, Typeable, Data)
instance Walkable Inline Element where
walk f (Blk x) = Blk (walk f x)
walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
walkM f (Blk x) = Blk `fmap` walkM f x
walkM f (Sec lev nums attr ils elts) = do
ils' <- walkM f ils
elts' <- walkM f elts
return $ Sec lev nums attr ils' elts'
query f (Blk x) = query f x
query f (Sec _ _ _ ils elts) = query f ils <> query f elts
instance Walkable Block Element where
walk f (Blk x) = Blk (walk f x)
walk f (Sec lev nums attr ils elts) = Sec lev nums attr (walk f ils) (walk f elts)
walkM f (Blk x) = Blk `fmap` walkM f x
walkM f (Sec lev nums attr ils elts) = do
ils' <- walkM f ils
elts' <- walkM f elts
return $ Sec lev nums attr ils' elts'
query f (Blk x) = query f x
query f (Sec _ _ _ ils elts) = query f ils <> query f elts
inlineListToIdentifier :: [Inline] -> String
inlineListToIdentifier =
dropWhile (not . isAlpha) . intercalate "-" . words .
map (nbspToSp . toLower) .
filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
stringify
where nbspToSp '\160' = ' '
nbspToSp x = x
hierarchicalize :: [Block] -> [Element]
hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) []
hierarchicalizeWithIds :: [Block] -> S.State [Int] [Element]
hierarchicalizeWithIds [] = return []
hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do
lastnum <- S.get
let lastnum' = take level lastnum
let newnum = case length lastnum' of
x | "unnumbered" `elem` classes -> []
| x >= level -> init lastnum' ++ [last lastnum' + 1]
| otherwise -> lastnum ++
replicate (level length lastnum 1) 0 ++ [1]
unless (null newnum) $ S.put newnum
let (sectionContents, rest) = break (headerLtEq level) xs
sectionContents' <- hierarchicalizeWithIds sectionContents
rest' <- hierarchicalizeWithIds rest
return $ Sec level newnum attr title' sectionContents' : rest'
hierarchicalizeWithIds ((Div ("",["references"],[])
(Header level (ident,classes,kvs) title' : xs)):ys) =
hierarchicalizeWithIds ((Header level (ident,("references":classes),kvs)
title') : (xs ++ ys))
hierarchicalizeWithIds (x:rest) = do
rest' <- hierarchicalizeWithIds rest
return $ (Blk x) : rest'
headerLtEq :: Int -> Block -> Bool
headerLtEq level (Header l _ _) = l <= level
headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _)) = l <= level
headerLtEq _ _ = False
uniqueIdent :: [Inline] -> Set.Set String -> String
uniqueIdent title' usedIdents
= let baseIdent = case inlineListToIdentifier title' of
"" -> "section"
x -> x
numIdent n = baseIdent ++ "-" ++ show n
in if baseIdent `Set.member` usedIdents
then case find (\x -> not $ numIdent x `Set.member` usedIdents) ([1..60000] :: [Int]) of
Just x -> numIdent x
Nothing -> baseIdent
else baseIdent
isHeaderBlock :: Block -> Bool
isHeaderBlock (Header _ _ _) = True
isHeaderBlock _ = False
headerShift :: Int -> Pandoc -> Pandoc
headerShift n = walk shift
where shift :: Block -> Block
shift (Header level attr inner) = Header (level + n) attr inner
shift x = x
isTightList :: [[Block]] -> Bool
isTightList = all firstIsPlain
where firstIsPlain (Plain _ : _) = True
firstIsPlain _ = False
addMetaField :: ToMetaValue a
=> String
-> a
-> Meta
-> Meta
addMetaField key val (Meta meta) =
Meta $ M.insertWith combine key (toMetaValue val) meta
where combine newval (MetaList xs) = MetaList (xs ++ tolist newval)
combine newval x = MetaList [x, newval]
tolist (MetaList ys) = ys
tolist y = [y]
makeMeta :: [Inline] -> [[Inline]] -> [Inline] -> Meta
makeMeta title authors date =
addMetaField "title" (B.fromList title)
$ addMetaField "author" (map B.fromList authors)
$ addMetaField "date" (B.fromList date)
$ nullMeta
renderTags' :: [Tag String] -> String
renderTags' = renderTagsOptions
renderOptions{ optMinimize = matchTags ["hr", "br", "img",
"meta", "link"]
, optRawTag = matchTags ["script", "style"] }
where matchTags = \tags -> flip elem tags . map toLower
inDirectory :: FilePath -> IO a -> IO a
inDirectory path action = E.bracket
getCurrentDirectory
setCurrentDirectory
(const $ setCurrentDirectory path >> action)
getDefaultReferenceDocx :: Maybe FilePath -> IO Archive
getDefaultReferenceDocx datadir = do
let paths = ["[Content_Types].xml",
"_rels/.rels",
"docProps/app.xml",
"docProps/core.xml",
"word/document.xml",
"word/fontTable.xml",
"word/footnotes.xml",
"word/numbering.xml",
"word/settings.xml",
"word/webSettings.xml",
"word/styles.xml",
"word/_rels/document.xml.rels",
"word/_rels/footnotes.xml.rels",
"word/theme/theme1.xml"]
let toLazy = fromChunks . (:[])
let pathToEntry path = do epochtime <- (floor . utcTimeToPOSIXSeconds) <$>
getCurrentTime
contents <- toLazy <$> readDataFile datadir
("docx/" ++ path)
return $ toEntry path epochtime contents
mbArchive <- case datadir of
Nothing -> return Nothing
Just d -> do
exists <- doesFileExist (d </> "reference.docx")
if exists
then return (Just (d </> "reference.docx"))
else return Nothing
case mbArchive of
Just arch -> toArchive <$> BL.readFile arch
Nothing -> foldr addEntryToArchive emptyArchive <$>
mapM pathToEntry paths
getDefaultReferenceODT :: Maybe FilePath -> IO Archive
getDefaultReferenceODT datadir = do
let paths = ["mimetype",
"manifest.rdf",
"styles.xml",
"content.xml",
"meta.xml",
"settings.xml",
"Configurations2/accelerator/current.xml",
"Thumbnails/thumbnail.png",
"META-INF/manifest.xml"]
let pathToEntry path = do epochtime <- floor `fmap` getPOSIXTime
contents <- (fromChunks . (:[])) `fmap`
readDataFile datadir ("odt/" ++ path)
return $ toEntry path epochtime contents
mbArchive <- case datadir of
Nothing -> return Nothing
Just d -> do
exists <- doesFileExist (d </> "reference.odt")
if exists
then return (Just (d </> "reference.odt"))
else return Nothing
case mbArchive of
Just arch -> toArchive <$> BL.readFile arch
Nothing -> foldr addEntryToArchive emptyArchive <$>
mapM pathToEntry paths
readDefaultDataFile :: FilePath -> IO BS.ByteString
readDefaultDataFile "reference.docx" =
(BS.concat . toChunks . fromArchive) <$> getDefaultReferenceDocx Nothing
readDefaultDataFile "reference.odt" =
(BS.concat . toChunks . fromArchive) <$> getDefaultReferenceODT Nothing
readDefaultDataFile fname =
#ifdef EMBED_DATA_FILES
case lookup (makeCanonical fname) dataFiles of
Nothing -> err 97 $ "Could not find data file " ++ fname
Just contents -> return contents
where makeCanonical = Posix.joinPath . transformPathParts . splitDirectories
transformPathParts = reverse . foldl go []
go as "." = as
go (_:as) ".." = as
go as x = x : as
#else
getDataFileName fname' >>= checkExistence >>= BS.readFile
where fname' = if fname == "README" then fname else "data" </> fname
checkExistence :: FilePath -> IO FilePath
checkExistence fn = do
exists <- doesFileExist fn
if exists
then return fn
else err 97 ("Could not find data file " ++ fn)
#endif
readDataFile :: Maybe FilePath -> FilePath -> IO BS.ByteString
readDataFile Nothing fname = readDefaultDataFile fname
readDataFile (Just userDir) fname = do
exists <- doesFileExist (userDir </> fname)
if exists
then BS.readFile (userDir </> fname)
else readDefaultDataFile fname
readDataFileUTF8 :: Maybe FilePath -> FilePath -> IO String
readDataFileUTF8 userDir fname =
UTF8.toString `fmap` readDataFile userDir fname
parseURIReference' :: String -> Maybe URI
parseURIReference' s =
case parseURIReference s of
Just u
| length (uriScheme u) > 2 -> Just u
| null (uriScheme u) -> Just u
_ -> Nothing
fetchItem :: Maybe String -> String
-> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
fetchItem sourceURL s =
case (sourceURL >>= parseURIReference' . ensureEscaped, ensureEscaped s) of
(Just u, s') ->
case parseURIReference' s' of
Just u' -> openURL $ show $ u' `nonStrictRelativeTo` u
Nothing -> openURL s'
(Nothing, s') ->
case parseURI s' of
Just u' | length (uriScheme u') > 2 -> openURL (show u')
Just u' | uriScheme u' == "file:" ->
E.try $ readLocalFile $ dropWhile (=='/') (uriPath u')
_ -> E.try $ readLocalFile fp
where readLocalFile f = do
cont <- BS.readFile f
return (cont, mime)
dropFragmentAndQuery = takeWhile (\c -> c /= '?' && c /= '#')
fp = unEscapeString $ dropFragmentAndQuery s
mime = case takeExtension fp of
".gz" -> getMimeType $ dropExtension fp
".svgz" -> getMimeType $ dropExtension fp ++ ".svg"
x -> getMimeType x
ensureEscaped = escapeURIString isAllowedInURI . map convertSlash
convertSlash '\\' = '/'
convertSlash x = x
fetchItem' :: MediaBag -> Maybe String -> String
-> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
fetchItem' media sourceURL s = do
case lookupMedia s media of
Nothing -> fetchItem sourceURL s
Just (mime, bs) -> return $ Right (BS.concat $ toChunks bs, Just mime)
openURL :: String -> IO (Either E.SomeException (BS.ByteString, Maybe MimeType))
openURL u
| Just u'' <- stripPrefix "data:" u =
let mime = takeWhile (/=',') u''
contents = B8.pack $ unEscapeString $ drop 1 $ dropWhile (/=',') u''
in return $ Right (decodeLenient contents, Just mime)
#ifdef HTTP_CLIENT
| otherwise = withSocketsDo $ E.try $ do
#if MIN_VERSION_http_client(0,4,30)
let parseReq = parseRequest
#else
let parseReq = parseUrl
#endif
(proxy :: Either E.SomeException String) <- E.try $ getEnv "http_proxy"
req <- parseReq u
req' <- case proxy of
Left _ -> return req
Right pr -> (parseReq pr >>= \r ->
return $ addProxy (host r) (port r) req)
`mplus` return req
#if MIN_VERSION_http_client(0,4,18)
resp <- newManager tlsManagerSettings >>= httpLbs req'
#else
resp <- withManager tlsManagerSettings $ httpLbs req'
#endif
return (BS.concat $ toChunks $ responseBody resp,
UTF8.toString `fmap` lookup hContentType (responseHeaders resp))
#else
| otherwise = E.try $ getBodyAndMimeType `fmap` browse
(do S.liftIO $ UTF8.hPutStrLn stderr $ "Fetching " ++ u ++ "..."
setOutHandler $ const (return ())
setAllowRedirects True
request (getRequest' u'))
where getBodyAndMimeType (_, r) = (rspBody r, findHeader HdrContentType r)
getRequest' uriString = case parseURI uriString of
Nothing -> error ("Not a valid URL: " ++
uriString)
Just v -> mkRequest GET v
u' = escapeURIString (/= '|') u
#endif
err :: Int -> String -> IO a
err exitCode msg = do
name <- getProgName
UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
exitWith $ ExitFailure exitCode
return undefined
warn :: String -> IO ()
warn msg = do
name <- getProgName
UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
mapLeft :: (a -> b) -> Either a c -> Either b c
mapLeft f (Left x) = Left (f x)
mapLeft _ (Right x) = Right x
hush :: Either a b -> Maybe b
hush (Left _) = Nothing
hush (Right x) = Just x
collapseFilePath :: FilePath -> FilePath
collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories
where
go rs "." = rs
go r@(p:rs) ".." = case p of
".." -> ("..":r)
(checkPathSeperator -> Just True) -> ("..":r)
_ -> rs
go _ (checkPathSeperator -> Just True) = [[Posix.pathSeparator]]
go rs x = x:rs
isSingleton [] = Nothing
isSingleton [x] = Just x
isSingleton _ = Nothing
checkPathSeperator = fmap isPathSeparator . isSingleton
blockToInlines :: Block -> [Inline]
blockToInlines (Plain ils) = ils
blockToInlines (Para ils) = ils
blockToInlines (CodeBlock attr str) = [Code attr str]
blockToInlines (RawBlock fmt str) = [RawInline fmt str]
blockToInlines (BlockQuote blks) = blocksToInlines blks
blockToInlines (OrderedList _ blkslst) =
concatMap blocksToInlines blkslst
blockToInlines (BulletList blkslst) =
concatMap blocksToInlines blkslst
blockToInlines (DefinitionList pairslst) =
concatMap f pairslst
where
f (ils, blkslst) = ils ++
[Str ":", Space] ++
(concatMap blocksToInlines blkslst)
blockToInlines (Header _ _ ils) = ils
blockToInlines (HorizontalRule) = []
blockToInlines (Table _ _ _ headers rows) =
intercalate [LineBreak] $ map (concatMap blocksToInlines) tbl
where
tbl = headers : rows
blockToInlines (Div _ blks) = blocksToInlines blks
blockToInlines Null = []
blocksToInlinesWithSep :: [Inline] -> [Block] -> [Inline]
blocksToInlinesWithSep sep blks = intercalate sep $ map blockToInlines blks
blocksToInlines :: [Block] -> [Inline]
blocksToInlines = blocksToInlinesWithSep [Space, Str "ΒΆ", Space]
safeRead :: (MonadPlus m, Read a) => String -> m a
safeRead s = case reads s of
(d,x):_
| all isSpace x -> return d
_ -> mzero
withTempDir :: String -> (FilePath -> IO a) -> IO a
withTempDir =
#ifdef _WINDOWS
withTempDirectory "."
#else
withSystemTempDirectory
#endif