{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Text.CSL.Pandoc (processCites, processCites')
where
import Prelude
import Control.Applicative ((<|>))
import qualified Control.Exception as E
import Control.Monad
import Control.Monad.State
import Data.Aeson
import qualified Data.ByteString.Lazy as L
import Data.Char (isDigit, isPunctuation, isSpace,
toLower)
import qualified Data.Map as M
import qualified Data.Set as Set
import Data.Maybe (fromMaybe)
import System.Directory (getAppUserDataDirectory)
import System.Environment (getEnv)
import System.FilePath
import System.IO.Error (isDoesNotExistError)
import System.SetEnv (setEnv)
import Text.CSL.Data (getDefaultCSL)
import Text.CSL.Exception
import Text.CSL.Input.Bibutils (convertRefs, readBiblioFile)
import Text.CSL.Output.Pandoc (renderPandoc, renderPandoc',
headInline, initInline, tailInline, toCapital)
import Text.CSL.Parser
import Text.CSL.Proc
import Text.CSL.Reference hiding (Value, processCites)
import Text.CSL.Style hiding (Citation (..), Cite (..))
import qualified Text.CSL.Style as CSL
import Text.CSL.Util (findFile, lastInline,
parseRomanNumeral, splitStrWhen, tr',
trim)
import Text.HTML.TagSoup.Entity (lookupEntity)
import Text.Pandoc
import Text.Pandoc.Builder (deleteMeta, setMeta)
import Text.Pandoc.Shared (stringify)
import Text.Pandoc.Walk
import Text.Parsec hiding (State, (<|>))
processCites :: Style -> [Reference] -> Pandoc -> Pandoc
processCites style refs (Pandoc m1 b1) =
let metanocites = lookupMeta "nocite" m1
nocites = mkNociteWildcards refs . query getCitation <$> metanocites
Pandoc m2 b2 = evalState (walkM setHashes $ Pandoc (deleteMeta "nocite" m1) b1) 1
grps = query getCitation (Pandoc m2 b2) ++ fromMaybe [] nocites
locMap = locatorMap style
result = citeproc procOpts{ linkCitations = isLinkCitations m2}
style refs (setNearNote style $
map (map (toCslCite locMap)) grps)
cits_map = tr' "cits_map" $ M.fromList $ zip grps (citations result)
biblioList = map (renderPandoc' style) $ zip (bibliography result) (citationIds result)
moveNotes = maybe True truish $
lookupMeta "notes-after-punctuation" m1
Pandoc m3 bs = walk (mvPunct moveNotes style) . deNote .
walk (processCite style cits_map) $ Pandoc m2 b2
m = case metanocites of
Nothing -> m3
Just x -> setMeta "nocite" x m3
notemap = mkNoteMap (Pandoc m3 bs)
in Pandoc m $ walk (addFirstNoteNumber notemap)
$ walk (concatMap removeNocaseSpans)
$ insertRefs m biblioList bs
addFirstNoteNumber :: M.Map String Int -> Inline -> Inline
addFirstNoteNumber notemap
s@(Span ("",["first-reference-note-number"],[("refid",refid)]) _)
= case M.lookup refid notemap of
Nothing -> s
Just n -> Str (show n)
addFirstNoteNumber _
(Note [Para (Span ("",["reference-id-list"],_) [] : ils)])
= Note [Para ils]
addFirstNoteNumber _ x = x
mkNoteMap :: Pandoc -> M.Map String Int
mkNoteMap doc =
foldr go mempty $ splitUp $ zip [1..] $ query getNoteCitationIds doc
where
splitUp :: [(Int, [String])] -> [(Int, String)]
splitUp = concatMap (\(n,ss) -> map (n,) ss)
go :: (Int, String) -> M.Map String Int -> M.Map String Int
go (notenumber, citeid) = M.insert citeid notenumber
insertRefs :: Meta -> [Block] -> [Block] -> [Block]
insertRefs _ [] bs = bs
insertRefs meta refs bs =
if isRefRemove meta
then bs
else case runState (walkM go bs) False of
(bs', True) -> bs'
(_, False) ->
case reverse bs of
Header lev (id',classes,kvs) ys : xs ->
reverse xs ++
[Header lev (id',addUnNumbered classes,kvs) ys,
Div ("refs",["references"],[]) refs]
_ -> bs ++ refHeader ++
[Div ("refs",["references"],[]) refs]
where go :: Block -> State Bool Block
go (Div attr@("refs",_,_) xs) = do
put True
return $ Div attr (xs ++ refs)
go x = return x
addUnNumbered cs = "unnumbered" : [c | c <- cs, c /= "unnumbered"]
refHeader = case refTitle meta of
Just ils ->
[Header 1 ("bibliography", ["unnumbered"], []) ils]
_ -> []
refTitle :: Meta -> Maybe [Inline]
refTitle meta =
case lookupMeta "reference-section-title" meta of
Just (MetaString s) -> Just [Str s]
Just (MetaInlines ils) -> Just ils
Just (MetaBlocks [Plain ils]) -> Just ils
Just (MetaBlocks [Para ils]) -> Just ils
_ -> Nothing
isRefRemove :: Meta -> Bool
isRefRemove meta =
maybe False truish $ lookupMeta "suppress-bibliography" meta
isLinkCitations :: Meta -> Bool
isLinkCitations meta =
maybe False truish $ lookupMeta "link-citations" meta
truish :: MetaValue -> Bool
truish (MetaBool t) = t
truish (MetaString s) = isYesValue (map toLower s)
truish (MetaInlines ils) = isYesValue (map toLower (stringify ils))
truish (MetaBlocks [Plain ils]) = isYesValue (map toLower (stringify ils))
truish _ = False
isYesValue :: String -> Bool
isYesValue "t" = True
isYesValue "true" = True
isYesValue "yes" = True
isYesValue "on" = True
isYesValue _ = False
mkNociteWildcards :: [Reference] -> [[Citation]] -> [[Citation]]
mkNociteWildcards refs = map expandStar
where expandStar cs =
case [c | c <- cs
, citationId c == "*"] of
[] -> cs
_ -> allcites
allcites = map (\ref -> Citation{
citationId = unLiteral (refId ref),
citationPrefix = [],
citationSuffix = [],
citationMode = NormalCitation,
citationNoteNum = 0,
citationHash = 0 }) refs
removeNocaseSpans :: Inline -> [Inline]
removeNocaseSpans (Span ("",["nocase"],[]) xs) = xs
removeNocaseSpans x = [x]
processCites' :: Pandoc -> IO Pandoc
processCites' (Pandoc meta blocks) = do
mbcsldir <- E.catch (Just <$> getAppUserDataDirectory "csl") $ \e ->
if isDoesNotExistError e
then return Nothing
else E.throwIO e
mbpandocdir <- E.catch (Just <$> getAppUserDataDirectory "pandoc") $ \e ->
if isDoesNotExistError e
then return Nothing
else E.throwIO e
let inlineRefError s = E.throw $ ErrorParsingReferences s
let inlineRefs = either inlineRefError id
$ convertRefs $ lookupMeta "references" meta
let cslfile = (lookupMeta "csl" meta <|> lookupMeta "citation-style" meta)
>>= toPath
let mbLocale = (lookupMeta "lang" meta `mplus` lookupMeta "locale" meta)
>>= toPath
let tryReadCSLFile Nothing _ = mzero
tryReadCSLFile (Just d) f = E.catch (readCSLFile mbLocale (d </> f))
(\(_ :: E.SomeException) -> mzero)
csl <- case cslfile of
Just f | not (null f) -> readCSLFile mbLocale f
_ -> tryReadCSLFile mbpandocdir "default.csl"
`mplus` tryReadCSLFile mbcsldir "chicago-author-date.csl"
`mplus` (getDefaultCSL >>=
localizeCSL mbLocale . parseCSL')
case styleLocale csl of
(l:_) -> do
setEnv "LC_ALL" (localeLang l)
setEnv "LANG" (localeLang l)
[] -> do
envlang <- getEnv "LANG"
if null envlang
then do
setEnv "LANG" "en-US.UTF-8"
setEnv "LC_ALL" "en-US.UTF-8"
else
setEnv "LC_ALL" envlang
let citids = query getCitationIds (Pandoc meta blocks)
let idpred = if "*" `Set.member` citids
then const True
else (`Set.member` citids)
bibRefs <- getBibRefs idpred $ fromMaybe (MetaList [])
$ lookupMeta "bibliography" meta
let refs = inlineRefs ++ bibRefs
let cslAbbrevFile = lookupMeta "citation-abbreviations" meta >>= toPath
let skipLeadingSpace = L.dropWhile (\s -> s == 32 || (s >= 9 && s <= 13))
abbrevs <- maybe (return (Abbreviations M.empty))
(\f -> findFile (maybe ["."] (\g -> [".", g]) mbcsldir) f >>=
maybe (E.throwIO $ CouldNotFindAbbrevFile f) return >>=
L.readFile >>=
either error return . eitherDecode . skipLeadingSpace)
cslAbbrevFile
let csl' = csl{ styleAbbrevs = abbrevs }
return $ processCites (tr' "CSL" csl') refs $ Pandoc meta blocks
toPath :: MetaValue -> Maybe String
toPath (MetaString s) = Just s
toPath (MetaList xs) = case reverse xs of
[] -> Nothing
(x:_) -> toPath x
toPath (MetaInlines ils) = Just $ stringify ils
toPath _ = Nothing
getBibRefs :: (String -> Bool) -> MetaValue -> IO [Reference]
getBibRefs idpred (MetaList xs) = concat `fmap` mapM (getBibRefs idpred) xs
getBibRefs idpred (MetaInlines xs) = getBibRefs idpred (MetaString $ stringify xs)
getBibRefs idpred (MetaString s) = do
path <- findFile ["."] s >>= maybe (E.throwIO $ CouldNotFindBibFile s) return
map unescapeRefId `fmap` readBiblioFile idpred path
getBibRefs _ _ = return []
unescapeRefId :: Reference -> Reference
unescapeRefId ref = ref{ refId = Literal $ decodeEntities (unLiteral $ refId ref) }
decodeEntities :: String -> String
decodeEntities [] = []
decodeEntities ('&':xs) =
let (ys,zs) = break (==';') xs
in case zs of
';':ws -> case lookupEntity ('&':ys ++ ";") of
#if MIN_VERSION_tagsoup(0,13,0)
Just s -> s ++ decodeEntities ws
#else
Just c -> c : decodeEntities ws
#endif
Nothing -> '&' : decodeEntities xs
_ -> '&' : decodeEntities xs
decodeEntities (x:xs) = x : decodeEntities xs
processCite :: Style -> M.Map [Citation] Formatted -> Inline -> Inline
processCite s cs (Cite t _) =
case M.lookup t cs of
Just (Formatted xs)
| not (null xs) || all isSuppressAuthor t
-> Cite t (renderPandoc s (Formatted xs))
_ -> Strong [Str "???"]
where isSuppressAuthor c = citationMode c == SuppressAuthor
processCite _ _ x = x
getNoteCitationIds :: Inline -> [[String]]
getNoteCitationIds (Note [Para (Span ("",["reference-id-list"]
,[("refids",refids)]) [] : _)])
= [words refids]
getNoteCitationIds (Note _) = [[]]
getNoteCitationIds _ = []
isNote :: Inline -> Bool
isNote (Note _) = True
isNote (Cite _ [Note _]) = True
isNote (Cite _ [Superscript _]) = True
isNote _ = False
mvPunctInsideQuote :: Inline -> Inline -> [Inline]
mvPunctInsideQuote (Quoted qt ils) (Str s) | s `elem` [".", ","] =
[Quoted qt (init ils ++ mvPunctInsideQuote (last ils) (Str s))]
mvPunctInsideQuote il il' = [il, il']
isSpacy :: Inline -> Bool
isSpacy Space = True
isSpacy SoftBreak = True
isSpacy _ = False
mvPunct :: Bool -> Style -> [Inline] -> [Inline]
mvPunct moveNotes sty (x : Space : xs)
| isSpacy x = x : mvPunct moveNotes sty xs
mvPunct moveNotes sty (q : s : x : ys)
| isSpacy s
, isNote x
, startWithPunct ys
= if moveNotes
then mvPunct moveNotes sty $
case headInline ys of
"" -> q : x : tailInline ys
w -> q : Str w : x : tailInline ys
else q : x : mvPunct moveNotes sty ys
mvPunct moveNotes sty (Cite cs ils : ys)
| length ils > 1
, isNote (last ils)
, startWithPunct ys
, moveNotes
= Cite cs
(init ils ++
(case headInline ys of
"" -> []
s' | not (endWithPunct False (init ils)) -> [Str s']
| otherwise -> [])
++ [last ils]) : mvPunct moveNotes sty (tailInline ys)
mvPunct moveNotes sty (q@(Quoted _ _) : w@(Str _) : x : ys)
| isNote x
, isPunctuationInQuote sty
, moveNotes
= mvPunctInsideQuote q w ++ (x : mvPunct moveNotes sty ys)
mvPunct moveNotes sty (s : x : ys) | isSpacy s, isNote x =
x : mvPunct moveNotes sty ys
mvPunct moveNotes sty (s : x@(Cite _ (Superscript _ : _)) : ys)
| isSpacy s = x : mvPunct moveNotes sty ys
mvPunct moveNotes sty (Cite cs ils : Str "." : ys)
| lastInline ils == "."
= Cite cs ils : mvPunct moveNotes sty ys
mvPunct moveNotes sty (x:xs) = x : mvPunct moveNotes sty xs
mvPunct _ _ [] = []
endWithPunct :: Bool -> [Inline] -> Bool
endWithPunct _ [] = True
endWithPunct onlyFinal xs@(_:_) =
case reverse (stringify xs) of
[] -> True
(d:c:_) | isPunctuation d
&& not onlyFinal
&& isEndPunct c -> True
(c:_) | isEndPunct c -> True
| otherwise -> False
where isEndPunct c = c `elem` (".,;:!?" :: String)
startWithPunct :: [Inline] -> Bool
startWithPunct = all (`elem` (".,;:!?" :: String)) . headInline
deNote :: Pandoc -> Pandoc
deNote = topDown go
where go (Cite (c:cs) [Note [Para xs]]) =
Cite (c:cs) [Note [Para $ specialSpan (c:cs) : toCapital xs]]
go (Note xs) = Note $ topDown go' xs
go x = x
specialSpan cs =
Span ("",["reference-id-list"],
[("refids", unwords (map citationId cs))]) []
go' (Str "(" : Cite cs [Note [Para xs]] : Str ")" : ys) =
Str "(" : Cite cs xs : Str ")" : ys
go' (x : Cite cs [Note [Para xs]] : ys) | not (isSpacy x) =
x : Str "," : Space : comb (\zs -> [Cite cs zs]) xs ys
go' (Str "(" : Note [Para xs] : Str ")" : ys) =
Str "(" : xs ++ (Str ")" : ys)
go' (x : Note [Para xs] : ys) | not (isSpacy x) =
x : Str "," : Space : comb id xs ys
go' (Cite cs [Note [Para xs]] : ys) = comb (\zs -> [Cite cs zs]) xs ys
go' (Note [Para xs] : ys) = comb id xs ys
go' xs = xs
comb :: ([Inline] -> [Inline]) -> [Inline] -> [Inline] -> [Inline]
comb f xs ys =
let xs' = if startWithPunct ys && endWithPunct True xs
then initInline $ removeLeadingPunct xs
else removeLeadingPunct xs
removeLeadingPunct (Str [c] : s : zs)
| isSpacy s && (c == ',' || c == '.' || c == ':') = zs
removeLeadingPunct zs = zs
in f xs' ++ ys
getCitation :: Inline -> [[Citation]]
getCitation i | Cite t _ <- i = [t]
| otherwise = []
getCitationIds :: Inline -> Set.Set String
getCitationIds (Cite cs _) = Set.fromList (map citationId cs)
getCitationIds _ = mempty
setHashes :: Inline -> State Int Inline
setHashes i | Cite t ils <- i = do t' <- mapM setHash t
return $ Cite t' ils
| otherwise = return i
setHash :: Citation -> State Int Citation
setHash c = do
ident <- get
put $ ident + 1
return c{ citationHash = ident }
toCslCite :: LocatorMap -> Citation -> CSL.Cite
toCslCite locMap c
= let (la, lo, s) = locatorWords locMap $ citationSuffix c
s' = case (la,lo,s) of
("","",x:_)
| not (isPunct x) -> Space : s
_ -> s
isPunct (Str (x:_)) = isPunctuation x
isPunct _ = False
in emptyCite { CSL.citeId = citationId c
, CSL.citePrefix = Formatted $ citationPrefix c
, CSL.citeSuffix = Formatted s'
, CSL.citeLabel = la
, CSL.citeLocator = lo
, CSL.citeNoteNumber = show $ citationNoteNum c
, CSL.authorInText = citationMode c == AuthorInText
, CSL.suppressAuthor = citationMode c == SuppressAuthor
, CSL.citeHash = citationHash c
}
splitInp :: [Inline] -> [Inline]
splitInp = splitStrWhen (\c -> splitOn c || isSpace c)
where
splitOn ':' = False
splitOn c = isPunctuation c
locatorWords :: LocatorMap -> [Inline] -> (String, String, [Inline])
locatorWords locMap inp =
case parse (pLocatorWords locMap) "suffix" $ splitInp inp of
Right r -> r
Left _ -> ("","",inp)
pLocatorWords :: LocatorMap -> Parsec [Inline] st (String, String, [Inline])
pLocatorWords locMap = do
optional $ pMatchChar "," (== ',')
optional pSpace
(la, lo) <- pLocatorDelimited locMap <|> pLocatorIntegrated locMap
s <- getInput
return (la, trim lo, s)
pLocatorDelimited :: LocatorMap -> Parsec [Inline] st (String, String)
pLocatorDelimited locMap = try $ do
_ <- pMatchChar "{" (== '{')
skipMany pSpace
(la, _) <- pLocatorLabelDelimited locMap
let inner = do { t <- anyToken; return (True, stringify t) }
gs <- many (pBalancedBraces [('{','}'), ('[',']')] inner)
_ <- pMatchChar "}" (== '}')
let lo = concatMap snd gs
return (la, lo)
pLocatorLabelDelimited :: LocatorMap -> Parsec [Inline] st (String, Bool)
pLocatorLabelDelimited locMap
= pLocatorLabel' locMap lim <|> return ("page", True)
where
lim = stringify <$> anyToken
pLocatorIntegrated :: LocatorMap -> Parsec [Inline] st (String, String)
pLocatorIntegrated locMap = try $ do
(la, wasImplicit) <- pLocatorLabelIntegrated locMap
let modifier = if wasImplicit
then requireDigits
else requireRomansOrDigits
g <- try $ pLocatorWordIntegrated (not wasImplicit) >>= modifier
gs <- many (try $ pLocatorWordIntegrated False >>= modifier)
let lo = concat (g:gs)
return (la, lo)
pLocatorLabelIntegrated :: LocatorMap -> Parsec [Inline] st (String, Bool)
pLocatorLabelIntegrated locMap
= pLocatorLabel' locMap lim <|> (lookAhead digital >> return ("page", True))
where
lim = try $ pLocatorWordIntegrated True >>= requireRomansOrDigits
digital = try $ pLocatorWordIntegrated True >>= requireDigits
pLocatorLabel' :: LocatorMap -> Parsec [Inline] st String -> Parsec [Inline] st (String, Bool)
pLocatorLabel' locMap lim = go ""
where
go acc = try $ do
t <- anyToken
ts <- manyTill anyToken (try $ lookAhead lim)
let s = acc ++ stringify (t:ts)
case M.lookup (trim s) locMap of
Just l -> go s <|> return (l, False)
Nothing -> go s
requireDigits :: (Bool, String) -> Parsec [Inline] st String
requireDigits (_, s) = if not (any isDigit s)
then fail "requireDigits"
else return s
requireRomansOrDigits :: (Bool, String) -> Parsec [Inline] st String
requireRomansOrDigits (d, s) = if not d
then fail "requireRomansOrDigits"
else return s
pLocatorWordIntegrated :: Bool -> Parsec [Inline] st (Bool, String)
pLocatorWordIntegrated isFirst = try $ do
punct <- if isFirst
then return ""
else (stringify <$> pLocatorSep) <|> return ""
sp <- option "" (pSpace >> return " ")
(dig, s) <- pBalancedBraces [('(',')'), ('[',']'), ('{','}')] pPageSeq
return (dig, punct ++ sp ++ s)
pBalancedBraces :: [(Char, Char)] -> Parsec [Inline] st (Bool, String) -> Parsec [Inline] st (Bool, String)
pBalancedBraces braces p = try $ do
ss <- many1 surround
return $ anyWereDigitLike ss
where
except = notFollowedBy pBraces >> p
surround = foldl (\a (open, close) -> sur open close except <|> a) except braces
isc c = stringify <$> pMatchChar [c] (== c)
sur c c' m = try $ do
(d, mid) <- between (isc c) (isc c') (option (False, "") m)
return (d, [c] ++ mid ++ [c'])
flattened = concatMap (\(o, c) -> [o, c]) braces
pBraces = pMatchChar "braces" (`elem` flattened)
pPageSeq :: Parsec [Inline] st (Bool, String)
pPageSeq = oneDotTwo <|> withPeriod
where
oneDotTwo = do
u <- pPageUnit
us <- many withPeriod
return $ anyWereDigitLike (u:us)
withPeriod = try $ do
p <- pMatchChar "." (== '.')
u <- try pPageUnit
return (fst u, stringify p ++ snd u)
anyWereDigitLike :: [(Bool, String)] -> (Bool, String)
anyWereDigitLike as = (any fst as, concatMap snd as)
pPageUnit :: Parsec [Inline] st (Bool, String)
pPageUnit = roman <|> plainUnit
where
roman = (True,) <$> pRoman
plainUnit = do
ts <- many1 (notFollowedBy pSpace >>
notFollowedBy pLocatorPunct >>
anyToken)
let s = stringify ts
return (any isDigit s, s)
pRoman :: Parsec [Inline] st String
pRoman = try $ do
t <- anyToken
case t of
Str xs -> case parseRomanNumeral xs of
Nothing -> mzero
Just _ -> return xs
_ -> mzero
isLocatorPunct :: Char -> Bool
isLocatorPunct '-' = False
isLocatorPunct '–' = False
isLocatorPunct ':' = False
isLocatorPunct c = isPunctuation c
pLocatorPunct :: Parsec [Inline] st Inline
pLocatorPunct = pMatchChar "punctuation" isLocatorPunct
pLocatorSep :: Parsec [Inline] st Inline
pLocatorSep = pMatchChar "locator separator" isLocatorSep
isLocatorSep :: Char -> Bool
isLocatorSep ',' = True
isLocatorSep ';' = True
isLocatorSep _ = False
pMatchChar :: String -> (Char -> Bool) -> Parsec [Inline] st Inline
pMatchChar msg f = pMatch msg mc
where
mc (Str [c]) = f c
mc _ = False
pSpace :: Parsec [Inline] st Inline
pSpace = pMatch "' '" (\t -> isSpacy t || t == Str "\160")
pMatch :: String -> (Inline -> Bool) -> Parsec [Inline] st Inline
pMatch msg condition = try $ do
t <- anyToken
if not (condition t)
then fail msg
else return t
type LocatorMap = M.Map String String
locatorMap :: Style -> LocatorMap
locatorMap sty =
foldr (\term -> M.insert (termSingular term) (cslTerm term)
. M.insert (termPlural term) (cslTerm term))
M.empty
(concatMap localeTerms $ styleLocale sty)