{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.Highlighting
   Copyright   : Copyright (C) 2008-2024 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Exports functions for syntax highlighting.
-}

module Text.Pandoc.Highlighting ( highlightingStyles
                                , languages
                                , languagesByExtension
                                , highlight
                                -- * Formats
                                -- ** LaTeX
                                , formatLaTeXInline
                                , formatLaTeXBlock
                                , styleToLaTeX
                                -- ** HTML
                                , formatHtmlInline
                                , formatHtmlBlock
                                , formatHtml4Block
                                , styleToCss
                                -- ** ConTeXt
                                , formatConTeXtInline
                                , formatConTeXtBlock
                                , styleToConTeXt
                                -- * Styles
                                , pygments
                                , espresso
                                , zenburn
                                , tango
                                , kate
                                , monochrome
                                , breezeDark
                                , haddock
                                , Style
                                , lookupHighlightingStyle
                                , fromListingsLanguage
                                , toListingsLanguage
                                ) where
import Control.Monad
import qualified Data.Map as M
import Data.Maybe (fromMaybe)
import qualified Data.Text as T
import Skylighting
import Text.Pandoc.Definition
import Text.Pandoc.Class (PandocMonad, readFileLazy)
import Text.Pandoc.Error (PandocError(..))
import Control.Monad.Except (throwError)
import System.FilePath (takeExtension)
import Text.Pandoc.Shared (safeRead)

highlightingStyles :: [(T.Text, Style)]
highlightingStyles :: [(Text, Style)]
highlightingStyles =
  [(Text
"pygments", Style
pygments),
   (Text
"tango", Style
tango),
   (Text
"espresso", Style
espresso),
   (Text
"zenburn", Style
zenburn),
   (Text
"kate", Style
kate),
   (Text
"monochrome", Style
monochrome),
   (Text
"breezedark", Style
breezeDark),
   (Text
"haddock", Style
haddock)]

languages :: SyntaxMap -> [T.Text]
languages :: SyntaxMap -> [Text]
languages SyntaxMap
syntaxmap = [Text -> Text
T.toLower (Syntax -> Text
sName Syntax
s) | Syntax
s <- SyntaxMap -> [Syntax]
forall k a. Map k a -> [a]
M.elems SyntaxMap
syntaxmap]

languagesByExtension :: SyntaxMap -> T.Text -> [T.Text]
languagesByExtension :: SyntaxMap -> Text -> [Text]
languagesByExtension SyntaxMap
syntaxmap Text
ext =
  [Text -> Text
T.toLower (Syntax -> Text
sName Syntax
s) | Syntax
s <- SyntaxMap -> String -> [Syntax]
syntaxesByExtension SyntaxMap
syntaxmap (Text -> String
T.unpack Text
ext)]

highlight :: SyntaxMap
          -> (FormatOptions -> [SourceLine] -> a) -- ^ Formatter
          -> Attr   -- ^ Attributes of the CodeBlock
          -> T.Text -- ^ Raw contents of the CodeBlock
          -> Either T.Text a
highlight :: forall a.
SyntaxMap
-> (FormatOptions -> [SourceLine] -> a)
-> Attr
-> Text
-> Either Text a
highlight SyntaxMap
syntaxmap FormatOptions -> [SourceLine] -> a
formatter (Text
ident, [Text]
classes, [(Text, Text)]
keyvals) Text
rawCode =
  let firstNum :: Int
firstNum = Int -> Maybe Int -> Int
forall a. a -> Maybe a -> a
fromMaybe Int
1 (Text -> Maybe Int
forall (m :: * -> *) a. (MonadPlus m, Read a) => Text -> m a
safeRead (Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
"1" (Maybe Text -> Text) -> Maybe Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> [(Text, Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"startFrom" [(Text, Text)]
keyvals))
      fmtOpts :: FormatOptions
fmtOpts = FormatOptions
defaultFormatOpts{
                  startNumber = firstNum,
                  lineAnchors = any (`elem`
                        ["line-anchors", "lineAnchors"]) classes,
                  numberLines = any (`elem`
                        ["number","numberLines", "number-lines"]) classes,
                  lineIdPrefix = if T.null ident
                                    then mempty
                                    else ident <> "-" }
      tokenizeOpts :: TokenizerConfig
tokenizeOpts = TokenizerConfig{ syntaxMap :: SyntaxMap
syntaxMap = SyntaxMap
syntaxmap
                                    , traceOutput :: Bool
traceOutput = Bool
False }
  in  case [Maybe Syntax] -> Maybe Syntax
forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, MonadPlus m) =>
t (m a) -> m a
msum ((Text -> Maybe Syntax) -> [Text] -> [Maybe Syntax]
forall a b. (a -> b) -> [a] -> [b]
map (Text -> SyntaxMap -> Maybe Syntax
`lookupSyntax` SyntaxMap
syntaxmap) [Text]
classes) of
            Maybe Syntax
Nothing
              | FormatOptions -> Bool
numberLines FormatOptions
fmtOpts -> a -> Either Text a
forall a b. b -> Either a b
Right
                              (a -> Either Text a) -> a -> Either Text a
forall a b. (a -> b) -> a -> b
$ FormatOptions -> [SourceLine] -> a
formatter FormatOptions
fmtOpts{ codeClasses = [],
                                                   containerClasses = classes }
                              ([SourceLine] -> a) -> [SourceLine] -> a
forall a b. (a -> b) -> a -> b
$ (Text -> SourceLine) -> [Text] -> [SourceLine]
forall a b. (a -> b) -> [a] -> [b]
map (\Text
ln -> [(TokenType
NormalTok, Text
ln)])
                              ([Text] -> [SourceLine]) -> [Text] -> [SourceLine]
forall a b. (a -> b) -> a -> b
$ Text -> [Text]
T.lines Text
rawCode
              | Bool
otherwise  -> Text -> Either Text a
forall a b. a -> Either a b
Left Text
""
            Just Syntax
syntax  -> (String -> Either Text a)
-> (a -> Either Text a) -> Either String a -> Either Text a
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Text -> Either Text a
forall a b. a -> Either a b
Left (Text -> Either Text a)
-> (String -> Text) -> String -> Either Text a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack) a -> Either Text a
forall a b. b -> Either a b
Right (Either String a -> Either Text a)
-> Either String a -> Either Text a
forall a b. (a -> b) -> a -> b
$
              FormatOptions -> [SourceLine] -> a
formatter FormatOptions
fmtOpts{ codeClasses =
                                   [T.toLower (sShortname syntax)],
                                  containerClasses = classes } ([SourceLine] -> a)
-> Either String [SourceLine] -> Either String a
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
                TokenizerConfig -> Syntax -> Text -> Either String [SourceLine]
tokenize TokenizerConfig
tokenizeOpts Syntax
syntax Text
rawCode

-- Functions for correlating latex listings package's language names
-- with skylighting language names:

langToListingsMap :: M.Map T.Text T.Text
langToListingsMap :: Map Text Text
langToListingsMap = [(Text, Text)] -> Map Text Text
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList [(Text, Text)]
langsList

listingsToLangMap :: M.Map T.Text T.Text
listingsToLangMap :: Map Text Text
listingsToLangMap = [(Text, Text)] -> Map Text Text
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList ([(Text, Text)] -> Map Text Text)
-> [(Text, Text)] -> Map Text Text
forall a b. (a -> b) -> a -> b
$ ((Text, Text) -> (Text, Text)) -> [(Text, Text)] -> [(Text, Text)]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Text) -> (Text, Text)
forall {b} {a}. (b, a) -> (a, b)
switch [(Text, Text)]
langsList
  where switch :: (b, a) -> (a, b)
switch (b
a,a
b) = (a
b,b
a)

langsList :: [(T.Text, T.Text)]
langsList :: [(Text, Text)]
langsList =
  [(Text
"abap",Text
"ABAP"),
  (Text
"acm",Text
"ACM"),
  (Text
"acmscript",Text
"ACMscript"),
  (Text
"acsl",Text
"ACSL"),
  (Text
"ada",Text
"Ada"),
  (Text
"algol",Text
"Algol"),
  (Text
"ant",Text
"Ant"),
  (Text
"assembler",Text
"Assembler"),
  (Text
"gnuassembler",Text
"Assembler"),
  (Text
"awk",Text
"Awk"),
  (Text
"bash",Text
"bash"),
  (Text
"monobasic",Text
"Basic"),
  (Text
"purebasic",Text
"Basic"),
  (Text
"c",Text
"C"),
  (Text
"cs",Text
"C"),
  (Text
"objectivec",Text
"C"),
  (Text
"cpp",Text
"C++"),
  (Text
"c++",Text
"C++"),
  (Text
"ocaml",Text
"Caml"),
  (Text
"cil",Text
"CIL"),
  (Text
"clean",Text
"Clean"),
  (Text
"cobol",Text
"Cobol"),
  (Text
"comal80",Text
"Comal80"),
  (Text
"command.com",Text
"command.com"),
  (Text
"comsol",Text
"Comsol"),
  (Text
"csh",Text
"csh"),
  (Text
"delphi",Text
"Delphi"),
  (Text
"eiffel",Text
"Eiffel"),
  (Text
"elan",Text
"Elan"),
  (Text
"elisp",Text
"elisp"),
  (Text
"erlang",Text
"erlang"),
  (Text
"euphoria",Text
"Euphoria"),
  (Text
"fortran",Text
"Fortran"),
  (Text
"gap",Text
"GAP"),
  (Text
"gcl",Text
"GCL"),
  (Text
"gnuplot",Text
"Gnuplot"),
  (Text
"go",Text
"Go"),
  (Text
"hansl",Text
"hansl"),
  (Text
"haskell",Text
"Haskell"),
  (Text
"html",Text
"HTML"),
  (Text
"idl",Text
"IDL"),
  (Text
"inform",Text
"inform"),
  (Text
"java",Text
"Java"),
  (Text
"jvmis",Text
"JVMIS"),
  (Text
"ksh",Text
"ksh"),
  (Text
"lingo",Text
"Lingo"),
  (Text
"lisp",Text
"Lisp"),
  (Text
"commonlisp",Text
"Lisp"),
  (Text
"llvm",Text
"LLVM"),
  (Text
"logo",Text
"Logo"),
  (Text
"lua",Text
"Lua"),
  (Text
"make",Text
"make"),
  (Text
"makefile",Text
"make"),
  (Text
"mathematica",Text
"Mathematica"),
  (Text
"matlab",Text
"Matlab"),
  (Text
"mercury",Text
"Mercury"),
  (Text
"metapost",Text
"MetaPost"),
  (Text
"miranda",Text
"Miranda"),
  (Text
"mizar",Text
"Mizar"),
  (Text
"ml",Text
"ML"),
  (Text
"modula2",Text
"Modula-2"),
  (Text
"mupad",Text
"MuPAD"),
  (Text
"nastran",Text
"NASTRAN"),
  (Text
"oberon2",Text
"Oberon-2"),
  (Text
"ocl",Text
"OCL"),
  (Text
"octave",Text
"Octave"),
  (Text
"oorexx",Text
"OORexx"),
  (Text
"oz",Text
"Oz"),
  (Text
"pascal",Text
"Pascal"),
  (Text
"perl",Text
"Perl"),
  (Text
"php",Text
"PHP"),
  (Text
"pli",Text
"PL/I"),
  (Text
"plasm",Text
"Plasm"),
  (Text
"postscript",Text
"PostScript"),
  (Text
"pov",Text
"POV"),
  (Text
"prolog",Text
"Prolog"),
  (Text
"promela",Text
"Promela"),
  (Text
"pstricks",Text
"PSTricks"),
  (Text
"python",Text
"Python"),
  (Text
"r",Text
"R"),
  (Text
"reduce",Text
"Reduce"),
  (Text
"rexx",Text
"Rexx"),
  (Text
"rsl",Text
"RSL"),
  (Text
"ruby",Text
"Ruby"),
  (Text
"s",Text
"S"),
  (Text
"sas",Text
"SAS"),
  (Text
"scala",Text
"Scala"),
  (Text
"scilab",Text
"Scilab"),
  (Text
"sh",Text
"sh"),
  (Text
"shelxl",Text
"SHELXL"),
  (Text
"simula",Text
"Simula"),
  (Text
"sparql",Text
"SPARQL"),
  (Text
"sql",Text
"SQL"),
  (Text
"swift",Text
"Swift"),
  (Text
"tcl",Text
"tcl"),
  (Text
"tex",Text
"TeX"),
  (Text
"latex",Text
"TeX"),
  (Text
"vbscript",Text
"VBScript"),
  (Text
"verilog",Text
"Verilog"),
  (Text
"vhdl",Text
"VHDL"),
  (Text
"vrml",Text
"VRML"),
  (Text
"xml",Text
"XML"),
  (Text
"xslt",Text
"XSLT")]

-- | Determine listings language name from skylighting language name.
toListingsLanguage :: T.Text -> Maybe T.Text
toListingsLanguage :: Text -> Maybe Text
toListingsLanguage Text
lang = Text -> Map Text Text -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup (Text -> Text
T.toLower Text
lang) Map Text Text
langToListingsMap

-- | Determine skylighting language name from listings language name.
fromListingsLanguage :: T.Text -> Maybe T.Text
fromListingsLanguage :: Text -> Maybe Text
fromListingsLanguage Text
lang = Text -> Map Text Text -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Text
lang Map Text Text
listingsToLangMap

-- | Lookup style from a name. If the name is a standard style,
-- load it; if it ends in ".theme", attempt to load a KDE theme
-- from the file path specified.
lookupHighlightingStyle :: PandocMonad m => String -> m Style
lookupHighlightingStyle :: forall (m :: * -> *). PandocMonad m => String -> m Style
lookupHighlightingStyle String
s
  | String -> String
takeExtension String
s String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
".theme" = -- attempt to load KDE theme
    do ByteString
contents <- String -> m ByteString
forall (m :: * -> *). PandocMonad m => String -> m ByteString
readFileLazy String
s
       case ByteString -> Either String Style
parseTheme ByteString
contents of
            Left String
_    -> PandocError -> m Style
forall a. PandocError -> m a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (PandocError -> m Style) -> PandocError -> m Style
forall a b. (a -> b) -> a -> b
$ Text -> PandocError
PandocOptionError (Text -> PandocError) -> Text -> PandocError
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$
                           String
"Could not read highlighting theme " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
s
            Right Style
sty -> Style -> m Style
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return Style
sty
  | Bool
otherwise =
  case Text -> [(Text, Style)] -> Maybe Style
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup (Text -> Text
T.toLower (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack String
s) [(Text, Style)]
highlightingStyles of
       Just Style
sty -> Style -> m Style
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return Style
sty
       Maybe Style
Nothing  -> PandocError -> m Style
forall a. PandocError -> m a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (PandocError -> m Style) -> PandocError -> m Style
forall a b. (a -> b) -> a -> b
$ Text -> PandocError
PandocOptionError (Text -> PandocError) -> Text -> PandocError
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$
                      String
"Unknown highlight-style " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
s