{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.Sources
   Copyright   : Copyright (C) 2021-2024 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Defines Sources object to be used as input to pandoc parsers and redefines Char
parsers so they get source position information from it.
-}

module Text.Pandoc.Sources
  ( Sources(..)
  , ToSources(..)
  , UpdateSourcePos(..)
  , sourcesToText
  , initialSourceName
  , addToSources
  , ensureFinalNewlines
  , addToInput
  , satisfy
  , oneOf
  , noneOf
  , anyChar
  , char
  , string
  , newline
  , space
  , spaces
  , letter
  , digit
  , hexDigit
  , alphaNum
  )
where
import qualified Text.Parsec as P
import Text.Parsec (Stream(..), ParsecT)
import Text.Parsec.Pos as P
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BL
import Data.Char (isSpace, isLetter, isAlphaNum, isDigit, isHexDigit)
import Data.String (IsString(..))
import qualified Data.List.NonEmpty as NonEmpty

-- | A list of inputs labeled with source positions.  It is assumed
-- that the 'Text's have @\n@ line endings.
newtype Sources = Sources { Sources -> [(SourcePos, Text)]
unSources :: [(SourcePos, Text)] }
  deriving (Int -> Sources -> ShowS
[Sources] -> ShowS
Sources -> String
(Int -> Sources -> ShowS)
-> (Sources -> String) -> ([Sources] -> ShowS) -> Show Sources
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Sources -> ShowS
showsPrec :: Int -> Sources -> ShowS
$cshow :: Sources -> String
show :: Sources -> String
$cshowList :: [Sources] -> ShowS
showList :: [Sources] -> ShowS
Show, NonEmpty Sources -> Sources
Sources -> Sources -> Sources
(Sources -> Sources -> Sources)
-> (NonEmpty Sources -> Sources)
-> (forall b. Integral b => b -> Sources -> Sources)
-> Semigroup Sources
forall b. Integral b => b -> Sources -> Sources
forall a.
(a -> a -> a)
-> (NonEmpty a -> a)
-> (forall b. Integral b => b -> a -> a)
-> Semigroup a
$c<> :: Sources -> Sources -> Sources
<> :: Sources -> Sources -> Sources
$csconcat :: NonEmpty Sources -> Sources
sconcat :: NonEmpty Sources -> Sources
$cstimes :: forall b. Integral b => b -> Sources -> Sources
stimes :: forall b. Integral b => b -> Sources -> Sources
Semigroup, Semigroup Sources
Sources
Semigroup Sources =>
Sources
-> (Sources -> Sources -> Sources)
-> ([Sources] -> Sources)
-> Monoid Sources
[Sources] -> Sources
Sources -> Sources -> Sources
forall a.
Semigroup a =>
a -> (a -> a -> a) -> ([a] -> a) -> Monoid a
$cmempty :: Sources
mempty :: Sources
$cmappend :: Sources -> Sources -> Sources
mappend :: Sources -> Sources -> Sources
$cmconcat :: [Sources] -> Sources
mconcat :: [Sources] -> Sources
Monoid)

instance Monad m => Stream Sources m Char where
  uncons :: Sources -> m (Maybe (Char, Sources))
uncons (Sources []) = Maybe (Char, Sources) -> m (Maybe (Char, Sources))
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe (Char, Sources)
forall a. Maybe a
Nothing
  uncons (Sources ((SourcePos
pos,Text
t):[(SourcePos, Text)]
rest)) =
    case Text -> Maybe (Char, Text)
T.uncons Text
t of
      Maybe (Char, Text)
Nothing -> Sources -> m (Maybe (Char, Sources))
forall s (m :: * -> *) t. Stream s m t => s -> m (Maybe (t, s))
uncons ([(SourcePos, Text)] -> Sources
Sources [(SourcePos, Text)]
rest)
      Just (Char
c,Text
t') -> Maybe (Char, Sources) -> m (Maybe (Char, Sources))
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe (Char, Sources) -> m (Maybe (Char, Sources)))
-> Maybe (Char, Sources) -> m (Maybe (Char, Sources))
forall a b. (a -> b) -> a -> b
$ (Char, Sources) -> Maybe (Char, Sources)
forall a. a -> Maybe a
Just (Char
c, [(SourcePos, Text)] -> Sources
Sources ((SourcePos
pos,Text
t')(SourcePos, Text) -> [(SourcePos, Text)] -> [(SourcePos, Text)]
forall a. a -> [a] -> [a]
:[(SourcePos, Text)]
rest))

instance IsString Sources where
  fromString :: String -> Sources
fromString String
s = [(SourcePos, Text)] -> Sources
Sources [(String -> SourcePos
P.initialPos String
"", String -> Text
T.pack ((Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'\r') String
s))]

class ToSources a where
  toSources :: a -> Sources

instance ToSources Text where
  toSources :: Text -> Sources
toSources Text
t = [(SourcePos, Text)] -> Sources
Sources [(String -> SourcePos
P.initialPos String
"", (Char -> Bool) -> Text -> Text
T.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'\r') Text
t)]

instance ToSources [(FilePath, Text)] where
  toSources :: [(String, Text)] -> Sources
toSources = [(SourcePos, Text)] -> Sources
Sources
            ([(SourcePos, Text)] -> Sources)
-> ([(String, Text)] -> [(SourcePos, Text)])
-> [(String, Text)]
-> Sources
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((String, Text) -> (SourcePos, Text))
-> [(String, Text)] -> [(SourcePos, Text)]
forall a b. (a -> b) -> [a] -> [b]
map (\(String
fp,Text
t) ->
                    (String -> SourcePos
P.initialPos String
fp, Text -> Char -> Text
T.snoc ((Char -> Bool) -> Text -> Text
T.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'\r') Text
t) Char
'\n'))

instance ToSources Sources where
  toSources :: Sources -> Sources
toSources = Sources -> Sources
forall a. a -> a
id

sourcesToText :: Sources -> Text
sourcesToText :: Sources -> Text
sourcesToText (Sources [(SourcePos, Text)]
xs) = [Text] -> Text
forall a. Monoid a => [a] -> a
mconcat ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ ((SourcePos, Text) -> Text) -> [(SourcePos, Text)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (SourcePos, Text) -> Text
forall a b. (a, b) -> b
snd [(SourcePos, Text)]
xs

addToSources :: Monad m => SourcePos -> Text -> ParsecT Sources u m ()
addToSources :: forall (m :: * -> *) u.
Monad m =>
SourcePos -> Text -> ParsecT Sources u m ()
addToSources SourcePos
pos Text
t = do
  SourcePos
curpos <- ParsecT Sources u m SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
P.getPosition
  Sources [(SourcePos, Text)]
xs <- ParsecT Sources u m Sources
forall (m :: * -> *) s u. Monad m => ParsecT s u m s
P.getInput
  let xs' :: [(SourcePos, Text)]
xs' = case [(SourcePos, Text)]
xs of
               [] -> []
               ((SourcePos
_,Text
t'):[(SourcePos, Text)]
rest) -> (SourcePos
curpos,Text
t')(SourcePos, Text) -> [(SourcePos, Text)] -> [(SourcePos, Text)]
forall a. a -> [a] -> [a]
:[(SourcePos, Text)]
rest
  Sources -> ParsecT Sources u m ()
forall (m :: * -> *) s u. Monad m => s -> ParsecT s u m ()
P.setInput (Sources -> ParsecT Sources u m ())
-> Sources -> ParsecT Sources u m ()
forall a b. (a -> b) -> a -> b
$ [(SourcePos, Text)] -> Sources
Sources ((SourcePos
pos, (Char -> Bool) -> Text -> Text
T.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'\r') Text
t)(SourcePos, Text) -> [(SourcePos, Text)] -> [(SourcePos, Text)]
forall a. a -> [a] -> [a]
:[(SourcePos, Text)]
xs')

ensureFinalNewlines :: Int -- ^ number of trailing newlines
                    -> Sources
                    -> Sources
ensureFinalNewlines :: Int -> Sources -> Sources
ensureFinalNewlines Int
n (Sources [(SourcePos, Text)]
xs) =
  case [(SourcePos, Text)] -> Maybe (NonEmpty (SourcePos, Text))
forall a. [a] -> Maybe (NonEmpty a)
NonEmpty.nonEmpty [(SourcePos, Text)]
xs of
    Maybe (NonEmpty (SourcePos, Text))
Nothing -> [(SourcePos, Text)] -> Sources
Sources [(String -> SourcePos
initialPos String
"", Int -> Text -> Text
T.replicate Int
n Text
"\n")]
    Just NonEmpty (SourcePos, Text)
lst ->
      case NonEmpty (SourcePos, Text) -> (SourcePos, Text)
forall a. NonEmpty a -> a
NonEmpty.last NonEmpty (SourcePos, Text)
lst of
        (SourcePos
spos, Text
t) ->
          case Text -> Int
T.length ((Char -> Bool) -> Text -> Text
T.takeWhileEnd (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'\n') Text
t) of
            Int
len | Int
len Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
n -> [(SourcePos, Text)] -> Sources
Sources [(SourcePos, Text)]
xs
                | Bool
otherwise -> [(SourcePos, Text)] -> Sources
Sources (NonEmpty (SourcePos, Text) -> [(SourcePos, Text)]
forall a. NonEmpty a -> [a]
NonEmpty.init NonEmpty (SourcePos, Text)
lst [(SourcePos, Text)] -> [(SourcePos, Text)] -> [(SourcePos, Text)]
forall a. [a] -> [a] -> [a]
++
                                        [(SourcePos
spos,
                                          Text
t Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Int -> Text -> Text
T.replicate (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
len) Text
"\n")])

class UpdateSourcePos s c where
  updateSourcePos :: SourcePos -> c -> s -> SourcePos

instance UpdateSourcePos Text Char where
   updateSourcePos :: SourcePos -> Char -> Text -> SourcePos
updateSourcePos SourcePos
pos Char
c Text
_ = SourcePos -> Char -> SourcePos
updatePosChar SourcePos
pos Char
c

instance UpdateSourcePos [Char] Char where
   updateSourcePos :: SourcePos -> Char -> String -> SourcePos
updateSourcePos SourcePos
pos Char
c String
_ = SourcePos -> Char -> SourcePos
updatePosChar SourcePos
pos Char
c

instance UpdateSourcePos BS.ByteString Char where
   updateSourcePos :: SourcePos -> Char -> ByteString -> SourcePos
updateSourcePos SourcePos
pos Char
c ByteString
_ = SourcePos -> Char -> SourcePos
updatePosChar SourcePos
pos Char
c

instance UpdateSourcePos BL.ByteString Char where
   updateSourcePos :: SourcePos -> Char -> ByteString -> SourcePos
updateSourcePos SourcePos
pos Char
c ByteString
_ = SourcePos -> Char -> SourcePos
updatePosChar SourcePos
pos Char
c

instance UpdateSourcePos Sources Char where
   updateSourcePos :: SourcePos -> Char -> Sources -> SourcePos
updateSourcePos SourcePos
pos Char
c Sources
sources =
     case Sources
sources of
       Sources [] -> SourcePos -> Char -> SourcePos
updatePosChar SourcePos
pos Char
c
       Sources ((SourcePos
_,Text
t):(SourcePos
pos',Text
_):[(SourcePos, Text)]
_)
         | Text -> Bool
T.null Text
t  -> SourcePos
pos'
       Sources [(SourcePos, Text)]
_ ->
           case Char
c of
             Char
'\n' -> SourcePos -> Int -> SourcePos
incSourceLine (SourcePos -> Int -> SourcePos
setSourceColumn SourcePos
pos Int
1) Int
1
             Char
'\t' -> SourcePos -> Int -> SourcePos
incSourceColumn SourcePos
pos (Int
4 Int -> Int -> Int
forall a. Num a => a -> a -> a
- ((SourcePos -> Int
sourceColumn SourcePos
pos Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`mod` Int
4))
             Char
_    -> SourcePos -> Int -> SourcePos
incSourceColumn SourcePos
pos Int
1

-- | Get name of first source in 'Sources'.
initialSourceName :: Sources -> FilePath
initialSourceName :: Sources -> String
initialSourceName (Sources []) = String
""
initialSourceName (Sources ((SourcePos
pos,Text
_):[(SourcePos, Text)]
_)) = SourcePos -> String
sourceName SourcePos
pos

-- | Add some text to the beginning of the input sources.
-- This simplifies code that expands macros.
addToInput :: Monad m => Text -> ParsecT Sources u m ()
addToInput :: forall (m :: * -> *) u. Monad m => Text -> ParsecT Sources u m ()
addToInput Text
t = do
  Sources [(SourcePos, Text)]
xs <- ParsecT Sources u m Sources
forall (m :: * -> *) s u. Monad m => ParsecT s u m s
P.getInput
  case [(SourcePos, Text)]
xs of
    [] -> Sources -> ParsecT Sources u m ()
forall (m :: * -> *) s u. Monad m => s -> ParsecT s u m ()
P.setInput (Sources -> ParsecT Sources u m ())
-> Sources -> ParsecT Sources u m ()
forall a b. (a -> b) -> a -> b
$ [(SourcePos, Text)] -> Sources
Sources [(String -> SourcePos
initialPos String
"",Text
t)]
    (SourcePos
pos,Text
t'):[(SourcePos, Text)]
rest -> Sources -> ParsecT Sources u m ()
forall (m :: * -> *) s u. Monad m => s -> ParsecT s u m ()
P.setInput (Sources -> ParsecT Sources u m ())
-> Sources -> ParsecT Sources u m ()
forall a b. (a -> b) -> a -> b
$ [(SourcePos, Text)] -> Sources
Sources ((SourcePos
pos, Text
t Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
t')(SourcePos, Text) -> [(SourcePos, Text)] -> [(SourcePos, Text)]
forall a. a -> [a] -> [a]
:[(SourcePos, Text)]
rest)

-- We need to redefine the parsers in Text.Parsec.Char so that they
-- update source positions properly from the Sources stream.

satisfy :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
         => (Char -> Bool) -> ParsecT s u m Char
satisfy :: forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy Char -> Bool
f = (Char -> String)
-> (SourcePos -> Char -> s -> SourcePos)
-> (Char -> Maybe Char)
-> ParsecT s u m Char
forall s (m :: * -> *) t a u.
Stream s m t =>
(t -> String)
-> (SourcePos -> t -> s -> SourcePos)
-> (t -> Maybe a)
-> ParsecT s u m a
P.tokenPrim Char -> String
forall a. Show a => a -> String
show SourcePos -> Char -> s -> SourcePos
forall s c. UpdateSourcePos s c => SourcePos -> c -> s -> SourcePos
updateSourcePos Char -> Maybe Char
matcher
 where
  matcher :: Char -> Maybe Char
matcher !Char
c = if Char -> Bool
f Char
c then Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c else Maybe Char
forall a. Maybe a
Nothing

oneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
      => [Char] -> ParsecT s u m Char
oneOf :: forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
String -> ParsecT s u m Char
oneOf String
cs = (Char -> Bool) -> ParsecT s u m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
cs)

noneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
       => [Char] -> ParsecT s u m Char
noneOf :: forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
String -> ParsecT s u m Char
noneOf String
cs = (Char -> Bool) -> ParsecT s u m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` String
cs)

anyChar :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
        => ParsecT s u m Char
anyChar :: forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m Char
anyChar = (Char -> Bool) -> ParsecT s u m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (Bool -> Char -> Bool
forall a b. a -> b -> a
const Bool
True)

char :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
     => Char -> ParsecT s u m Char
char :: forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
c = (Char -> Bool) -> ParsecT s u m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c)

string :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
       => [Char] -> ParsecT s u m [Char]
string :: forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
String -> ParsecT s u m String
string = (Char -> ParsecT s u m Char) -> String -> ParsecT s u m String
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM Char -> ParsecT s u m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char

newline :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
        => ParsecT s u m Char
newline :: forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m Char
newline = (Char -> Bool) -> ParsecT s u m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n')

space :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
      => ParsecT s u m Char
space :: forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m Char
space = (Char -> Bool) -> ParsecT s u m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy Char -> Bool
isSpace

spaces :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
       => ParsecT s u m ()
spaces :: forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m ()
spaces = ParsecT s u m Char -> ParsecT s u m ()
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
P.skipMany ParsecT s u m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m Char
space ParsecT s u m () -> String -> ParsecT s u m ()
forall s u (m :: * -> *) a.
ParsecT s u m a -> String -> ParsecT s u m a
P.<?> String
"white space"

letter :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
       => ParsecT s u m Char
letter :: forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m Char
letter = (Char -> Bool) -> ParsecT s u m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy Char -> Bool
isLetter

alphaNum :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
         => ParsecT s u m Char
alphaNum :: forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m Char
alphaNum = (Char -> Bool) -> ParsecT s u m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy Char -> Bool
isAlphaNum

digit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
      => ParsecT s u m Char
digit :: forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m Char
digit = (Char -> Bool) -> ParsecT s u m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy Char -> Bool
isDigit

hexDigit :: (Monad m, Stream s m Char, UpdateSourcePos s Char)
         => ParsecT s u m Char
hexDigit :: forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m Char
hexDigit = (Char -> Bool) -> ParsecT s u m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy Char -> Bool
isHexDigit