{-# LANGUAGE LambdaCase          #-}
{-# LANGUAGE OverloadedStrings   #-}
{-# LANGUAGE TupleSections       #-}
{- |
   Module      : Text.Pandoc.App.Input
   Copyright   : © 2006-2024 John MacFarlane
   License     : GPL-2.0-or-later
   Maintainer  : John MacFarlane <jgm@berkeley@edu>

Read from the file system into a pandoc document.
-}
module Text.Pandoc.App.Input
  ( InputParameters (..)
  , readInput
  ) where

import Control.Monad ((>=>))
import Control.Monad.Except (throwError, catchError)
import Data.Text (Text)
import Network.URI (URI (..), parseURI)
import Text.Pandoc.Transforms (adjustLinksAndIds)
import Text.Pandoc.Class ( PandocMonad, openURL, toTextM
                         , readFileStrict, readStdinStrict, report)
import Text.Pandoc.Definition (Pandoc)
import Text.Pandoc.Error (PandocError (..))
import Text.Pandoc.Logging (LogMessage (..))
import Text.Pandoc.MIME (getCharset, MimeType)
import Text.Pandoc.Options (ReaderOptions (..))
import Text.Pandoc.Readers (Reader (..))
import Text.Pandoc.Shared (tabFilter)
import Text.Pandoc.URI (uriPathToPath)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text as T

-- | Settings specifying how and which input should be processed.
data InputParameters m = InputParameters
  { forall (m :: * -> *). InputParameters m -> Reader m
inputReader         :: Reader m
  , forall (m :: * -> *). InputParameters m -> Text
inputReaderName     :: Text
  , forall (m :: * -> *). InputParameters m -> ReaderOptions
inputReaderOptions  :: ReaderOptions
  , forall (m :: * -> *). InputParameters m -> [FilePath]
inputSources        :: [FilePath]
  , forall (m :: * -> *). InputParameters m -> Maybe Int
inputSpacesPerTab   :: Maybe Int
  , forall (m :: * -> *). InputParameters m -> Bool
inputFileScope      :: Bool
  }

-- | Read all input into a pandoc document.
readInput :: PandocMonad m => InputParameters m -> m Pandoc
readInput :: forall (m :: * -> *).
PandocMonad m =>
InputParameters m -> m Pandoc
readInput InputParameters m
params = do
  let sources :: [FilePath]
sources = InputParameters m -> [FilePath]
forall (m :: * -> *). InputParameters m -> [FilePath]
inputSources InputParameters m
params
  let readerName :: Text
readerName = InputParameters m -> Text
forall (m :: * -> *). InputParameters m -> Text
inputReaderName InputParameters m
params
  let readerOpts :: ReaderOptions
readerOpts = InputParameters m -> ReaderOptions
forall (m :: * -> *). InputParameters m -> ReaderOptions
inputReaderOptions InputParameters m
params
  let convertTabs :: Text -> Text
      convertTabs :: Text -> Text
convertTabs = Int -> Text -> Text
tabFilter (Int -> Text -> Text) -> Int -> Text -> Text
forall a b. (a -> b) -> a -> b
$ case InputParameters m -> Maybe Int
forall (m :: * -> *). InputParameters m -> Maybe Int
inputSpacesPerTab InputParameters m
params of
        Maybe Int
Nothing -> Int
0
        Just Int
ts -> if Text
readerName Text -> [Text] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text
"t2t", Text
"man", Text
"tsv"]
                   then Int
0
                   else Int
ts

  [(FilePath, (ByteString, Maybe Text))]
inputs <- [FilePath] -> m [(FilePath, (ByteString, Maybe Text))]
forall (m :: * -> *).
PandocMonad m =>
[FilePath] -> m [(FilePath, (ByteString, Maybe Text))]
readSources [FilePath]
sources

  case InputParameters m -> Reader m
forall (m :: * -> *). InputParameters m -> Reader m
inputReader InputParameters m
params of
    TextReader forall a. ToSources a => ReaderOptions -> a -> m Pandoc
r
      | Text
readerName Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"json" ->
          [Pandoc] -> Pandoc
forall a. Monoid a => [a] -> a
mconcat ([Pandoc] -> Pandoc) -> m [Pandoc] -> m Pandoc
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ((FilePath, (ByteString, Maybe Text)) -> m Pandoc)
-> [(FilePath, (ByteString, Maybe Text))] -> m [Pandoc]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM ((Text -> Text)
-> (FilePath, (ByteString, Maybe Text)) -> m (FilePath, Text)
forall (m :: * -> *).
PandocMonad m =>
(Text -> Text)
-> (FilePath, (ByteString, Maybe Text)) -> m (FilePath, Text)
inputToText Text -> Text
convertTabs ((FilePath, (ByteString, Maybe Text)) -> m (FilePath, Text))
-> ((FilePath, Text) -> m Pandoc)
-> (FilePath, (ByteString, Maybe Text))
-> m Pandoc
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> ReaderOptions -> [(FilePath, Text)] -> m Pandoc
forall a. ToSources a => ReaderOptions -> a -> m Pandoc
r ReaderOptions
readerOpts ([(FilePath, Text)] -> m Pandoc)
-> ((FilePath, Text) -> [(FilePath, Text)])
-> (FilePath, Text)
-> m Pandoc
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((FilePath, Text) -> [(FilePath, Text)] -> [(FilePath, Text)]
forall a. a -> [a] -> [a]
:[]))
                           [(FilePath, (ByteString, Maybe Text))]
inputs
      | InputParameters m -> Bool
forall (m :: * -> *). InputParameters m -> Bool
inputFileScope InputParameters m
params ->
          [Pandoc] -> Pandoc
forall a. Monoid a => [a] -> a
mconcat ([Pandoc] -> Pandoc) -> m [Pandoc] -> m Pandoc
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ((FilePath, (ByteString, Maybe Text)) -> m Pandoc)
-> [(FilePath, (ByteString, Maybe Text))] -> m [Pandoc]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM
              (\(FilePath, (ByteString, Maybe Text))
source -> do
                  (FilePath
fp, Text
txt) <- (Text -> Text)
-> (FilePath, (ByteString, Maybe Text)) -> m (FilePath, Text)
forall (m :: * -> *).
PandocMonad m =>
(Text -> Text)
-> (FilePath, (ByteString, Maybe Text)) -> m (FilePath, Text)
inputToText Text -> Text
convertTabs (FilePath, (ByteString, Maybe Text))
source
                  Extensions -> Text -> [Text] -> Transform
adjustLinksAndIds (ReaderOptions -> Extensions
readerExtensions ReaderOptions
readerOpts)
                    (FilePath -> Text
T.pack FilePath
fp) (((FilePath, (ByteString, Maybe Text)) -> Text)
-> [(FilePath, (ByteString, Maybe Text))] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (FilePath -> Text
T.pack (FilePath -> Text)
-> ((FilePath, (ByteString, Maybe Text)) -> FilePath)
-> (FilePath, (ByteString, Maybe Text))
-> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (FilePath, (ByteString, Maybe Text)) -> FilePath
forall a b. (a, b) -> a
fst) [(FilePath, (ByteString, Maybe Text))]
inputs)
                    Transform -> m Pandoc -> m Pandoc
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ReaderOptions -> [(FilePath, Text)] -> m Pandoc
forall a. ToSources a => ReaderOptions -> a -> m Pandoc
r ReaderOptions
readerOpts [(FilePath
fp, Text
txt)])
              [(FilePath, (ByteString, Maybe Text))]
inputs
      | Bool
otherwise -> ((FilePath, (ByteString, Maybe Text)) -> m (FilePath, Text))
-> [(FilePath, (ByteString, Maybe Text))] -> m [(FilePath, Text)]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM ((Text -> Text)
-> (FilePath, (ByteString, Maybe Text)) -> m (FilePath, Text)
forall (m :: * -> *).
PandocMonad m =>
(Text -> Text)
-> (FilePath, (ByteString, Maybe Text)) -> m (FilePath, Text)
inputToText Text -> Text
convertTabs) [(FilePath, (ByteString, Maybe Text))]
inputs m [(FilePath, Text)]
-> ([(FilePath, Text)] -> m Pandoc) -> m Pandoc
forall a b. m a -> (a -> m b) -> m b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ReaderOptions -> [(FilePath, Text)] -> m Pandoc
forall a. ToSources a => ReaderOptions -> a -> m Pandoc
r ReaderOptions
readerOpts
    ByteStringReader ReaderOptions -> ByteString -> m Pandoc
r ->
      [Pandoc] -> Pandoc
forall a. Monoid a => [a] -> a
mconcat ([Pandoc] -> Pandoc) -> m [Pandoc] -> m Pandoc
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ((FilePath, (ByteString, Maybe Text)) -> m Pandoc)
-> [(FilePath, (ByteString, Maybe Text))] -> m [Pandoc]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM (ReaderOptions -> ByteString -> m Pandoc
r ReaderOptions
readerOpts (ByteString -> m Pandoc)
-> ((FilePath, (ByteString, Maybe Text)) -> ByteString)
-> (FilePath, (ByteString, Maybe Text))
-> m Pandoc
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (FilePath, (ByteString, Maybe Text)) -> ByteString
inputToLazyByteString) [(FilePath, (ByteString, Maybe Text))]
inputs

readSources :: PandocMonad m
            => [FilePath] -> m [(FilePath, (BS.ByteString, Maybe MimeType))]
readSources :: forall (m :: * -> *).
PandocMonad m =>
[FilePath] -> m [(FilePath, (ByteString, Maybe Text))]
readSources [FilePath]
srcs =
  (FilePath -> m (FilePath, (ByteString, Maybe Text)))
-> [FilePath] -> m [(FilePath, (ByteString, Maybe Text))]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM (\FilePath
fp -> do (ByteString, Maybe Text)
t <- FilePath -> m (ByteString, Maybe Text)
forall (m :: * -> *).
PandocMonad m =>
FilePath -> m (ByteString, Maybe Text)
readSource FilePath
fp
                  (FilePath, (ByteString, Maybe Text))
-> m (FilePath, (ByteString, Maybe Text))
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (if FilePath
fp FilePath -> FilePath -> Bool
forall a. Eq a => a -> a -> Bool
== FilePath
"-" then FilePath
"" else FilePath
fp, (ByteString, Maybe Text)
t)) [FilePath]
srcs

-- | Read input from a resource, i.e., either a file, a URL, or stdin
-- (@-@).
readSource :: PandocMonad m
           => FilePath -> m (BS.ByteString, Maybe MimeType)
readSource :: forall (m :: * -> *).
PandocMonad m =>
FilePath -> m (ByteString, Maybe Text)
readSource FilePath
"-" = (,Maybe Text
forall a. Maybe a
Nothing) (ByteString -> (ByteString, Maybe Text))
-> m ByteString -> m (ByteString, Maybe Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> m ByteString
forall (m :: * -> *). PandocMonad m => m ByteString
readStdinStrict
readSource FilePath
src =
  case FilePath -> Maybe URI
parseURI FilePath
src of
    Just URI
u | URI -> FilePath
uriScheme URI
u FilePath -> [FilePath] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [FilePath
"http:",FilePath
"https:"] -> Text -> m (ByteString, Maybe Text)
forall (m :: * -> *).
PandocMonad m =>
Text -> m (ByteString, Maybe Text)
openURL (FilePath -> Text
T.pack FilePath
src)
           | URI -> FilePath
uriScheme URI
u FilePath -> FilePath -> Bool
forall a. Eq a => a -> a -> Bool
== FilePath
"file:" ->
               (,Maybe Text
forall a. Maybe a
Nothing) (ByteString -> (ByteString, Maybe Text))
-> m ByteString -> m (ByteString, Maybe Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
                 FilePath -> m ByteString
forall (m :: * -> *). PandocMonad m => FilePath -> m ByteString
readFileStrict (Text -> FilePath
uriPathToPath (Text -> FilePath) -> Text -> FilePath
forall a b. (a -> b) -> a -> b
$ FilePath -> Text
T.pack (FilePath -> Text) -> FilePath -> Text
forall a b. (a -> b) -> a -> b
$ URI -> FilePath
uriPath URI
u)
    Maybe URI
_       -> (,Maybe Text
forall a. Maybe a
Nothing) (ByteString -> (ByteString, Maybe Text))
-> m ByteString -> m (ByteString, Maybe Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> FilePath -> m ByteString
forall (m :: * -> *). PandocMonad m => FilePath -> m ByteString
readFileStrict FilePath
src

inputToText :: PandocMonad m
            => (Text -> Text)
            -> (FilePath, (BS.ByteString, Maybe MimeType))
            -> m (FilePath, Text)
inputToText :: forall (m :: * -> *).
PandocMonad m =>
(Text -> Text)
-> (FilePath, (ByteString, Maybe Text)) -> m (FilePath, Text)
inputToText Text -> Text
convTabs (FilePath
fp, (ByteString
bs,Maybe Text
mt)) =
  (FilePath
fp,) (Text -> (FilePath, Text))
-> (Text -> Text) -> Text -> (FilePath, Text)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
convTabs (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
T.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'\r') (Text -> (FilePath, Text)) -> m Text -> m (FilePath, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
  case Maybe Text
mt Maybe Text -> (Text -> Maybe Text) -> Maybe Text
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Text -> Maybe Text
getCharset of
    Just Text
"UTF-8"      -> FilePath -> ByteString -> m Text
forall (m :: * -> *).
PandocMonad m =>
FilePath -> ByteString -> m Text
toTextM FilePath
fp ByteString
bs
    Just Text
"ISO-8859-1" -> Text -> m Text
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> m Text) -> Text -> m Text
forall a b. (a -> b) -> a -> b
$ FilePath -> Text
T.pack (FilePath -> Text) -> FilePath -> Text
forall a b. (a -> b) -> a -> b
$ ByteString -> FilePath
B8.unpack ByteString
bs
    Just Text
charset      -> PandocError -> m Text
forall a. PandocError -> m a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (PandocError -> m Text) -> PandocError -> m Text
forall a b. (a -> b) -> a -> b
$ Text -> PandocError
PandocUnsupportedCharsetError Text
charset
    Maybe Text
Nothing           -> m Text -> (PandocError -> m Text) -> m Text
forall a. m a -> (PandocError -> m a) -> m a
forall e (m :: * -> *) a.
MonadError e m =>
m a -> (e -> m a) -> m a
catchError
                           (FilePath -> ByteString -> m Text
forall (m :: * -> *).
PandocMonad m =>
FilePath -> ByteString -> m Text
toTextM FilePath
fp ByteString
bs)
                           (\case
                              PandocUTF8DecodingError{} -> do
                                LogMessage -> m ()
forall (m :: * -> *). PandocMonad m => LogMessage -> m ()
report (LogMessage -> m ()) -> LogMessage -> m ()
forall a b. (a -> b) -> a -> b
$ FilePath -> LogMessage
NotUTF8Encoded
                                  (if FilePath -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null FilePath
fp
                                      then FilePath
"input"
                                      else FilePath
fp)
                                Text -> m Text
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> m Text) -> Text -> m Text
forall a b. (a -> b) -> a -> b
$ FilePath -> Text
T.pack (FilePath -> Text) -> FilePath -> Text
forall a b. (a -> b) -> a -> b
$ ByteString -> FilePath
B8.unpack ByteString
bs
                              PandocError
e -> PandocError -> m Text
forall a. PandocError -> m a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError PandocError
e)

inputToLazyByteString :: (FilePath, (BS.ByteString, Maybe MimeType))
                      -> BL.ByteString
inputToLazyByteString :: (FilePath, (ByteString, Maybe Text)) -> ByteString
inputToLazyByteString (FilePath
_, (ByteString
bs,Maybe Text
_)) = ByteString -> ByteString
BL.fromStrict ByteString
bs