{- |
   Module      : Text.Pandoc.CSV
   Copyright   : Copyright (C) 2017-2024 John MacFarlane <jgm@berkeley.edu>
   License     : GNU GPL, version 2 or above
   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Simple CSV parser.
-}

module Text.Pandoc.CSV (
  CSVOptions(..),
  defaultCSVOptions,
  parseCSV,
  ParseError
) where

import Control.Monad (unless, void, mzero)
import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Parsing hiding (escaped)

type Parser = Parsec Text ()

data CSVOptions = CSVOptions{
    CSVOptions -> Char
csvDelim     :: Char
  , CSVOptions -> Maybe Char
csvQuote     :: Maybe Char
  , CSVOptions -> Bool
csvKeepSpace :: Bool -- treat whitespace following delim as significant
  , CSVOptions -> Maybe Char
csvEscape    :: Maybe Char -- default is to double up quote
} deriving (ReadPrec [CSVOptions]
ReadPrec CSVOptions
Int -> ReadS CSVOptions
ReadS [CSVOptions]
(Int -> ReadS CSVOptions)
-> ReadS [CSVOptions]
-> ReadPrec CSVOptions
-> ReadPrec [CSVOptions]
-> Read CSVOptions
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
$creadsPrec :: Int -> ReadS CSVOptions
readsPrec :: Int -> ReadS CSVOptions
$creadList :: ReadS [CSVOptions]
readList :: ReadS [CSVOptions]
$creadPrec :: ReadPrec CSVOptions
readPrec :: ReadPrec CSVOptions
$creadListPrec :: ReadPrec [CSVOptions]
readListPrec :: ReadPrec [CSVOptions]
Read, Int -> CSVOptions -> ShowS
[CSVOptions] -> ShowS
CSVOptions -> String
(Int -> CSVOptions -> ShowS)
-> (CSVOptions -> String)
-> ([CSVOptions] -> ShowS)
-> Show CSVOptions
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> CSVOptions -> ShowS
showsPrec :: Int -> CSVOptions -> ShowS
$cshow :: CSVOptions -> String
show :: CSVOptions -> String
$cshowList :: [CSVOptions] -> ShowS
showList :: [CSVOptions] -> ShowS
Show)

defaultCSVOptions :: CSVOptions
defaultCSVOptions :: CSVOptions
defaultCSVOptions = CSVOptions{
    csvDelim :: Char
csvDelim = Char
','
  , csvQuote :: Maybe Char
csvQuote = Char -> Maybe Char
forall a. a -> Maybe a
Just Char
'"'
  , csvKeepSpace :: Bool
csvKeepSpace = Bool
False
  , csvEscape :: Maybe Char
csvEscape = Maybe Char
forall a. Maybe a
Nothing }

parseCSV :: CSVOptions -> Text -> Either ParseError [[Text]]
parseCSV :: CSVOptions -> Text -> Either ParseError [[Text]]
parseCSV CSVOptions
opts Text
t = Parsec Text () [[Text]]
-> String -> Text -> Either ParseError [[Text]]
forall s t a.
Stream s Identity t =>
Parsec s () a -> String -> s -> Either ParseError a
parse (CSVOptions -> Parsec Text () [[Text]]
pCSV CSVOptions
opts) String
"csv" Text
t

pCSV :: CSVOptions -> Parser [[Text]]
pCSV :: CSVOptions -> Parsec Text () [[Text]]
pCSV CSVOptions
opts =
  (CSVOptions -> Parser [Text]
pCSVRow CSVOptions
opts Parser [Text]
-> ParsecT Text () Identity () -> Parsec Text () [[Text]]
forall s (m :: * -> *) t u a sep.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m sep -> ParsecT s u m [a]
`sepEndBy` ParsecT Text () Identity ()
endline) Parsec Text () [[Text]]
-> ParsecT Text () Identity () -> Parsec Text () [[Text]]
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (ParsecT Text () Identity ()
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m ()
spaces ParsecT Text () Identity ()
-> ParsecT Text () Identity () -> ParsecT Text () Identity ()
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ParsecT Text () Identity ()
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof)

pCSVRow :: CSVOptions -> Parser [Text]
pCSVRow :: CSVOptions -> Parser [Text]
pCSVRow CSVOptions
opts = do
  Text
x <- CSVOptions -> Parser Text
pCSVCell CSVOptions
opts
  [Text]
xs <- (if Text -> Bool
T.null Text
x then Parser Text -> Parser [Text]
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 else Parser Text -> Parser [Text]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many) (Parser Text -> Parser [Text]) -> Parser Text -> Parser [Text]
forall a b. (a -> b) -> a -> b
$ CSVOptions -> ParsecT Text () Identity ()
pCSVDelim CSVOptions
opts ParsecT Text () Identity () -> Parser Text -> Parser Text
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> CSVOptions -> Parser Text
pCSVCell CSVOptions
opts
  [Text] -> Parser [Text]
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
xText -> [Text] -> [Text]
forall a. a -> [a] -> [a]
:[Text]
xs)

pCSVCell :: CSVOptions -> Parser Text
pCSVCell :: CSVOptions -> Parser Text
pCSVCell CSVOptions
opts = CSVOptions -> Parser Text
pCSVQuotedCell CSVOptions
opts Parser Text -> Parser Text -> Parser Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> CSVOptions -> Parser Text
pCSVUnquotedCell CSVOptions
opts

pCSVQuotedCell :: CSVOptions -> Parser Text
pCSVQuotedCell :: CSVOptions -> Parser Text
pCSVQuotedCell CSVOptions
opts =
  case CSVOptions -> Maybe Char
csvQuote CSVOptions
opts of
    Maybe Char
Nothing -> Parser Text
forall a. ParsecT Text () Identity a
forall (m :: * -> *) a. MonadPlus m => m a
mzero
    Just Char
quotechar -> do
      Char -> ParsecT Text () Identity Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
quotechar
      String
res <- ParsecT Text () Identity Char -> ParsecT Text () Identity String
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many ((Char -> Bool) -> ParsecT Text () Identity Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (\Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
quotechar Bool -> Bool -> Bool
&&
                                  Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c Maybe Char -> Maybe Char -> Bool
forall a. Eq a => a -> a -> Bool
/= CSVOptions -> Maybe Char
csvEscape CSVOptions
opts) ParsecT Text () Identity Char
-> ParsecT Text () Identity Char -> ParsecT Text () Identity Char
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> CSVOptions -> ParsecT Text () Identity Char
escaped CSVOptions
opts)
      Char -> ParsecT Text () Identity Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
quotechar
      Text -> Parser Text
forall a. a -> ParsecT Text () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text) -> Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack String
res

escaped :: CSVOptions -> Parser Char
escaped :: CSVOptions -> ParsecT Text () Identity Char
escaped CSVOptions
opts =
  case CSVOptions -> Maybe Char
csvEscape CSVOptions
opts of
    Maybe Char
Nothing ->
      case CSVOptions -> Maybe Char
csvQuote CSVOptions
opts of
        Maybe Char
Nothing -> ParsecT Text () Identity Char
forall a. ParsecT Text () Identity a
forall (m :: * -> *) a. MonadPlus m => m a
mzero
        Just Char
q -> ParsecT Text () Identity Char -> ParsecT Text () Identity Char
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (ParsecT Text () Identity Char -> ParsecT Text () Identity Char)
-> ParsecT Text () Identity Char -> ParsecT Text () Identity Char
forall a b. (a -> b) -> a -> b
$ Char -> ParsecT Text () Identity Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
q ParsecT Text () Identity Char
-> ParsecT Text () Identity Char -> ParsecT Text () Identity Char
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> ParsecT Text () Identity Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
q
    Just Char
c  -> ParsecT Text () Identity Char -> ParsecT Text () Identity Char
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (ParsecT Text () Identity Char -> ParsecT Text () Identity Char)
-> ParsecT Text () Identity Char -> ParsecT Text () Identity Char
forall a b. (a -> b) -> a -> b
$ Char -> ParsecT Text () Identity Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
c ParsecT Text () Identity Char
-> ParsecT Text () Identity Char -> ParsecT Text () Identity Char
forall a b.
ParsecT Text () Identity a
-> ParsecT Text () Identity b -> ParsecT Text () Identity b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> String -> ParsecT Text () Identity Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
String -> ParsecT s u m Char
noneOf String
"\r\n"

pCSVUnquotedCell :: CSVOptions -> Parser Text
pCSVUnquotedCell :: CSVOptions -> Parser Text
pCSVUnquotedCell CSVOptions
opts = String -> Text
T.pack (String -> Text) -> ParsecT Text () Identity String -> Parser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
  ParsecT Text () Identity Char -> ParsecT Text () Identity String
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many ((Char -> Bool) -> ParsecT Text () Identity Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (\Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= CSVOptions -> Char
csvDelim CSVOptions
opts Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\r' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\n'))

pCSVDelim :: CSVOptions -> Parser ()
pCSVDelim :: CSVOptions -> ParsecT Text () Identity ()
pCSVDelim CSVOptions
opts = do
  Char -> ParsecT Text () Identity Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char (CSVOptions -> Char
csvDelim CSVOptions
opts)
  let sp :: ParsecT Text u Identity Char
sp = case CSVOptions -> Char
csvDelim CSVOptions
opts of
              Char
'\t' -> Char -> ParsecT Text u Identity Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
' '
              Char
_    -> String -> ParsecT Text u Identity Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
String -> ParsecT s u m Char
oneOf String
" \t"
  Bool -> ParsecT Text () Identity () -> ParsecT Text () Identity ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (CSVOptions -> Bool
csvKeepSpace CSVOptions
opts) (ParsecT Text () Identity () -> ParsecT Text () Identity ())
-> ParsecT Text () Identity () -> ParsecT Text () Identity ()
forall a b. (a -> b) -> a -> b
$ ParsecT Text () Identity Char -> ParsecT Text () Identity ()
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany ParsecT Text () Identity Char
forall {u}. ParsecT Text u Identity Char
sp

endline :: Parser ()
endline :: ParsecT Text () Identity ()
endline = do
  ParsecT Text () Identity () -> ParsecT Text () Identity ()
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m ()
optional (ParsecT Text () Identity Char -> ParsecT Text () Identity ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (ParsecT Text () Identity Char -> ParsecT Text () Identity ())
-> ParsecT Text () Identity Char -> ParsecT Text () Identity ()
forall a b. (a -> b) -> a -> b
$ Char -> ParsecT Text () Identity Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
'\r')
  ParsecT Text () Identity Char -> ParsecT Text () Identity ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (ParsecT Text () Identity Char -> ParsecT Text () Identity ())
-> ParsecT Text () Identity Char -> ParsecT Text () Identity ()
forall a b. (a -> b) -> a -> b
$ Char -> ParsecT Text () Identity Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
'\n'