{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module Data.Csv.Parser.Megaparsec
( ConversionError (..)
, decode
, decodeWith
, decodeByName
, decodeByNameWith )
where
import Control.Monad
import Data.ByteString (ByteString)
import Data.Csv hiding
( Parser
, record
, namedRecord
, header
, toNamedRecord
, decode
, decodeWith
, decodeByName
, decodeByNameWith )
import Data.Data
import Data.Vector (Vector)
import Data.Word (Word8)
import Text.Megaparsec
import Text.Megaparsec.Byte
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.Csv as C
import qualified Data.HashMap.Strict as H
import qualified Data.Vector as V
newtype ConversionError = ConversionError String
deriving (Eq, Data, Typeable, Ord, Read, Show)
instance ShowErrorComponent ConversionError where
showErrorComponent (ConversionError msg) =
"conversion error: " ++ msg
type Parser = Parsec ConversionError BL.ByteString
decode :: FromRecord a
=> HasHeader
-> FilePath
-> BL.ByteString
-> Either (ParseErrorBundle BL.ByteString ConversionError) (Vector a)
decode = decodeWith defaultDecodeOptions
{-# INLINE decode #-}
decodeWith :: FromRecord a
=> DecodeOptions
-> HasHeader
-> FilePath
-> BL.ByteString
-> Either (ParseErrorBundle BL.ByteString ConversionError) (Vector a)
decodeWith = decodeWithC csv
{-# INLINE decodeWith #-}
decodeByName :: FromNamedRecord a
=> FilePath
-> BL.ByteString
-> Either (ParseErrorBundle BL.ByteString ConversionError) (Header, Vector a)
decodeByName = decodeByNameWith defaultDecodeOptions
{-# INLINE decodeByName #-}
decodeByNameWith :: FromNamedRecord a
=> DecodeOptions
-> FilePath
-> BL.ByteString
-> Either (ParseErrorBundle BL.ByteString ConversionError) (Header, Vector a)
decodeByNameWith opts = parse (csvWithHeader opts)
{-# INLINE decodeByNameWith #-}
decodeWithC
:: (DecodeOptions -> Parser a)
-> DecodeOptions
-> HasHeader
-> FilePath
-> BL.ByteString
-> Either (ParseErrorBundle BL.ByteString ConversionError) a
decodeWithC p opts@DecodeOptions {..} hasHeader = parse parser
where
parser = case hasHeader of
HasHeader -> header decDelimiter *> p opts
NoHeader -> p opts
{-# INLINE decodeWithC #-}
csv :: FromRecord a
=> DecodeOptions
-> Parser (Vector a)
csv DecodeOptions {..} = do
xs <- sepEndBy1 (record decDelimiter parseRecord) eol
eof
return $! V.fromList xs
csvWithHeader :: FromNamedRecord a
=> DecodeOptions
-> Parser (Header, Vector a)
csvWithHeader DecodeOptions {..} = do
!hdr <- header decDelimiter
let f = parseNamedRecord . toNamedRecord hdr
xs <- sepEndBy1 (record decDelimiter f) eol
eof
return $ let !v = V.fromList xs in (hdr, v)
toNamedRecord :: Header -> Record -> NamedRecord
toNamedRecord hdr v = H.fromList . V.toList $ V.zip hdr v
{-# INLINE toNamedRecord #-}
header :: Word8 -> Parser Header
header del = V.fromList <$!> p <* eol
where
p = sepBy1 (name del) (void $ char del) <?> "file header"
{-# INLINE header #-}
name :: Word8 -> Parser Name
name del = field del <?> "name in header"
{-# INLINE name #-}
record
:: Word8
-> (Record -> C.Parser a)
-> Parser a
record del f = do
notFollowedBy eof
r <- V.fromList <$!> (sepBy1 (field del) (void $ char del) <?> "record")
case C.runParser (f r) of
Left msg -> customFailure (ConversionError msg)
Right x -> return x
{-# INLINE record #-}
field :: Word8 -> Parser Field
field del = label "field" (escapedField <|> unescapedField del)
{-# INLINE field #-}
escapedField :: Parser ByteString
escapedField =
B.pack <$!> between (char 34) (char 34) (many $ normalChar <|> escapedDq)
where
normalChar = anySingleBut 34 <?> "unescaped character"
escapedDq = label "escaped double-quote" (34 <$ string "\"\"")
{-# INLINE escapedField #-}
unescapedField :: Word8 -> Parser ByteString
unescapedField del = BL.toStrict <$> takeWhileP (Just "unescaped character") f
where
f x = x /= del && x /= 34 && x /= 10 && x /= 13
{-# INLINE unescapedField #-}