{-# LANGUAGE BangPatterns, CPP #-}
module Data.Csv.Parser
( DecodeOptions(..)
, defaultDecodeOptions
, csv
, csvWithHeader
, header
, record
, name
, field
) where
import Data.ByteString.Builder (byteString, toLazyByteString, charUtf8)
import Control.Applicative (optional)
import Data.Attoparsec.ByteString.Char8 (char, endOfInput)
import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.Lazy as AL
import qualified Data.Attoparsec.Zepto as Z
import qualified Data.ByteString as S
import qualified Data.ByteString.Unsafe as S
import qualified Data.Vector as V
import Data.Word (Word8)
import Data.Csv.Types
import Data.Csv.Util ((<$!>), blankLine, endOfLine, liftM2', cr, newline, doubleQuote, toStrict)
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative ((<$>), (*>), (<*), pure)
import Data.Monoid (mappend, mempty)
#endif
data DecodeOptions = DecodeOptions
{
DecodeOptions -> Word8
decDelimiter :: {-# UNPACK #-} !Word8
} deriving (DecodeOptions -> DecodeOptions -> Bool
(DecodeOptions -> DecodeOptions -> Bool)
-> (DecodeOptions -> DecodeOptions -> Bool) -> Eq DecodeOptions
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: DecodeOptions -> DecodeOptions -> Bool
$c/= :: DecodeOptions -> DecodeOptions -> Bool
== :: DecodeOptions -> DecodeOptions -> Bool
$c== :: DecodeOptions -> DecodeOptions -> Bool
Eq, Int -> DecodeOptions -> ShowS
[DecodeOptions] -> ShowS
DecodeOptions -> String
(Int -> DecodeOptions -> ShowS)
-> (DecodeOptions -> String)
-> ([DecodeOptions] -> ShowS)
-> Show DecodeOptions
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [DecodeOptions] -> ShowS
$cshowList :: [DecodeOptions] -> ShowS
show :: DecodeOptions -> String
$cshow :: DecodeOptions -> String
showsPrec :: Int -> DecodeOptions -> ShowS
$cshowsPrec :: Int -> DecodeOptions -> ShowS
Show)
defaultDecodeOptions :: DecodeOptions
defaultDecodeOptions :: DecodeOptions
defaultDecodeOptions = DecodeOptions
{ decDelimiter :: Word8
decDelimiter = Word8
44
}
csv :: DecodeOptions -> AL.Parser Csv
csv :: DecodeOptions -> Parser Csv
csv !DecodeOptions
opts = do
[Record]
vals <- Parser Record -> Parser [Record]
forall a. Parser a -> Parser [a]
sepByEndOfLine1' (Word8 -> Parser Record
record (DecodeOptions -> Word8
decDelimiter DecodeOptions
opts))
Maybe ()
_ <- Parser ByteString () -> Parser ByteString (Maybe ())
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ByteString ()
endOfLine
Parser ByteString ()
forall t. Chunk t => Parser t ()
endOfInput
let nonEmpty :: [Record]
nonEmpty = [Record] -> [Record]
removeBlankLines [Record]
vals
Csv -> Parser Csv
forall (m :: * -> *) a. Monad m => a -> m a
return (Csv -> Parser Csv) -> Csv -> Parser Csv
forall a b. (a -> b) -> a -> b
$! [Record] -> Csv
forall a. [a] -> Vector a
V.fromList [Record]
nonEmpty
{-# INLINE csv #-}
sepByDelim1' :: AL.Parser a
-> Word8
-> AL.Parser [a]
sepByDelim1' :: forall a. Parser a -> Word8 -> Parser [a]
sepByDelim1' Parser a
p !Word8
delim = (a -> [a] -> [a])
-> Parser a -> Parser ByteString [a] -> Parser ByteString [a]
forall (m :: * -> *) a b c.
Monad m =>
(a -> b -> c) -> m a -> m b -> m c
liftM2' (:) Parser a
p Parser ByteString [a]
loop
where
loop :: Parser ByteString [a]
loop = do
Maybe Word8
mb <- Parser (Maybe Word8)
A.peekWord8
case Maybe Word8
mb of
Just Word8
b | Word8
b Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
delim -> (a -> [a] -> [a])
-> Parser a -> Parser ByteString [a] -> Parser ByteString [a]
forall (m :: * -> *) a b c.
Monad m =>
(a -> b -> c) -> m a -> m b -> m c
liftM2' (:) (Parser Word8
A.anyWord8 Parser Word8 -> Parser a -> Parser a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser a
p) Parser ByteString [a]
loop
Maybe Word8
_ -> [a] -> Parser ByteString [a]
forall (f :: * -> *) a. Applicative f => a -> f a
pure []
{-# INLINE sepByDelim1' #-}
sepByEndOfLine1' :: AL.Parser a
-> AL.Parser [a]
sepByEndOfLine1' :: forall a. Parser a -> Parser [a]
sepByEndOfLine1' Parser a
p = (a -> [a] -> [a])
-> Parser a -> Parser ByteString [a] -> Parser ByteString [a]
forall (m :: * -> *) a b c.
Monad m =>
(a -> b -> c) -> m a -> m b -> m c
liftM2' (:) Parser a
p Parser ByteString [a]
loop
where
loop :: Parser ByteString [a]
loop = do
Maybe Word8
mb <- Parser (Maybe Word8)
A.peekWord8
case Maybe Word8
mb of
Just Word8
b | Word8
b Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
cr ->
(a -> [a] -> [a])
-> Parser a -> Parser ByteString [a] -> Parser ByteString [a]
forall (m :: * -> *) a b c.
Monad m =>
(a -> b -> c) -> m a -> m b -> m c
liftM2' (:) (Parser Word8
A.anyWord8 Parser Word8 -> Parser Word8 -> Parser Word8
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Word8 -> Parser Word8
A.word8 Word8
newline Parser Word8 -> Parser a -> Parser a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser a
p) Parser ByteString [a]
loop
| Word8
b Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
newline ->
(a -> [a] -> [a])
-> Parser a -> Parser ByteString [a] -> Parser ByteString [a]
forall (m :: * -> *) a b c.
Monad m =>
(a -> b -> c) -> m a -> m b -> m c
liftM2' (:) (Parser Word8
A.anyWord8 Parser Word8 -> Parser a -> Parser a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser a
p) Parser ByteString [a]
loop
Maybe Word8
_ -> [a] -> Parser ByteString [a]
forall (f :: * -> *) a. Applicative f => a -> f a
pure []
{-# INLINE sepByEndOfLine1' #-}
csvWithHeader :: DecodeOptions -> AL.Parser (Header, V.Vector NamedRecord)
!DecodeOptions
opts = do
!Record
hdr <- Word8 -> Parser Record
header (DecodeOptions -> Word8
decDelimiter DecodeOptions
opts)
[NamedRecord]
vals <- (Record -> NamedRecord) -> [Record] -> [NamedRecord]
forall a b. (a -> b) -> [a] -> [b]
map (Record -> Record -> NamedRecord
toNamedRecord Record
hdr) ([Record] -> [NamedRecord])
-> ([Record] -> [Record]) -> [Record] -> [NamedRecord]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Record] -> [Record]
removeBlankLines ([Record] -> [NamedRecord])
-> Parser [Record] -> Parser ByteString [NamedRecord]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
Parser Record -> Parser [Record]
forall a. Parser a -> Parser [a]
sepByEndOfLine1' (Word8 -> Parser Record
record (DecodeOptions -> Word8
decDelimiter DecodeOptions
opts))
Maybe ()
_ <- Parser ByteString () -> Parser ByteString (Maybe ())
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser ByteString ()
endOfLine
Parser ByteString ()
forall t. Chunk t => Parser t ()
endOfInput
let !v :: Vector NamedRecord
v = [NamedRecord] -> Vector NamedRecord
forall a. [a] -> Vector a
V.fromList [NamedRecord]
vals
(Record, Vector NamedRecord) -> Parser (Record, Vector NamedRecord)
forall (m :: * -> *) a. Monad m => a -> m a
return (Record
hdr, Vector NamedRecord
v)
header :: Word8
-> AL.Parser Header
!Word8
delim = [ByteString] -> Record
forall a. [a] -> Vector a
V.fromList ([ByteString] -> Record)
-> Parser ByteString [ByteString] -> Parser Record
forall (m :: * -> *) a b. Monad m => (a -> b) -> m a -> m b
<$!> Word8 -> Parser ByteString
name Word8
delim Parser ByteString -> Word8 -> Parser ByteString [ByteString]
forall a. Parser a -> Word8 -> Parser [a]
`sepByDelim1'` Word8
delim Parser Record -> Parser ByteString () -> Parser Record
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
endOfLine
name :: Word8 -> AL.Parser Name
name :: Word8 -> Parser ByteString
name !Word8
delim = Word8 -> Parser ByteString
field Word8
delim
removeBlankLines :: [Record] -> [Record]
removeBlankLines :: [Record] -> [Record]
removeBlankLines = (Record -> Bool) -> [Record] -> [Record]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Record -> Bool) -> Record -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Record -> Bool
blankLine)
record :: Word8
-> AL.Parser Record
record :: Word8 -> Parser Record
record !Word8
delim = [ByteString] -> Record
forall a. [a] -> Vector a
V.fromList ([ByteString] -> Record)
-> Parser ByteString [ByteString] -> Parser Record
forall (m :: * -> *) a b. Monad m => (a -> b) -> m a -> m b
<$!> Word8 -> Parser ByteString
field Word8
delim Parser ByteString -> Word8 -> Parser ByteString [ByteString]
forall a. Parser a -> Word8 -> Parser [a]
`sepByDelim1'` Word8
delim
{-# INLINE record #-}
field :: Word8 -> AL.Parser Field
field :: Word8 -> Parser ByteString
field !Word8
delim = do
Maybe Word8
mb <- Parser (Maybe Word8)
A.peekWord8
case Maybe Word8
mb of
Just Word8
b | Word8
b Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
doubleQuote -> Parser ByteString
escapedField
Maybe Word8
_ -> Word8 -> Parser ByteString
unescapedField Word8
delim
{-# INLINE field #-}
escapedField :: AL.Parser S.ByteString
escapedField :: Parser ByteString
escapedField = do
Char
_ <- Parser Char
dquote
ByteString
s <- HasCallStack => ByteString -> ByteString
ByteString -> ByteString
S.init (ByteString -> ByteString)
-> Parser ByteString -> Parser ByteString
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Bool -> (Bool -> Word8 -> Maybe Bool) -> Parser ByteString
forall s. s -> (s -> Word8 -> Maybe s) -> Parser ByteString
A.scan Bool
False ((Bool -> Word8 -> Maybe Bool) -> Parser ByteString)
-> (Bool -> Word8 -> Maybe Bool) -> Parser ByteString
forall a b. (a -> b) -> a -> b
$ \Bool
s Word8
c -> if Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
doubleQuote
then Bool -> Maybe Bool
forall a. a -> Maybe a
Just (Bool -> Bool
not Bool
s)
else if Bool
s then Maybe Bool
forall a. Maybe a
Nothing
else Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
False)
if Word8
doubleQuote Word8 -> ByteString -> Bool
`S.elem` ByteString
s
then case Parser ByteString -> ByteString -> Either String ByteString
forall a. Parser a -> ByteString -> Either String a
Z.parse Parser ByteString
unescape ByteString
s of
Right ByteString
r -> ByteString -> Parser ByteString
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
r
Left String
err -> String -> Parser ByteString
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
err
else ByteString -> Parser ByteString
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
s
unescapedField :: Word8 -> AL.Parser S.ByteString
unescapedField :: Word8 -> Parser ByteString
unescapedField !Word8
delim = (Word8 -> Bool) -> Parser ByteString
A.takeWhile (\ Word8
c -> Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
doubleQuote Bool -> Bool -> Bool
&&
Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
newline Bool -> Bool -> Bool
&&
Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
delim Bool -> Bool -> Bool
&&
Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
cr)
dquote :: AL.Parser Char
dquote :: Parser Char
dquote = Char -> Parser Char
char Char
'"'
unescape :: Z.Parser S.ByteString
unescape :: Parser ByteString
unescape = (ByteString -> ByteString
toStrict (ByteString -> ByteString)
-> (Builder -> ByteString) -> Builder -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> ByteString
toLazyByteString) (Builder -> ByteString)
-> ZeptoT Identity Builder -> Parser ByteString
forall (m :: * -> *) a b. Monad m => (a -> b) -> m a -> m b
<$!> Builder -> ZeptoT Identity Builder
forall {m :: * -> *}. Monad m => Builder -> ZeptoT m Builder
go Builder
forall a. Monoid a => a
mempty where
go :: Builder -> ZeptoT m Builder
go Builder
acc = do
ByteString
h <- (Word8 -> Bool) -> ZeptoT m ByteString
forall (m :: * -> *).
Monad m =>
(Word8 -> Bool) -> ZeptoT m ByteString
Z.takeWhile (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
doubleQuote)
let rest :: ZeptoT m Builder
rest = do
ByteString
start <- Int -> ZeptoT m ByteString
forall (m :: * -> *). Monad m => Int -> ZeptoT m ByteString
Z.take Int
2
if (ByteString -> Word8
S.unsafeHead ByteString
start Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
doubleQuote Bool -> Bool -> Bool
&&
ByteString -> Int -> Word8
S.unsafeIndex ByteString
start Int
1 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
doubleQuote)
then Builder -> ZeptoT m Builder
go (Builder
acc Builder -> Builder -> Builder
forall a. Monoid a => a -> a -> a
`mappend` ByteString -> Builder
byteString ByteString
h Builder -> Builder -> Builder
forall a. Monoid a => a -> a -> a
`mappend` Char -> Builder
charUtf8 Char
'"')
else String -> ZeptoT m Builder
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"invalid CSV escape sequence"
Bool
done <- ZeptoT m Bool
forall (m :: * -> *). Monad m => ZeptoT m Bool
Z.atEnd
if Bool
done
then Builder -> ZeptoT m Builder
forall (m :: * -> *) a. Monad m => a -> m a
return (Builder
acc Builder -> Builder -> Builder
forall a. Monoid a => a -> a -> a
`mappend` ByteString -> Builder
byteString ByteString
h)
else ZeptoT m Builder
rest