Copyright | (c) Richard Warfield |
---|---|
License | BSD 3-clause |
Maintainer | richard@litx.io |
Safe Haskell | None |
Language | Haskell2010 |
Stream CSV data in/out using Cassava. Adapted from streaming-cassava.
For efficiency, operates on streams of strict ByteString chunks
(i.e. IsStream t => t m ByteString)
rather than directly on streams of Word8.
The chunkStream
function is useful for generating an input stream from a
Handle
.
Example usage:
import Streamly import qualified Streamly.Prelude as S import Streamly.Csv (decode, encode, chunkStream) import System.IO import qualified Data.Csv as Csv import qualified Data.ByteString as BS import Data.Vector (Vector) do h <- openFile "testfile.csv" ReadMode let chunks = chunkStream h (64*1024) recs = decode Csv.HasHeader chunks :: SerialT IO (Vector BS.ByteString) withFile "dest.csv" WriteMode $ \ho -> S.mapM_ (BS.hPut ho) $ encode Nothing recs
Synopsis
- decode :: (IsStream t, MonadAsync m, FromRecord a) => HasHeader -> t m ByteString -> t m a
- decodeWith :: (IsStream t, MonadAsync m, FromRecord a) => DecodeOptions -> HasHeader -> t m ByteString -> t m a
- decodeWithErrors :: (IsStream t, Monad m, FromRecord a, MonadThrow m) => DecodeOptions -> HasHeader -> t m ByteString -> t m (Either CsvParseException a)
- newtype CsvParseException = CsvParseException String
- chunkStream :: (IsStream t, MonadAsync m) => Handle -> Int -> t m ByteString
- decodeByName :: (MonadAsync m, FromNamedRecord a) => SerialT m ByteString -> SerialT m a
- decodeByNameWith :: (MonadAsync m, FromNamedRecord a) => DecodeOptions -> SerialT m ByteString -> SerialT m a
- decodeByNameWithErrors :: forall m a. (Monad m, MonadThrow m, FromNamedRecord a) => DecodeOptions -> SerialT m ByteString -> SerialT m (Either CsvParseException a)
- encode :: (IsStream t, ToRecord a, Monad m) => Maybe Header -> t m a -> t m ByteString
- encodeDefault :: forall a t m. (IsStream t, ToRecord a, DefaultOrdered a, Monad m) => t m a -> t m ByteString
- encodeWith :: (IsStream t, ToRecord a, Monad m) => EncodeOptions -> Maybe Header -> t m a -> t m ByteString
- encodeByName :: (IsStream t, ToNamedRecord a, Monad m) => Header -> t m a -> t m ByteString
- encodeByNameDefault :: forall a t m. (IsStream t, DefaultOrdered a, ToNamedRecord a, Monad m) => t m a -> t m ByteString
- encodeByNameWith :: (IsStream t, ToNamedRecord a, Monad m) => EncodeOptions -> Header -> t m a -> t m ByteString
- class FromRecord a where
- parseRecord :: Record -> Parser a
- class FromNamedRecord a where
- parseNamedRecord :: NamedRecord -> Parser a
- class ToRecord a where
- class ToNamedRecord a where
- toNamedRecord :: a -> NamedRecord
- class DefaultOrdered a where
- headerOrder :: a -> Header
- data HasHeader
- type Header = Vector Name
- header :: [ByteString] -> Header
- type Name = ByteString
- data DecodeOptions = DecodeOptions {
- decDelimiter :: !Word8
- defaultDecodeOptions :: DecodeOptions
- data EncodeOptions = EncodeOptions {
- encDelimiter :: !Word8
- encUseCrLf :: !Bool
- encIncludeHeader :: !Bool
- encQuoting :: !Quoting
- defaultEncodeOptions :: EncodeOptions
Decoding
decode :: (IsStream t, MonadAsync m, FromRecord a) => HasHeader -> t m ByteString -> t m a Source #
Use defaultOptions
for decoding the provided CSV.
decodeWith :: (IsStream t, MonadAsync m, FromRecord a) => DecodeOptions -> HasHeader -> t m ByteString -> t m a Source #
Return back a stream of values from the provided CSV, stopping at the first error.
If you wish to instead ignore errors, consider using
decodeWithErrors
with mapMaybe
Any remaining input is discarded.
decodeWithErrors :: (IsStream t, Monad m, FromRecord a, MonadThrow m) => DecodeOptions -> HasHeader -> t m ByteString -> t m (Either CsvParseException a) Source #
Return back a stream with an attempt at type conversion, and either the previous result or any overall parsing errors with the remainder of the input.
newtype CsvParseException Source #
Instances
Eq CsvParseException Source # | |
Defined in Streamly.Csv (==) :: CsvParseException -> CsvParseException -> Bool # (/=) :: CsvParseException -> CsvParseException -> Bool # | |
Show CsvParseException Source # | |
Defined in Streamly.Csv showsPrec :: Int -> CsvParseException -> ShowS # show :: CsvParseException -> String # showList :: [CsvParseException] -> ShowS # | |
IsString CsvParseException Source # | |
Defined in Streamly.Csv fromString :: String -> CsvParseException # | |
Exception CsvParseException Source # | |
Defined in Streamly.Csv |
chunkStream :: (IsStream t, MonadAsync m) => Handle -> Int -> t m ByteString Source #
Named decoding
decodeByName :: (MonadAsync m, FromNamedRecord a) => SerialT m ByteString -> SerialT m a Source #
Use defaultOptions
for decoding the provided CSV.
decodeByNameWith :: (MonadAsync m, FromNamedRecord a) => DecodeOptions -> SerialT m ByteString -> SerialT m a Source #
Return back a stream of values from the provided CSV, stopping at the first error.
A header is required to determine the order of columns, but then discarded.
If you wish to instead ignore errors, consider using
decodeByNameWithErrors
with mapMaybe
Any remaining input is discarded.
decodeByNameWithErrors :: forall m a. (Monad m, MonadThrow m, FromNamedRecord a) => DecodeOptions -> SerialT m ByteString -> SerialT m (Either CsvParseException a) Source #
Return back a stream with an attempt at type conversion, but where the order of columns doesn't have to match the order of fields of your actual type.
This requires/assumes a header in the CSV stream, which is discarded after parsing.
Encoding
encodeDefault :: forall a t m. (IsStream t, ToRecord a, DefaultOrdered a, Monad m) => t m a -> t m ByteString Source #
Encode a stream of values with the default options and a derived header prefixed.
encodeWith :: (IsStream t, ToRecord a, Monad m) => EncodeOptions -> Maybe Header -> t m a -> t m ByteString Source #
Encode a stream of values with the provided options.
Optionally prefix the stream with headers (the header
function
may be useful).
Named encoding
encodeByName :: (IsStream t, ToNamedRecord a, Monad m) => Header -> t m a -> t m ByteString Source #
Select the columns that you wish to encode from your data structure using default options (which currently includes printing the header).
encodeByNameDefault :: forall a t m. (IsStream t, DefaultOrdered a, ToNamedRecord a, Monad m) => t m a -> t m ByteString Source #
Use the default ordering to encode all fields/columns.
encodeByNameWith :: (IsStream t, ToNamedRecord a, Monad m) => EncodeOptions -> Header -> t m a -> t m ByteString Source #
Select the columns that you wish to encode from your data structure.
Header printing respects encIncludeheader
.
Re-exports
class FromRecord a where #
A type that can be converted from a single CSV record, with the possibility of failure.
When writing an instance, use empty
, mzero
, or fail
to make a
conversion fail, e.g. if a Record
has the wrong number of
columns.
Given this example data:
John,56 Jane,55
here's an example type and instance:
data Person = Person { name :: !Text, age :: !Int } instance FromRecord Person where parseRecord v | length v == 2 = Person <$> v .! 0 <*> v .! 1 | otherwise = mzero
Nothing
parseRecord :: Record -> Parser a #
Instances
class FromNamedRecord a where #
A type that can be converted from a single CSV record, with the possibility of failure.
When writing an instance, use empty
, mzero
, or fail
to make a
conversion fail, e.g. if a Record
has the wrong number of
columns.
Given this example data:
name,age John,56 Jane,55
here's an example type and instance:
{-# LANGUAGE OverloadedStrings #-} data Person = Person { name :: !Text, age :: !Int } instance FromNamedRecord Person where parseNamedRecord m = Person <$> m .: "name" <*> m .: "age"
Note the use of the OverloadedStrings
language extension which
enables ByteString
values to be written as string literals.
Nothing
parseNamedRecord :: NamedRecord -> Parser a #
Instances
(FromField a, FromField b, Ord a) => FromNamedRecord (Map a b) | |
Defined in Data.Csv.Conversion parseNamedRecord :: NamedRecord -> Parser (Map a b) # | |
(Eq a, FromField a, FromField b, Hashable a) => FromNamedRecord (HashMap a b) | |
Defined in Data.Csv.Conversion parseNamedRecord :: NamedRecord -> Parser (HashMap a b) # |
A type that can be converted to a single CSV record.
An example type and instance:
data Person = Person { name :: !Text, age :: !Int } instance ToRecord Person where toRecord (Person name age) = record [ toField name, toField age]
Outputs data on this form:
John,56 Jane,55
Nothing
Instances
class ToNamedRecord a where #
A type that can be converted to a single CSV record.
An example type and instance:
data Person = Person { name :: !Text, age :: !Int } instance ToNamedRecord Person where toNamedRecord (Person name age) = namedRecord [ "name" .= name, "age" .= age]
Nothing
toNamedRecord :: a -> NamedRecord #
Convert a value to a named record.
Instances
(ToField a, ToField b, Ord a) => ToNamedRecord (Map a b) | |
Defined in Data.Csv.Conversion toNamedRecord :: Map a b -> NamedRecord # | |
(Eq a, ToField a, ToField b, Hashable a) => ToNamedRecord (HashMap a b) | |
Defined in Data.Csv.Conversion toNamedRecord :: HashMap a b -> NamedRecord # |
class DefaultOrdered a where #
A type that has a default field order when converted to CSV. This
class lets you specify how to get the headers to use for a record
type that's an instance of ToNamedRecord
.
To derive an instance, the type is required to only have one constructor and that constructor must have named fields (also known as selectors) for all fields.
Right: data Foo = Foo { foo :: !Int }
Wrong: data Bar = Bar Int
If you try to derive an instance using GHC generics and your type doesn't have named fields, you will get an error along the lines of:
<interactive>:9:10: No instance for (DefaultOrdered (M1 S NoSelector (K1 R Char) ())) arising from a use of ‘Data.Csv.Conversion.$gdmheader’ In the expression: Data.Csv.Conversion.$gdmheader In an equation for ‘header’: header = Data.Csv.Conversion.$gdmheader In the instance declaration for ‘DefaultOrdered Foo’
Nothing
headerOrder :: a -> Header #
The header order for this record. Should include the names
used in the NamedRecord
returned by ToNamedRecord
. Pass
undefined
as the argument, together with a type annotation
e.g.
.headerOrder
(undefined
:: MyRecord)
Is the CSV data preceded by a header?
The header corresponds to the first line a CSV file. Not all CSV files have a header.
header :: [ByteString] -> Header #
Construct a header from a list of ByteString
s.
type Name = ByteString #
A header has one or more names, describing the data in the column following the name.
data DecodeOptions #
Options that controls how data is decoded. These options can be used to e.g. decode tab-separated data instead of comma-separated data.
To avoid having your program stop compiling when new fields are
added to DecodeOptions
, create option records by overriding
values in defaultDecodeOptions
. Example:
myOptions = defaultDecodeOptions { decDelimiter = fromIntegral (ord '\t') }
DecodeOptions | |
|
Instances
Eq DecodeOptions | |
Defined in Data.Csv.Parser (==) :: DecodeOptions -> DecodeOptions -> Bool # (/=) :: DecodeOptions -> DecodeOptions -> Bool # | |
Show DecodeOptions | |
Defined in Data.Csv.Parser showsPrec :: Int -> DecodeOptions -> ShowS # show :: DecodeOptions -> String # showList :: [DecodeOptions] -> ShowS # |
defaultDecodeOptions :: DecodeOptions #
Decoding options for parsing CSV files.
data EncodeOptions #
Options that controls how data is encoded. These options can be used to e.g. encode data in a tab-separated format instead of in a comma-separated format.
To avoid having your program stop compiling when new fields are
added to EncodeOptions
, create option records by overriding
values in defaultEncodeOptions
. Example:
myOptions = defaultEncodeOptions { encDelimiter = fromIntegral (ord '\t') }
N.B. The encDelimiter
must not be the quote character (i.e.
"
) or one of the record separator characters (i.e. \n
or
\r
).
EncodeOptions | |
|
Instances
Eq EncodeOptions | |
Defined in Data.Csv.Encoding (==) :: EncodeOptions -> EncodeOptions -> Bool # (/=) :: EncodeOptions -> EncodeOptions -> Bool # | |
Show EncodeOptions | |
Defined in Data.Csv.Encoding showsPrec :: Int -> EncodeOptions -> ShowS # show :: EncodeOptions -> String # showList :: [EncodeOptions] -> ShowS # |
defaultEncodeOptions :: EncodeOptions #
Encoding options for CSV files.