Copyright | (C) CSIRO 2017-2018 |
---|---|
License | BSD3 |
Maintainer | George Wilson <george.wilson@data61.csiro.au> |
Stability | experimental |
Portability | non-portable |
Safe Haskell | None |
Language | Haskell2010 |
This module contains data structures, combinators, and primitives for decoding a CSV into a list of your Haskell datatype.
A Decode
can be built using the primitives in this file. Decode
is an Applicative
and an Alt
, allowing for composition
of these values with <*>
and <!>
The primitive Decode
s in this file which use ByteString
expect UTF-8
encoding. The Decode type has an instance of Profunctor
,
so you can lmap
or alterInput
to reencode on the way in.
This module is intended to be imported qualified like so
import qualified Data.Sv.Decode.Core as D
Synopsis
- newtype Decode e s a = Decode {
- unwrapDecode :: Compose (DecodeState s) (Compose (Writer (Last Bool)) (DecodeValidation e)) a
- type Decode' s = Decode s s
- type DecodeValidation e = Validation (DecodeErrors e)
- data DecodeError e
- = UnexpectedEndOfRow
- | ExpectedEndOfRow (Vector e)
- | UnknownCategoricalValue e [[e]]
- | MissingColumn e
- | MissingHeader
- | BadConfig e
- | BadParse e
- | BadDecode e
- newtype DecodeErrors e = DecodeErrors (NonEmpty (DecodeError e))
- decode :: Traversable f => Decode' ByteString a -> f (Vector ByteString) -> DecodeValidation ByteString (f a)
- decodeMay :: DecodeError e -> (s -> Maybe a) -> Decode e s a
- decodeEither :: (s -> Either (DecodeError e) a) -> Decode e s a
- decodeEither' :: (e -> DecodeError e') -> (s -> Either e a) -> Decode e' s a
- mapErrors :: (e -> x) -> Decode e s a -> Decode x s a
- alterInput :: (e -> x) -> (t -> s) -> Decode e s a -> Decode x t a
- column :: Ord s => s -> Decode' s a -> NameDecode' s a
- (.:) :: Ord s => s -> Decode' s a -> NameDecode' s a
- contents :: Decode e s s
- char :: Decode' ByteString Char
- byteString :: Decode' ByteString ByteString
- utf8 :: Decode' ByteString Text
- lazyUtf8 :: Decode' ByteString Text
- lazyByteString :: Decode' ByteString ByteString
- string :: Decode' ByteString String
- int :: Decode' ByteString Int
- integer :: Decode' ByteString Integer
- float :: Decode' ByteString Float
- double :: Decode' ByteString Double
- rational :: Floating a => Decode' ByteString a
- boolean :: (IsString s, Ord s) => Decode' s Bool
- boolean' :: Ord s => (String -> s) -> Decode' s Bool
- ignore :: Decode e s ()
- replace :: a -> Decode e s a
- exactly :: (Semigroup s, Eq s, IsString s) => s -> Decode' s s
- emptyField :: (Eq s, IsString s, Semigroup s) => Decode' s ()
- row :: Decode e s (Vector s)
- choice :: Decode e s a -> Decode e s a -> Decode e s a
- element :: NonEmpty (Decode e s a) -> Decode e s a
- optionalField :: Decode e s a -> Decode e s (Maybe a)
- ignoreFailure :: Decode e s a -> Decode e s (Maybe a)
- orEmpty :: (Eq s, IsString s, Semigroup s) => Decode' s a -> Decode' s (Maybe a)
- either :: Decode e s a -> Decode e s b -> Decode e s (Either a b)
- orElse :: Decode e s a -> a -> Decode e s a
- orElseE :: Decode e s b -> a -> Decode e s (Either a b)
- categorical :: (Ord s, Show a) => [(a, s)] -> Decode' s a
- categorical' :: forall s a. (Ord s, Show a) => [(a, [s])] -> Decode' s a
- (>>==) :: Decode e s a -> (a -> DecodeValidation e b) -> Decode e s b
- (==<<) :: (a -> DecodeValidation e b) -> Decode e s a -> Decode e s b
- bindDecode :: Decode e s a -> (a -> Decode e s b) -> Decode e s b
- decodeRead :: Readable a => Decode' ByteString a
- decodeRead' :: Readable a => ByteString -> Decode' ByteString a
- decodeReadWithMsg :: Readable a => (ByteString -> e) -> Decode e ByteString a
- withTrifecta :: Parser a -> Decode' ByteString a
- withAttoparsec :: Parser a -> Decode' ByteString a
- withParsec :: Parsec ByteString () a -> Decode' ByteString a
- withTextReader :: Reader a -> Decode' Text a
- onError :: Decode e s a -> (DecodeErrors e -> Decode e s a) -> Decode e s a
- decodeError :: DecodeError e -> DecodeValidation e a
- unexpectedEndOfRow :: DecodeValidation e a
- expectedEndOfRow :: Vector e -> DecodeValidation e a
- unknownCategoricalValue :: e -> [[e]] -> DecodeValidation e a
- badParse :: e -> DecodeValidation e a
- badDecode :: e -> DecodeValidation e a
- validateEither :: Either (DecodeError e) a -> DecodeValidation e a
- validateEitherWith :: (e -> DecodeError e') -> Either e a -> DecodeValidation e' a
- validateMaybe :: DecodeError e -> Maybe b -> DecodeValidation e b
- runDecode :: Decode e s a -> Vector s -> Ind -> (DecodeValidation e a, Last Bool, Ind)
- buildDecode :: (Vector s -> Ind -> (DecodeValidation e a, Last Bool, Ind)) -> Decode e s a
- mkDecode :: (s -> DecodeValidation e a) -> Decode e s a
- promote :: Decode' s a -> Vector s -> DecodeValidation s a
- promote' :: (s -> e) -> Decode e s a -> Vector s -> DecodeValidation e a
- runNamed :: NameDecode e s a -> Map s Ind -> DecodeValidation e (Decode e s a)
- anonymous :: Decode e s a -> NameDecode e s a
- makePositional :: Ord s => Vector s -> NameDecode e s a -> DecodeValidation e (Decode e s a)
The types
A
is for decoding some fields from a CSV row into our type Decode
e s aa
.
The second type parameter (s
) is the input string type
(usually ByteString
or Text
).
The first type parameter (e
) is the type of strings which occur in errors.
Under most circumstances you want these type paraters to coincide, but they
don't have to. They are two separate type parameters instead of one so that
Decode
can have a Profunctor
instance.
There are primitive Decode
s, and combinators for composing or
otherwise manipulating them. In particular, Decode
is an
Applicative
functor and an Alt
from the semigroupoids package, also known
as a SemiAlternative
.
Decode
is not a Monad
, but we can perform monad-like operations on
it with >>==
or bindDecode
Decode | |
|
type DecodeValidation e = Validation (DecodeErrors e) Source #
DecodeValidation
is the error-accumulating Applicative
underlying
Decode
data DecodeError e Source #
DecodeError
is a value indicating what went wrong during a parse or
decode. Its constructor indictates the type of error which occured, and
there is usually an associated string with more finely-grained details.
UnexpectedEndOfRow | I was looking for another field, but I am at the end of the row |
ExpectedEndOfRow (Vector e) | I should be at the end of the row, but I found extra fields |
UnknownCategoricalValue e [[e]] | This decoder was built using the |
MissingColumn e | Looked for a column with this name, but could not find it |
MissingHeader | There should have been a header but there was nothing |
BadConfig e | sv is misconfigured |
BadParse e | The parser failed, meaning decoding proper didn't even begin |
BadDecode e | Some other kind of decoding failure occured |
Instances
newtype DecodeErrors e Source #
DecodeErrors
is a Semigroup
full of DecodeError
. It is used as the
error side of a DecodeValidation
. When multiple errors occur, they will
be collected.
DecodeErrors (NonEmpty (DecodeError e)) |
Instances
Running Decodes
decode :: Traversable f => Decode' ByteString a -> f (Vector ByteString) -> DecodeValidation ByteString (f a) Source #
Decodes a sv into a list of its values using the provided Decode
Convenience constructors and functions
decodeEither :: (s -> Either (DecodeError e) a) -> Decode e s a Source #
decodeEither' :: (e -> DecodeError e') -> (s -> Either e a) -> Decode e' s a Source #
mapErrors :: (e -> x) -> Decode e s a -> Decode x s a Source #
Map over the errors of a Decode
To map over the other two parameters, use the Profunctor
instance.
alterInput :: (e -> x) -> (t -> s) -> Decode e s a -> Decode x t a Source #
This transforms a Decode' s a
into a Decode' t a
. It needs
functions in both directions because the errors can include fragments of the
input.
alterInput :: (s -> t) -> (t -> s) -> Decode' s a -> Decode' t a
Primitive Decodes
Name-based
column :: Ord s => s -> Decode' s a -> NameDecode' s a Source #
This is the primitive for building decoders that work with columns
Look for the column with the given name and run the given decoder on it
(.:) :: Ord s => s -> Decode' s a -> NameDecode' s a infixl 5 Source #
Infix alias for column
Mnemonic: Dot colon names Decoders, Equal colon names Encoders.
Field-based
contents :: Decode e s s Source #
Get the contents of a field without doing any decoding. This never fails.
char :: Decode' ByteString Char Source #
Get a field that's a single char. This will fail if there are mulitple characters in the field.
byteString :: Decode' ByteString ByteString Source #
Get the contents of a field as a bytestring.
Alias for contents
utf8 :: Decode' ByteString Text Source #
Get the contents of a UTF-8 encoded field as Text
This will also work for ASCII text, as ASCII is a subset of UTF-8
lazyByteString :: Decode' ByteString ByteString Source #
Get the contents of a field as a lazy ByteString
int :: Decode' ByteString Int Source #
Decode a UTF-8 ByteString
field as an Int
integer :: Decode' ByteString Integer Source #
Decode a UTF-8 ByteString
field as an Integer
float :: Decode' ByteString Float Source #
Decode a UTF-8 ByteString
field as a Float
double :: Decode' ByteString Double Source #
Decode a UTF-8 ByteString
field as a Double
This is much faster than rational
but can be less precise. If you're
not sure which to use, use rational
.
See the documentation for double
for more information.
rational :: Floating a => Decode' ByteString a Source #
Decode a UTF-8 ByteString
as any Floating
type (usually Double
)
This is slower than 'double ' but more precise. If you're
not sure which to use, use rational
.
See the documentation for double
for more information.
boolean :: (IsString s, Ord s) => Decode' s Bool Source #
Decode a field as a Bool
This aims to be tolerant to different forms a boolean might take.
boolean' :: Ord s => (String -> s) -> Decode' s Bool Source #
Decode a field as a Bool
. This version lets you provide the fromString
function that's right for you, since IsString
on a
ByteString
will do the wrong thing in the case of many
encodings such as UTF-16 or UTF-32.
This aims to be tolerant to different forms a boolean might take.
ignore :: Decode e s () Source #
Throw away the contents of a field. This is useful for skipping unneeded fields.
replace :: a -> Decode e s a Source #
Throw away the contents of a field, and return the given value.
exactly :: (Semigroup s, Eq s, IsString s) => s -> Decode' s s Source #
Decode exactly the given string, or else fail.
emptyField :: (Eq s, IsString s, Semigroup s) => Decode' s () Source #
Succeed only when the given field is the empty string.
The empty string surrounded in quotes or spaces is still the empty string.
Row-based
Combinators
optionalField :: Decode e s a -> Decode e s (Maybe a) Source #
Try the given Decode
. If it fails, succeed without consuming anything.
This usually isn't what you want. ignoreFailure
and orEmpty
are more
likely what you are after.
either :: Decode e s a -> Decode e s b -> Decode e s (Either a b) Source #
Try the first, then try the second, and wrap the winner in an Either
.
This is left-biased, meaning if they both succeed, left wins.
orElse :: Decode e s a -> a -> Decode e s a Source #
Try the given decoder, otherwise succeed with the given value.
orElseE :: Decode e s b -> a -> Decode e s (Either a b) Source #
Try the given decoder, or if it fails succeed with the given value, in an Either
.
categorical :: (Ord s, Show a) => [(a, s)] -> Decode' s a Source #
Decode categorical data, given a list of the values and the strings which match them.
Usually this is used with sum types with nullary constructors.
data TrafficLight = Red | Amber | Green categorical [(Red, "red"), (Amber, "amber"), (Green, "green")]
categorical' :: forall s a. (Ord s, Show a) => [(a, [s])] -> Decode' s a Source #
Decode categorical data, given a list of the values and lists of strings which match them.
This version allows for multiple strings to match each value, which is useful for when the categories are inconsistently labelled.
data TrafficLight = Red | Amber | Green categorical' [(Red, ["red", "R"]), (Amber, ["amber", "orange", "A"]), (Green, ["green", "G"])]
For another example of its usage, see the source for boolean
.
(>>==) :: Decode e s a -> (a -> DecodeValidation e b) -> Decode e s b infixl 1 Source #
This can be used to build a Decode
whose value depends on the
result of another Decode
. This is especially useful since Decode
is not
a Monad
.
If you need something like this but with more power, look at bindDecode
bindDecode :: Decode e s a -> (a -> Decode e s b) -> Decode e s b Source #
Bind through a Decode
.
This bind does not agree with the Applicative
instance because it does
not accumulate multiple error values. This is a violation of the Monad
laws, meaning Decode
is not a Monad
.
That is not to say that there is anything wrong with using this function. It can be quite useful.
Building Decodes from Readable
decodeRead :: Readable a => Decode' ByteString a Source #
Use the Readable
instance to try to decode the given value.
decodeRead' :: Readable a => ByteString -> Decode' ByteString a Source #
Use the Readable
instance to try to decode the given value,
or fail with the given error message.
decodeReadWithMsg :: Readable a => (ByteString -> e) -> Decode e ByteString a Source #
Use the Readable
instance to try to decode the given value,
or use the value to build an error message.
Building Decodes from parsers
withTrifecta :: Parser a -> Decode' ByteString a Source #
Build a Decode
from a Trifecta parser
withAttoparsec :: Parser a -> Decode' ByteString a Source #
Build a Decode
from an Attoparsec parser
withParsec :: Parsec ByteString () a -> Decode' ByteString a Source #
Build a Decode
from a Parsec parser
Working with errors
decodeError :: DecodeError e -> DecodeValidation e a Source #
Build a failing DecodeValidation
unexpectedEndOfRow :: DecodeValidation e a Source #
Fail with UnexpectedEndOfRow
expectedEndOfRow :: Vector e -> DecodeValidation e a Source #
Fail with ExpectedEndOfRow
. This takes the rest of the row, so that it
can be displayed to the user.
unknownCategoricalValue :: e -> [[e]] -> DecodeValidation e a Source #
Fail with UnknownCategoricalValue
.
It takes the unknown value and the list of good categorical values.
This mostly exists to be used by the categorical
function.
badParse :: e -> DecodeValidation e a Source #
Fail with BadParse
with the given message. This is for when the parse
step fails, and decoding does not even begin.
badDecode :: e -> DecodeValidation e a Source #
Fail with BadDecode
with the given message. This is something of a
generic error for when decoding a field goes wrong.
validateEither :: Either (DecodeError e) a -> DecodeValidation e a Source #
Build a DecodeValidation
from an Either
validateEitherWith :: (e -> DecodeError e') -> Either e a -> DecodeValidation e' a Source #
Build a DecodeValidation
from an Either
, given a function to build the error.
validateMaybe :: DecodeError e -> Maybe b -> DecodeValidation e b Source #
Build a DecodeValidation
from a Maybe
. You have to supply an error
to use in the Nothing
case
Implementation details
runDecode :: Decode e s a -> Vector s -> Ind -> (DecodeValidation e a, Last Bool, Ind) Source #
Convenience to get the underlying function out of a Decode
in a useful form
buildDecode :: (Vector s -> Ind -> (DecodeValidation e a, Last Bool, Ind)) -> Decode e s a Source #
Convenient constructor for Decode
that handles all the newtype noise for you.
runNamed :: NameDecode e s a -> Map s Ind -> DecodeValidation e (Decode e s a) Source #
Convenience to get the underlying function out of a NameDecode
in a useful form
anonymous :: Decode e s a -> NameDecode e s a Source #
Promote a Decode
to a NameDecode
that doesn't look for any names
makePositional :: Ord s => Vector s -> NameDecode e s a -> DecodeValidation e (Decode e s a) Source #
Given a header and a NameDecode
, resolve header names to positions and
return a Decode