Copyright | (C) CSIRO 2017-2018 |
---|---|
License | BSD3 |
Maintainer | George Wilson <george.wilson@data61.csiro.au> |
Stability | experimental |
Portability | non-portable |
Safe Haskell | None |
Language | Haskell2010 |
This module contains data structures, combinators, and primitives for
decoding an Sv
into a list of your Haskell datatype.
A file can be read with parseDecodeFromFile
. If you already have the text
data in memory, it can be decoded with parseDecode
.
You will need a Decode
for your desired type.
A Decode
can be built using the primitives in this file. Decode
is an Applicative
and an Alt
, allowing for composition
of these values with <*>
and <!>.
The primitive Decode
s in this file which use ByteString
expect UTF-8
encoding. The Decode type has an instance of Profunctor
,
so you can lmap
or alterInput
to reencode on the way in.
This module is intended to be imported qualified like so
import qualified Data.Sv.Decode as D
- newtype Decode e s a = Decode {
- unwrapDecode :: Compose (DecodeState s) (DecodeValidation e) a
- type Decode' s = Decode s s
- data Validation err a :: * -> * -> *
- type DecodeValidation e = Validation (DecodeErrors e)
- data DecodeError e
- = UnexpectedEndOfRow
- | ExpectedEndOfRow (Vector (SpacedField e))
- | UnknownCategoricalValue e [[e]]
- | BadParse e
- | BadDecode e
- newtype DecodeErrors e = DecodeErrors (NonEmpty (DecodeError e))
- decode :: Decode' s a -> Sv s -> DecodeValidation s [a]
- parseDecode :: Decode' ByteString a -> ParseOptions ByteString -> ByteString -> DecodeValidation ByteString [a]
- parseDecode' :: SvParser s -> Decode' s a -> ParseOptions s -> s -> DecodeValidation s [a]
- parseDecodeFromFile :: MonadIO m => Decode' ByteString a -> ParseOptions ByteString -> FilePath -> m (DecodeValidation ByteString [a])
- parseDecodeFromFile' :: MonadIO m => SvParser s -> Decode' s a -> ParseOptions s -> FilePath -> m (DecodeValidation s [a])
- decodeMay :: DecodeError e -> (s -> Maybe a) -> Decode e s a
- decodeEither :: (s -> Either (DecodeError e) a) -> Decode e s a
- decodeEither' :: (e -> DecodeError e') -> (s -> Either e a) -> Decode e' s a
- mapErrors :: (e -> x) -> Decode e s a -> Decode x s a
- alterInput :: (e -> x) -> (t -> s) -> Decode e s a -> Decode x t a
- contents :: Decode e s s
- untrimmed :: Monoid s => (HorizontalSpace -> s) -> Decode e s s
- raw :: Decode e s (SpacedField s)
- char :: Decode' ByteString Char
- byteString :: Decode' ByteString ByteString
- utf8 :: Decode' ByteString Text
- lazyUtf8 :: Decode' ByteString Text
- lazyByteString :: Decode' ByteString ByteString
- string :: Decode' ByteString String
- int :: Decode' ByteString Int
- integer :: Decode' ByteString Integer
- float :: Decode' ByteString Float
- double :: Decode' ByteString Double
- boolean :: (IsString s, Ord s) => Decode' s Bool
- boolean' :: Ord s => (String -> s) -> Decode' s Bool
- ignore :: Decode e s ()
- replace :: a -> Decode e s a
- exactly :: (Semigroup s, Eq s, IsString s) => s -> Decode' s s
- emptyField :: (Eq s, IsString s, Semigroup s) => Decode' s ()
- row :: Decode e s (Vector s)
- rowWithSpacing :: Decode e s (Vector (SpacedField s))
- choice :: Decode e s a -> Decode e s a -> Decode e s a
- element :: NonEmpty (Decode e s a) -> Decode e s a
- optionalField :: Decode e s a -> Decode e s (Maybe a)
- ignoreFailure :: Decode e s a -> Decode e s (Maybe a)
- orEmpty :: (Eq s, IsString s, Semigroup s) => Decode' s a -> Decode' s (Maybe a)
- either :: Decode e s a -> Decode e s b -> Decode e s (Either a b)
- orElse :: Decode e s a -> a -> Decode e s a
- orElseE :: Decode e s b -> a -> Decode e s (Either a b)
- categorical :: (Ord s, Show a) => [(a, s)] -> Decode' s a
- categorical' :: forall s a. (Ord s, Show a) => [(a, [s])] -> Decode' s a
- (>>==) :: Decode e s a -> (a -> DecodeValidation e b) -> Decode e s b
- (==<<) :: (a -> DecodeValidation e b) -> Decode e s a -> Decode e s b
- bindDecode :: Decode e s a -> (a -> Decode e s b) -> Decode e s b
- decodeRead :: Readable a => Decode' ByteString a
- decodeRead' :: Readable a => ByteString -> Decode' ByteString a
- decodeReadWithMsg :: Readable a => (ByteString -> e) -> Decode e ByteString a
- withTrifecta :: Parser a -> Decode' ByteString a
- withAttoparsec :: Parser a -> Decode' ByteString a
- withParsec :: Parsec ByteString () a -> Decode' ByteString a
- onError :: Decode e s a -> (DecodeErrors e -> Decode e s a) -> Decode e s a
- decodeError :: DecodeError e -> DecodeValidation e a
- unexpectedEndOfRow :: DecodeValidation e a
- expectedEndOfRow :: Vector (SpacedField e) -> DecodeValidation e a
- unknownCategoricalValue :: e -> [[e]] -> DecodeValidation e a
- badParse :: e -> DecodeValidation e a
- badDecode :: e -> DecodeValidation e a
- validateEither :: Either (DecodeError e) a -> DecodeValidation e a
- validateEither' :: (e -> DecodeError e') -> Either e a -> DecodeValidation e' a
- validateMaybe :: DecodeError e -> Maybe b -> DecodeValidation e b
- validateMaybe' :: (a -> Maybe b) -> DecodeError e -> a -> DecodeValidation e b
- runDecode :: Decode e s a -> Vector (SpacedField s) -> Ind -> (DecodeValidation e a, Ind)
- buildDecode :: (Vector (SpacedField s) -> Ind -> (DecodeValidation e a, Ind)) -> Decode e s a
- mkDecode :: (s -> DecodeValidation e a) -> Decode e s a
- mkDecodeWithQuotes :: (Field s -> DecodeValidation e a) -> Decode e s a
- mkDecodeWithSpaces :: (SpacedField s -> DecodeValidation e a) -> Decode e s a
- promote :: Decode' s a -> Record s -> DecodeValidation s a
The types
A 'Decode e s a' is for decoding some fields from a CSV row into our type a
.
The second type parameter (s
) is the input string type
(usually ByteString
or Text
).
The first type parameter (e
) is the type of strings which occur in errors.
Under most circumstances you want these type parameters to coincide, but they
don't have to. They are two separate type parameters instead of one so that
Decode
can have a Profunctor
instance.
There are primitive Decode
s, and combinators for composing or
otherwise manipulating them. In particular, Decode
is an
Applicative
functor and an Alt
from the semigroupoids package.
Decode
is not a Monad
, but we can perform monad-like operations on
it with >>==
and bindDecode.
Decode | |
|
data Validation err a :: * -> * -> * #
A Validation
is either a value of the type err
or a
, similar to Either
. However,
the Applicative
instance for Validation
accumulates errors using a Semigroup
on err
.
In contrast, the Applicative
for Either
returns only the first error.
A consequence of this is that Validation
has no Bind
or Monad
instance. This is because
such an instance would violate the law that a Monad's ap
must equal the
Applicative
's <*>
An example of typical usage can be found here.
Bitraversable Validation | |
Bifoldable Validation | |
Bifunctor Validation | |
Swapped Validation | |
Validate Validation | |
Functor (Validation err) | |
Semigroup err => Applicative (Validation err) | |
Foldable (Validation err) | |
Traversable (Validation err) | |
Semigroup err => Apply (Validation err) | |
Alt (Validation err) | |
(Eq a, Eq err) => Eq (Validation err a) | |
(Data a, Data err) => Data (Validation err a) | |
(Ord a, Ord err) => Ord (Validation err a) | |
(Show a, Show err) => Show (Validation err a) | |
Generic (Validation err a) | |
Semigroup e => Semigroup (Validation e a) | |
Monoid e => Monoid (Validation e a) | |
(NFData e, NFData a) => NFData (Validation e a) | |
type Rep (Validation err a) | |
type DecodeValidation e = Validation (DecodeErrors e) Source #
DecodeValidation
is the error-accumulating Applicative
underlying
Decode
data DecodeError e Source #
DecodeError
is a value indicating what went wrong during a parse or
decode. Its constructor indicates the type of error which occurred, and
there is usually an associated string with more finely-grained details.
UnexpectedEndOfRow | I was looking for another field, but I am at the end of the row |
ExpectedEndOfRow (Vector (SpacedField e)) | I should be at the end of the row, but I found extra fields |
UnknownCategoricalValue e [[e]] | This decoder was built using the categorical primitive for categorical data |
BadParse e | The parser failed, meaning decoding proper didn't even begin |
BadDecode e | Some other kind of decoding failure occurred |
Functor DecodeError Source # | |
Eq e => Eq (DecodeError e) Source # | |
Ord e => Ord (DecodeError e) Source # | |
Show e => Show (DecodeError e) Source # | |
Generic (DecodeError e) Source # | |
NFData e => NFData (DecodeError e) Source # | |
type Rep (DecodeError e) Source # | |
newtype DecodeErrors e Source #
DecodeErrors
is a Semigroup
full of DecodeError
. It is used as the
error side of a DecodeValidation
. When multiple errors occur, they will
be collected.
DecodeErrors (NonEmpty (DecodeError e)) |
Functor DecodeErrors Source # | |
Eq e => Eq (DecodeErrors e) Source # | |
Ord e => Ord (DecodeErrors e) Source # | |
Show e => Show (DecodeErrors e) Source # | |
Generic (DecodeErrors e) Source # | |
Semigroup (DecodeErrors e) Source # | |
NFData e => NFData (DecodeErrors e) Source # | |
type Rep (DecodeErrors e) Source # | |
Running Decodes
decode :: Decode' s a -> Sv s -> DecodeValidation s [a] Source #
Decodes an Sv into a list of its values using the provided Decode
parseDecode :: Decode' ByteString a -> ParseOptions ByteString -> ByteString -> DecodeValidation ByteString [a] Source #
Parse a ByteString
as an Sv, and then decode it with the given decoder.
This version uses Trifecta
to parse the ByteString
, which is assumed to
be UTF-8 encoded. If you want a different library, use parseDecode'
.
parseDecode' :: SvParser s -> Decode' s a -> ParseOptions s -> s -> DecodeValidation s [a] Source #
Parse text as an Sv, and then decode it with the given decoder.
This version lets you choose which parsing library to use by providing an
SvParser
. Common selections are trifecta
and attoparsecByteString
.
parseDecodeFromFile :: MonadIO m => Decode' ByteString a -> ParseOptions ByteString -> FilePath -> m (DecodeValidation ByteString [a]) Source #
Load a file, parse it, and decode it.
This version uses Trifecta to parse the file, which is assumed to be UTF-8 encoded.
parseDecodeFromFile' :: MonadIO m => SvParser s -> Decode' s a -> ParseOptions s -> FilePath -> m (DecodeValidation s [a]) Source #
Load a file, parse it, and decode it.
This version lets you choose which parsing library to use by providing an
SvParser
. Common selections are trifecta
and attoparsecByteString
.
Convenience constructors and functions
decodeEither :: (s -> Either (DecodeError e) a) -> Decode e s a Source #
decodeEither' :: (e -> DecodeError e') -> (s -> Either e a) -> Decode e' s a Source #
mapErrors :: (e -> x) -> Decode e s a -> Decode x s a Source #
Map over the errors of a Decode
To map over the other two parameters, use the Profunctor
instance.
alterInput :: (e -> x) -> (t -> s) -> Decode e s a -> Decode x t a Source #
This transforms a Decode' s a
into a Decode' t a
. It needs
functions in both directions because the errors can include fragments of the
input.
alterInput :: (s -> t) -> (t -> s) -> Decode' s a -> Decode' t a
Primitive Decodes
Field-based
contents :: Decode e s s Source #
Get the contents of a field without doing any decoding. This never fails.
untrimmed :: Monoid s => (HorizontalSpace -> s) -> Decode e s s Source #
Returns the field contents. This keeps the spacing around an unquoted field.
raw :: Decode e s (SpacedField s) Source #
Succeeds with the whole field structure, including spacing and quoting information
char :: Decode' ByteString Char Source #
Get a field that's a single char. This will fail if there are multiple characters in the field.
byteString :: Decode' ByteString ByteString Source #
Get the contents of a field as a bytestring.
Alias for contents
utf8 :: Decode' ByteString Text Source #
Get the contents of a UTF-8 encoded field as Text
This will also work for ASCII text, as ASCII is a subset of UTF-8
lazyByteString :: Decode' ByteString ByteString Source #
Get the contents of a field as a lazy ByteString
int :: Decode' ByteString Int Source #
Decode a UTF-8 ByteString
field as an Int
integer :: Decode' ByteString Integer Source #
Decode a UTF-8 ByteString
field as an Integer
float :: Decode' ByteString Float Source #
Decode a UTF-8 ByteString
field as a Float
double :: Decode' ByteString Double Source #
Decode a UTF-8 ByteString
field as a Double
boolean :: (IsString s, Ord s) => Decode' s Bool Source #
Decode a field as a Bool
This aims to be tolerant to different forms a boolean might take.
boolean' :: Ord s => (String -> s) -> Decode' s Bool Source #
Decode a field as a Bool
. This version lets you provide the fromString
function that's right for you, since IsString
on a
ByteString
will do the wrong thing in the case of many
encodings such as UTF-16 or UTF-32.
This aims to be tolerant to different forms a boolean might take.
ignore :: Decode e s () Source #
Throw away the contents of a field. This is useful for skipping unneeded fields.
replace :: a -> Decode e s a Source #
Throw away the contents of a field, and return the given value.
exactly :: (Semigroup s, Eq s, IsString s) => s -> Decode' s s Source #
Decode exactly the given string, or else fail.
emptyField :: (Eq s, IsString s, Semigroup s) => Decode' s () Source #
Succeed only when the given field is the empty string.
The empty string surrounded in quotes or spaces is still the empty string.
Row-based
rowWithSpacing :: Decode e s (Vector (SpacedField s)) Source #
Grab the whole row, including all spacing and quoting information,
as a Vector
Combinators
optionalField :: Decode e s a -> Decode e s (Maybe a) Source #
Try the given Decode
. If it fails, succeed without consuming anything.
This usually isn't what you want. ignoreFailure
and orEmpty
are more
likely what you are after.
either :: Decode e s a -> Decode e s b -> Decode e s (Either a b) Source #
Try the first, then try the second, and wrap the winner in an Either
.
This is left-biased, meaning if they both succeed, left wins.
orElse :: Decode e s a -> a -> Decode e s a Source #
Try the given decoder, otherwise succeed with the given value.
orElseE :: Decode e s b -> a -> Decode e s (Either a b) Source #
Try the given decoder, or if it fails succeed with the given value, in an Either
.
categorical :: (Ord s, Show a) => [(a, s)] -> Decode' s a Source #
Decode categorical data, given a list of the values and the strings which match them.
Usually this is used with sum types with nullary constructors.
data TrafficLight = Red | Amber | Green categorical [(Red, "red"), (Amber, "amber"), (Green, "green")]
categorical' :: forall s a. (Ord s, Show a) => [(a, [s])] -> Decode' s a Source #
Decode categorical data, given a list of the values and lists of strings which match them.
This version allows for multiple strings to match each value, which is useful for when the categories are inconsistently labelled.
data TrafficLight = Red | Amber | Green categorical' [(Red, ["red", "R"]), (Amber, ["amber", "orange", "A"]), (Green, ["green", "G"])]
For another example of its usage, see the source for boolean
.
(>>==) :: Decode e s a -> (a -> DecodeValidation e b) -> Decode e s b infixl 1 Source #
This can be used to build a Decode
whose value depends on the
result of another Decode
. This is especially useful since Decode
is not
a Monad
.
If you need something like this but with more power, look at bindDecode
bindDecode :: Decode e s a -> (a -> Decode e s b) -> Decode e s b Source #
Bind through a Decode
.
This bind does not agree with the Applicative
instance because it does
not accumulate multiple error values. This is a violation of the Monad
laws, meaning Decode
is not a Monad
.
That is not to say that there is anything wrong with using this function. It can be quite useful.
Building Decodes from Readable
decodeRead :: Readable a => Decode' ByteString a Source #
Use the Readable
instance to try to decode the given value.
decodeRead' :: Readable a => ByteString -> Decode' ByteString a Source #
Use the Readable
instance to try to decode the given value,
or fail with the given error message.
decodeReadWithMsg :: Readable a => (ByteString -> e) -> Decode e ByteString a Source #
Use the Readable
instance to try to decode the given value,
or use the value to build an error message.
Building Decodes from parsers
withTrifecta :: Parser a -> Decode' ByteString a Source #
Build a Decode
from a Trifecta parser
withAttoparsec :: Parser a -> Decode' ByteString a Source #
Build a Decode
from an Attoparsec parser
withParsec :: Parsec ByteString () a -> Decode' ByteString a Source #
Build a Decode
from a Parsec parser
Working with errors
decodeError :: DecodeError e -> DecodeValidation e a Source #
Build a failing DecodeValidation
unexpectedEndOfRow :: DecodeValidation e a Source #
Fail with UnexpectedEndOfRow
expectedEndOfRow :: Vector (SpacedField e) -> DecodeValidation e a Source #
Fail with ExpectedEndOfRow
. This takes the rest of the row, so that it
can be displayed to the user.
unknownCategoricalValue :: e -> [[e]] -> DecodeValidation e a Source #
Fail with UnknownCategoricalValue
.
It takes the unknown value and the list of good categorical values.
This mostly exists to be used by the categorical
function.
badParse :: e -> DecodeValidation e a Source #
Fail with BadParse
with the given message. This is for when the parse
step fails, and decoding does not even begin.
badDecode :: e -> DecodeValidation e a Source #
Fail with BadDecode
with the given message. This is something of a
generic error for when decoding a field goes wrong.
validateEither :: Either (DecodeError e) a -> DecodeValidation e a Source #
Build a DecodeValidation
from an Either
validateEither' :: (e -> DecodeError e') -> Either e a -> DecodeValidation e' a Source #
Build a DecodeValidation
from an Either
, given a function to build the error.
validateMaybe :: DecodeError e -> Maybe b -> DecodeValidation e b Source #
Build a DecodeValidation
from a Maybe
. You have to supply an error
to use in the Nothing
case
validateMaybe' :: (a -> Maybe b) -> DecodeError e -> a -> DecodeValidation e b Source #
Build a DecodeValidation
from a function that returns a Maybe.
You have to supply an error to use in the Nothing
case
Implementation details
runDecode :: Decode e s a -> Vector (SpacedField s) -> Ind -> (DecodeValidation e a, Ind) Source #
Convenience to get the underlying function out of a Decode in a useful form
buildDecode :: (Vector (SpacedField s) -> Ind -> (DecodeValidation e a, Ind)) -> Decode e s a Source #
Convenient constructor for Decode
that handles all the newtype noise for you.
mkDecode :: (s -> DecodeValidation e a) -> Decode e s a Source #
Build a Decode
from a function.
This version gives you just the contents of the field, with no information about the spacing or quoting around that field.
mkDecodeWithQuotes :: (Field s -> DecodeValidation e a) -> Decode e s a Source #
mkDecodeWithSpaces :: (SpacedField s -> DecodeValidation e a) -> Decode e s a Source #
Build a Decode
from a function.
This version gives you access to the whole SpacedField
, which includes
information about spacing both before and after the field, and about quotes
if they were used.