Copyright | (c) Marco Zocca (2018-2020) |
---|---|
License | BSD-style |
Maintainer | ocramz fripost org |
Stability | experimental |
Portability | GHC |
Safe Haskell | None |
Language | Haskell2010 |
Synopsis
- data Frame row
- encode :: (Foldable t, Heidi a) => t a -> Frame (Row [TC] VP)
- class Heidi a
- data TC
- data VP
- frameFromList :: [row] -> Frame row
- head :: Frame row -> row
- take :: Int -> Frame row -> Frame row
- drop :: Int -> Frame row -> Frame row
- numRows :: Frame row -> Int
- filter :: (row -> Bool) -> Frame row -> Frame row
- filterA :: Applicative f => (row -> f Bool) -> Frame row -> f (Frame row)
- groupWith :: (row -> row -> Bool) -> Frame row -> [Frame row]
- zipWith :: (a -> b -> c) -> Frame a -> Frame b -> Frame c
- scanl :: (b -> a -> b) -> b -> Frame a -> Frame b
- scanr :: (a -> b -> b) -> b -> Frame a -> Frame b
- spreadWith :: (TrieKey k, Foldable t, Ord k, Ord v) => (v -> k) -> k -> k -> t (Row k v) -> Frame (Row k v)
- gatherWith :: (Foldable t, Ord k, TrieKey k) => (k -> v) -> Set k -> k -> k -> t (Row k v) -> Frame (Row k v)
- groupBy :: (Foldable t, TrieKey k, Eq k, Ord v) => k -> t (Row k v) -> Map v (Frame (Row k v))
- innerJoin :: (Foldable t, Ord v, TrieKey k, Eq v, Eq k) => k -> k -> t (Row k v) -> t (Row k v) -> Frame (Row k v)
- leftOuterJoin :: (Foldable t, Ord v, TrieKey k, Eq v, Eq k) => k -> k -> t (Row k v) -> t (Row k v) -> Frame (Row k v)
- toVector :: Frame row -> Vector row
- fromVector :: Vector row -> Frame row
- data Row k v
- rowFromList :: TrieKey k => [(k, v)] -> Row k v
- toList :: TrieKey k => Row k v -> [(k, v)]
- keys :: TrieKey k => Row k v -> [k]
- delete :: TrieKey k => k -> Row k v -> Row k v
- filterWithKey :: TrieKey k => (k -> v -> Bool) -> Row k v -> Row k v
- filterWithKeyPrefix :: (TrieKey a, Eq a) => [a] -> Row [a] v -> Row [a] v
- filterWithKeyAny :: (TrieKey a, Eq a) => a -> Row [a] v -> Row [a] v
- deleteMany :: (TrieKey k, Foldable t) => t k -> Row k v -> Row k v
- partitionWithKey :: TrieKey k => (k -> v -> Bool) -> Row k v -> (Row k v, Row k v)
- partitionWithKeyPrefix :: (TrieKey a, Eq a) => [a] -> Row [a] v -> (Row [a] v, Row [a] v)
- lookup :: TrieKey k => k -> Row k v -> Maybe v
- (!:) :: TrieKey k => k -> (a -> Bool) -> Row k a -> Bool
- elemSatisfies :: TrieKey k => (a -> Bool) -> k -> Row k a -> Bool
- maybeEmpty :: TrieKey k => Maybe (Row k v) -> Row k v
- eqByLookup :: (TrieKey k, Eq k, Eq a) => k -> Row k a -> Row k a -> Maybe Bool
- eqByLookups :: (Foldable t, TrieKey k, Eq k, Eq a) => t k -> Row k a -> Row k a -> Maybe Bool
- compareByLookup :: (TrieKey k, Eq k, Ord a) => k -> Row k a -> Row k a -> Maybe Ordering
- union :: TrieKey k => Row k v -> Row k v -> Row k v
- unionWith :: TrieKey k => (v -> v -> v) -> Row k v -> Row k v -> Row k v
- intersection :: TrieKey k => Row k v -> Row k b -> Row k v
- intersectionWith :: TrieKey k => (a -> b -> v) -> Row k a -> Row k b -> Row k v
- mapWithKey :: TrieKey k => (k -> a -> b) -> Row k a -> Row k b
- foldWithKey :: TrieKey k => (k -> a -> r -> r) -> r -> Row k a -> r
- keysOnly :: (TrieKey k, Foldable f) => f (Row k v) -> Row k ()
- traverseWithKey :: (Applicative f, TrieKey k) => (k -> a -> f b) -> Row k a -> f (Row k b)
- int :: TrieKey k => k -> Traversal' (Row k VP) Int
- bool :: TrieKey k => k -> Traversal' (Row k VP) Bool
- float :: TrieKey k => k -> Traversal' (Row k VP) Float
- double :: TrieKey k => k -> Traversal' (Row k VP) Double
- char :: TrieKey k => k -> Traversal' (Row k VP) Char
- string :: TrieKey k => k -> Traversal' (Row k VP) String
- text :: TrieKey k => k -> Traversal' (Row k VP) Text
- scientific :: TrieKey k => k -> Traversal' (Row k VP) Scientific
- oneHot :: TrieKey k => k -> Traversal' (Row k VP) (OneHot Int)
- at :: TrieKey k => k -> Lens' (Row k a) (Maybe a)
- keep :: Getting Any row a -> (a -> b) -> row -> Bool
- atPrefix :: (TrieKey k, Eq k) => [k] -> Lens' (Row [k] v) [v]
- eachPrefixed :: (TrieKey k, Eq k) => [k] -> Traversal' (Row [k] v) v
- foldPrefixed :: (TrieKey k, Eq k, Monoid r) => [k] -> Getting r (Row [k] v) v
- tcTyN :: TC -> String
- tcTyCon :: TC -> String
- mkTyN :: String -> TC
- mkTyCon :: String -> TC
Frame
A Frame
is a list of rows.
Instances
Functor Frame Source # | |
Foldable Frame Source # | |
Defined in Core.Data.Frame.List fold :: Monoid m => Frame m -> m # foldMap :: Monoid m => (a -> m) -> Frame a -> m # foldr :: (a -> b -> b) -> b -> Frame a -> b # foldr' :: (a -> b -> b) -> b -> Frame a -> b # foldl :: (b -> a -> b) -> b -> Frame a -> b # foldl' :: (b -> a -> b) -> b -> Frame a -> b # foldr1 :: (a -> a -> a) -> Frame a -> a # foldl1 :: (a -> a -> a) -> Frame a -> a # elem :: Eq a => a -> Frame a -> Bool # maximum :: Ord a => Frame a -> a # minimum :: Ord a => Frame a -> a # | |
Traversable Frame Source # | |
Show row => Show (Frame row) Source # | |
Construction
Encoding
encode :: (Foldable t, Heidi a) => t a -> Frame (Row [TC] VP) Source #
Populate a Frame
with the generic encoding of the row data
For example, a list of records having two fields each will produce a dataframe with two columns, having the record field names as column labels.
data P1 = P1 Int Char deriving (Eq, Show,Generic
) instanceHeidi
P1 data P2 = P2 { p2i :: Int, p2c :: Char } deriving (Eq, Show, Generic) instance Heidi P2 data Q = Q (Maybe Int) (Either Double Char) deriving (Eq, Show, Generic) instance Heidi Q
>>>
encode [P1 42 'z']
Frame {tableRows = [([TC "P1" "_0"],VPInt 42),([TC "P1" "_1"],VPChar 'z')] :| []}
>>>
encode [P2 42 'z']
Frame {tableRows = [([TC "P2" "p2c"],VPChar 'z'),([TC "P2" "p2i"],VPInt 42)] :| []}
Test using Maybe
and Either
record fields :
>>>
encode [Q (Just 42) (Left 1.2), Q Nothing (Right 'b')]
Frame {tableRows = [([TC "Q" "_0",TC "Maybe" "Just"],VPInt 42),([TC "Q" "_1",TC "Either" "Left"],VPDouble 1.2)] :| [[([TC "Q" "_1",TC "Either" "Right"],VPChar 'b')]]}
NB: as the last example above demonstrates, Nothing
values are not inserted in the rows, which can be used to encode missing data features.
Typeclass for types which have a generic encoding.
NOTE: if your type has a Generic
instance you just need to declare an empty instance of Heidi
for it.
example:
data A = A Int Char deriving (Generic
) instanceHeidi
A
Instances
A (type, constructor) name pair
Instances
Primitive types
NB : this is just a convenience for unityping the dataframe contents, but it should not be exposed to the library users
Direct
frameFromList :: [row] -> Frame row Source #
Access
Filtering
filterA :: Applicative f => (row -> f Bool) -> Frame row -> f (Frame row) Source #
This generalizes the list-based filter
function.
Grouping
groupWith :: (row -> row -> Bool) -> Frame row -> [Frame row] Source #
groupWith
takes row comparison function and a list and returns a list of lists such that the concatenation of the result is equal to the argument. Moreover, each sublist in the result contains only elements that satisfy the comparison.
Zipping
Scans
Data tidying
:: (TrieKey k, Foldable t, Ord k, Ord v) | |
=> (v -> k) | |
-> k | "key" key |
-> k | "value" key |
-> t (Row k v) | input dataframe |
-> Frame (Row k v) |
spreadWith
moves the unique values of a key column into the column names, spreading the values of a value column across the new columns.
:: (Foldable t, Ord k, TrieKey k) | |
=> (k -> v) | |
-> Set k | set of keys to gather |
-> k | "key" key |
-> k | "value" key |
-> t (Row k v) | input dataframe |
-> Frame (Row k v) |
gatherWith
moves column names into a "key" column, gathering the column values into a single "value" column
Relational operations
:: (Foldable t, TrieKey k, Eq k, Ord v) | |
=> k | Key to group by |
-> t (Row k v) | A 'Frame (GTR.Row k v) can be used here |
-> Map v (Frame (Row k v)) |
GROUP BY : given a key and a table that uses it, split the table in multiple tables, one per value taken by the key.
>>>
numRows <$> (HM.lookup "129" $ groupBy "id.0" t0)
Just 2
:: (Foldable t, Ord v, TrieKey k, Eq v, Eq k) | |
=> k | Key into the first table |
-> k | Key into the second table |
-> t (Row k v) | First dataframe |
-> t (Row k v) | Second dataframe |
-> Frame (Row k v) |
INNER JOIN : given two dataframes and one key from each, compute the inner join using the keys as relations.
>>>
head t0
[("id.0","129"),("qty","1"),("item","book")]
>>>
head t1
[("id.1","129"),("price","100")]
>>>
head $ innerJoin "id.0" "id.1" t0 t1
[("id.1","129"),("id.0","129"),("qty","5"),("item","book"),("price","100")]
leftOuterJoin :: (Foldable t, Ord v, TrieKey k, Eq v, Eq k) => k -> k -> t (Row k v) -> t (Row k v) -> Frame (Row k v) Source #
LEFT (OUTER) JOIN : given two dataframes and one key from each, compute the left outer join using the keys as relations.
Vector-related
fromVector :: Vector row -> Frame row Source #
Produce a Frame from a Vector
of rows
Row
A Row
type is internally a Trie:
- Fast random access
- Fast set operations
- Supports missing elements
Instances
TrieKey k => Functor (Row k) Source # | |
TrieKey k => Foldable (Row k) Source # | |
Defined in Heidi.Data.Row.GenericTrie fold :: Monoid m => Row k m -> m # foldMap :: Monoid m => (a -> m) -> Row k a -> m # foldr :: (a -> b -> b) -> b -> Row k a -> b # foldr' :: (a -> b -> b) -> b -> Row k a -> b # foldl :: (b -> a -> b) -> b -> Row k a -> b # foldl' :: (b -> a -> b) -> b -> Row k a -> b # foldr1 :: (a -> a -> a) -> Row k a -> a # foldl1 :: (a -> a -> a) -> Row k a -> a # elem :: Eq a => a -> Row k a -> Bool # maximum :: Ord a => Row k a -> a # minimum :: Ord a => Row k a -> a # | |
TrieKey k => Traversable (Row k) Source # | |
(TrieKey k, Eq k, Eq v) => Eq (Row k v) Source # | |
(TrieKey k, Eq k, Eq v, Ord k, Ord v) => Ord (Row k v) Source # | |
(TrieKey k, Show k, Show v) => Show (Row k v) Source # | |
Construction
rowFromList :: TrieKey k => [(k, v)] -> Row k v Source #
Construct a Row
from a list of key-element pairs.
>>>
lookup 3 (rowFromList [(3,'a'),(4,'b')])
Just 'a'>>>
lookup 6 (rowFromList [(3,'a'),(4,'b')])
Nothing
Access
Filtering
Returns a new Row
that doesn't have a given key-value pair
filterWithKey :: TrieKey k => (k -> v -> Bool) -> Row k v -> Row k v Source #
Filter a row by applying a predicate to its keys and corresponding elements.
NB : filtering _retains_ the elements that satisfy the predicate.
Retains the entries for which the given list is a prefix of the indexing key
filterWithKeyAny :: (TrieKey a, Eq a) => a -> Row [a] v -> Row [a] v Source #
Retains the entries for which the given item appears at any position in the indexing key
deleteMany :: (TrieKey k, Foldable t) => t k -> Row k v -> Row k v Source #
Produce a new Row
such that its keys do _not_ belong to a certain set.
Partitioning
Partition a Row
into two new ones, such as the elements that satisfy the predicate will end up in the _left_ row.
partitionWithKeyPrefix Source #
Uses partitionWithKey
internally
Lookup
lookup :: TrieKey k => k -> Row k v -> Maybe v Source #
Lookup the value stored at a given key in a row
>>>
lookup 0 row0
Just 'a'>>>
lookup 1 row0
Nothing
elemSatisfies :: TrieKey k => (a -> Bool) -> k -> Row k a -> Bool Source #
Looks up a key from a row and applies a predicate to its value (if this is found). If no value is found at that key the function returns False.
This function is meant to be used as first argument to filter
.
>>>
elemSatisfies (== 'a') 0 row0
True>>>
elemSatisfies (== 'a') 42 row0
False
Lookup utilities
maybeEmpty :: TrieKey k => Maybe (Row k v) -> Row k v Source #
Returns an empty row if the argument is Nothing.
Comparison by lookup
eqByLookup :: (TrieKey k, Eq k, Eq a) => k -> Row k a -> Row k a -> Maybe Bool Source #
Compares two rows by the values indexed at a specific key.
Returns Nothing if the key is not present in either row.
eqByLookups :: (Foldable t, TrieKey k, Eq k, Eq a) => t k -> Row k a -> Row k a -> Maybe Bool Source #
Compares two rows by the values indexed at a set of keys.
Returns Nothing if a key in either row is not present.
compareByLookup :: (TrieKey k, Eq k, Ord a) => k -> Row k a -> Row k a -> Maybe Ordering Source #
Compares for ordering two rows by the values indexed at a specific key.
Returns Nothing if the key is not present in either row.
Set operations
union :: TrieKey k => Row k v -> Row k v -> Row k v Source #
Set union of two rows
>>>
keys $ union row0 row1
[0,1,3,666]
unionWith :: TrieKey k => (v -> v -> v) -> Row k v -> Row k v -> Row k v Source #
Set union of two rows, using a combining function for equal keys
intersectionWith :: TrieKey k => (a -> b -> v) -> Row k a -> Row k b -> Row k v Source #
Set intersections of two rows, using a combining function for equal keys
Maps
mapWithKey :: TrieKey k => (k -> a -> b) -> Row k a -> Row k b Source #
Map over all elements with a function of both the key and the value
Folds
foldWithKey :: TrieKey k => (k -> a -> r -> r) -> r -> Row k a -> r Source #
Fold over a row with a function of both key and value
keysOnly :: (TrieKey k, Foldable f) => f (Row k v) -> Row k () Source #
Takes the union of a Foldable container of Row
s and discards the values
Traversals
traverseWithKey :: (Applicative f, TrieKey k) => (k -> a -> f b) -> Row k a -> f (Row k b) Source #
Traverse a Row
using a function of both the key and the element.
Lenses
bool :: TrieKey k => k -> Traversal' (Row k VP) Bool Source #
Decode a Bool
from the given column index
float :: TrieKey k => k -> Traversal' (Row k VP) Float Source #
Decode a Float
from the given column index
double :: TrieKey k => k -> Traversal' (Row k VP) Double Source #
Decode a Double
from the given column index
char :: TrieKey k => k -> Traversal' (Row k VP) Char Source #
Decode a Char
from the given column index
string :: TrieKey k => k -> Traversal' (Row k VP) String Source #
Decode a String
from the given column index
text :: TrieKey k => k -> Traversal' (Row k VP) Text Source #
Decode a Text
from the given column index
scientific :: TrieKey k => k -> Traversal' (Row k VP) Scientific Source #
Decode a Scientific
from the given column index
oneHot :: TrieKey k => k -> Traversal' (Row k VP) (OneHot Int) Source #
Decode a OneHot
from the given column index
Lens combinators
Helper for filtering Frame
s
e.g.
>>>
:t \k -> keep (text k) (== "hello")
:: GT.TrieKey k => k -> Row k VP -> Bool
Combinators for list-indexed rows
atPrefix
: a Lens' that takes a key prefix and relates a row having lists as keys and the subset of columns corresponding to keys having that prefix
:: (TrieKey k, Eq k) | |
=> [k] | key prefix of the columns of interest |
-> Traversal' (Row [k] v) v |
Focus on all elements that share a common key prefix
e.g.
>>> :t k ->toListOf
(eachPrefixed k .vpBool
) (GT.TrieKey k, Eq k) => [k] -> Row [k] VP -> [Bool]
:: (TrieKey k, Eq k, Monoid r) | |
=> [k] | key prefix of the columns of interest |
-> Getting r (Row [k] v) v |
Extract all elements that share a common key prefix into a monoidal value (e.g. a list)