{-# LANGUAGE DataKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE PolyKinds #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE UndecidableInstances #-} {-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE BangPatterns #-} {-# OPTIONS_GHC -fwarn-incomplete-patterns #-} {-| Module : Control.MapReduce.Engines Description : map-reduce-folds builders Copyright : (c) Adam Conner-Sax 2019 License : BSD-3-Clause Maintainer : adam_conner_sax@yahoo.com Stability : experimental Types and functions used by all the engines. Notes: 1. The provided grouping functions group elements into a 'Data.Sequence.Seq' as this is a good default choice. 2. The <http://hackage.haskell.org/package/streamly Streamly> engine is the fastest in my benchmarks. It's the engine used by default if you import @Control.MapReduce.Simple@. 3. All the engines take a grouping function as a parameter and default ones are provided. For simple map/reduce, the grouping step may be the bottleneck and I wanted to leave room for experimentation. I've tried (and failed!) to find anything faster than using 'Map' or 'HashMap' via @toList . fromListWith (<>)@. -} module Control.MapReduce.Engines ( -- * Fold Types MapReduceFold , MapReduceFoldM -- * Engine Helpers , reduceFunction , reduceFunctionM -- * @groupBy@ Helpers , fromListWithHT ) where import qualified Control.MapReduce.Core as MRC import qualified Control.Foldl as FL import Control.Monad.ST as ST import Data.Hashable ( Hashable ) import qualified Data.HashTable.Class as HT -- | Type-alias for a map-reduce-fold engine type MapReduceFold y k c q x d = MRC.Unpack x y -> MRC.Assign k y c -> MRC.Reduce k c d -> FL.Fold x (q d) -- | Type-alias for a monadic (effectful) map-reduce-fold engine type MapReduceFoldM m y k c q x d = MRC.UnpackM m x y -> MRC.AssignM m k y c -> MRC.ReduceM m k c d -> FL.FoldM m x (q d) -- | Turn @Reduce@ into a function we can apply reduceFunction :: (Foldable h, Functor h) => MRC.Reduce k x d -> k -> h x -> d reduceFunction (MRC.Reduce f) k = f k reduceFunction (MRC.ReduceFold f) k = FL.fold (f k) {-# INLINABLE reduceFunction #-} -- | Turn @ReduceM@ into a function we can apply reduceFunctionM :: (Traversable h, Monad m) => MRC.ReduceM m k x d -> k -> h x -> m d reduceFunctionM (MRC.ReduceM f) k = f k reduceFunctionM (MRC.ReduceFoldM f) k = FL.foldM (f k) {-# INLINABLE reduceFunctionM #-} {- -- copied from Frames -- which causes overlapping instances. instance {-# OVERLAPPABLE #-} Grouping Text where grouping = contramap hash grouping -} {- | an implementation of @fromListWith@ for mutable hashtables from the <http://hackage.haskell.org/package/hashtables-1.2.3.1 hastables> package. Basically a copy @fromList@ from that package using mutate instead of insert to apply the given function if the was already in the map. Might not be the ideal implementation. Notes: * This function is specific hashtable agnostic so you'll have to supply a specific implementation from the package via TypeApplication * This function returns the hash-table in the @ST@ monad. You can fold over it (using @foldM@ from @hashtables@) and then use @runST@ to get the grouped structure out. -} fromListWithHT :: forall h k v s . (HT.HashTable h, Eq k, Hashable k) => (v -> v -> v) -> [(k, v)] -> ST.ST s (h s k v) fromListWithHT f l = do ht <- HT.new go ht l where g x mx = (Just $ maybe x (`f` x) mx, ()) go ht = go' where go' [] = return ht go' ((!k, !v) : xs) = do HT.mutate ht k (g v) go' xs {-# INLINABLE fromListWithHT #-}