{-# LANGUAGE DataKinds #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Data.TDigest.Vector.Internal where
import Control.DeepSeq (NFData (..))
import Data.Either (isRight)
import Data.Foldable (toList)
import Data.List (foldl', sortBy)
import Data.List.NonEmpty (nonEmpty)
import Data.Ord (comparing)
import Data.Proxy (Proxy (..))
import Data.Semigroup (Semigroup (..))
import Data.Semigroup.Reducer (Reducer (..))
import GHC.TypeLits (KnownNat, Nat, natVal)
import Prelude ()
import Prelude.Compat
import qualified Data.Vector.Unboxed as VU
import Data.TDigest.Internal
import qualified Data.TDigest.Postprocess.Internal as PP
data TDigest (compression :: Nat) = TDigest
{ tdigestTotalWeight :: !Size
, tdigestData :: !(VU.Vector Centroid)
, tdigestBufferSize :: !Size
, tdigestBuffer :: [Double]
, tdigestDirection :: !Bool
}
deriving Show
instance KnownNat comp => Semigroup (TDigest comp) where
(<>) = combineTDigest
instance KnownNat comp => Monoid (TDigest comp) where
mempty = emptyTDigest
mappend = (<>)
instance KnownNat comp => Reducer Double (TDigest comp) where
cons = insert
snoc = flip insert
unit = singleton
instance NFData (TDigest comp) where
rnf (TDigest _ _ _ b _) = rnf b
instance KnownNat comp => PP.HasHistogram (TDigest comp) Maybe where
histogram = fmap PP.histogramFromCentroids . nonEmpty . VU.toList . tdigestData . finalize
totalWeight = totalWeight
size :: TDigest comp -> Int
size td = VU.length (tdigestData td) + tdigestBufferSize td
totalWeight :: TDigest comp -> Weight
totalWeight = fromIntegral . tdigestTotalWeight
minimumValue :: KnownNat comp => TDigest comp -> Mean
minimumValue td
| VU.null d = posInf
| otherwise = fst (VU.head d)
where
d = tdigestData (finalize td)
maximumValue :: KnownNat comp => TDigest comp -> Mean
maximumValue td
| VU.null d = posInf
| otherwise = fst (VU.last d)
where
d = tdigestData (finalize td)
ksize
:: Double
-> Double
-> Double
ksize comp q = comp * (asin (2 * clamp q - 1) / pi + 0.5)
clamp :: Double -> Double
clamp x
| x < 0.0 = 0.0
| x > 1.0 = 1.0
| otherwise = x
ksizeInv
:: Double
-> Double
-> Double
ksizeInv comp k
| k > comp = 1
| k < 0 = 0
| otherwise = 0.5 * (sin ((k / comp - 0.5) * pi) + 1)
merge :: Int -> Double -> [(Mean, Weight)] -> [(Mean, Weight)]
merge _ _ [] = []
merge tw' comp (y:ys) = go 0 (qLimit' 0) y ys
where
tw = fromIntegral tw'
qLimit' :: Double -> Double
qLimit' q0 = ksizeInv comp (ksize comp q0 + 1)
go :: Double
-> Double
-> (Mean, Weight)
-> [(Mean, Weight)]
-> [(Mean, Weight)]
go _q0 _qLimit sigma [] = [sigma]
go q0 qLimit sigma (x:xs)
| q <= qLimit = go q0 qLimit (plus sigma x) xs
| otherwise = sigma : go q0' (qLimit' q0') x xs
where
q = q0 + (snd sigma + snd x) / tw
q0' = q0 + snd sigma / tw
plus :: Centroid -> Centroid -> Centroid
plus (m1,w1) (m2,w2) = ((m1 * w1 + m2 * w2) / w, w) where w = w1 + w2
emptyTDigest :: TDigest comp
emptyTDigest = TDigest 0 mempty 0 mempty True
combineTDigest :: forall comp. KnownNat comp => TDigest comp -> TDigest comp -> TDigest comp
combineTDigest (TDigest tw d _ b dir) (TDigest tw' d' _ b' dir') =
TDigest (tw + tw') newD 0 [] (dir /= dir')
where
newD = VU.fromList
. merge tw comp
. sortBy (comparing fst)
$ VU.toList d ++ VU.toList d' ++ map (flip (,) 1) (b ++ b')
comp = fromInteger (natVal (Proxy :: Proxy comp)) * sizeCoefficient
finalize :: forall comp. KnownNat comp => TDigest comp -> TDigest comp
finalize td
| null (tdigestBuffer td) = td
| otherwise = forceCompress td
forceCompress :: forall comp. KnownNat comp => TDigest comp -> TDigest comp
forceCompress (TDigest tw d _bs b dir) = TDigest tw d' 0 [] (not dir)
where
d' = VU.fromList
. rev
. merge tw comp
. rev
. sortBy (comparing fst)
. (++ map (flip (,) 1) b)
. VU.toList
$ d
comp = fromInteger (natVal (Proxy :: Proxy comp)) * sizeCoefficient
rev | dir = id
| otherwise = reverse
compress :: forall comp. KnownNat comp => TDigest comp -> TDigest comp
compress t@(TDigest _ _ bs _ _)
| bs > compInt * 2 = forceCompress t
| otherwise = t
where
compInt = fromInteger (natVal (Proxy :: Proxy comp)) * sizeCoefficient
sizeCoefficient :: Num a => a
sizeCoefficient = 32
valid :: TDigest comp -> Bool
valid = isRight . validate
validate :: TDigest comp -> Either String (TDigest comp)
validate td@(TDigest tw d bs b _dir)
| not (bs == length b) =
Left $ "Buffer lenght don't match: " ++ show (bs, length b)
| not (tw == bs + round dw) =
Left $ "Total weight doesn't match"
| dl /= sortBy (comparing fst) dl =
Left $ "Data buffer isn't ordered"
| otherwise = Right td
where
dl :: [Centroid]
dl = VU.toList d
dw :: Double
dw = sum (map snd dl)
insert
:: KnownNat comp
=> Double
-> TDigest comp
-> TDigest comp
insert x = compress . insert' x
insert'
:: KnownNat comp
=> Double
-> TDigest comp
-> TDigest comp
insert' x (TDigest s d sb b dir) = TDigest (s + 1) d (sb + 1) (x : b) dir
singleton :: Double -> TDigest comp
singleton x = TDigest 1 (VU.singleton (x, 1)) 0 [] True
tdigest :: (Foldable f, KnownNat comp) => f Double -> TDigest comp
tdigest = foldl' (flip insert) mempty . toList