{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ViewPatterns #-}
module Statistics.Quantile
(
ContParam(..)
, Default(..)
, quantile
, quantiles
, quantilesVec
, cadpw
, hazen
, spss
, s
, medianUnbiased
, normalUnbiased
, weightedAvg
, median
, mad
, midspread
, continuousBy
) where
import Data.Binary (Binary)
import Data.Aeson (ToJSON,FromJSON)
import Data.Data (Data,Typeable)
import Data.Default.Class
import Data.Functor
import qualified Data.Foldable as F
import Data.Vector.Generic ((!))
import qualified Data.Vector as V
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Storable as S
import GHC.Generics (Generic)
import Statistics.Function (partialSort)
weightedAvg :: G.Vector v Double =>
Int
-> Int
-> v Double
-> Double
weightedAvg k q x
| G.any isNaN x = modErr "weightedAvg" "Sample contains NaNs"
| n == 0 = modErr "weightedAvg" "Sample is empty"
| n == 1 = G.head x
| q < 2 = modErr "weightedAvg" "At least 2 quantiles is needed"
| k == q = G.maximum x
| k >= 0 || k < q = xj + g * (xj1 - xj)
| otherwise = modErr "weightedAvg" "Wrong quantile number"
where
j = floor idx
idx = fromIntegral (n - 1) * fromIntegral k / fromIntegral q
g = idx - fromIntegral j
xj = sx ! j
xj1 = sx ! (j+1)
sx = partialSort (j+2) x
n = G.length x
{-# SPECIALIZE weightedAvg :: Int -> Int -> U.Vector Double -> Double #-}
{-# SPECIALIZE weightedAvg :: Int -> Int -> V.Vector Double -> Double #-}
{-# SPECIALIZE weightedAvg :: Int -> Int -> S.Vector Double -> Double #-}
data ContParam = ContParam {-# UNPACK #-} !Double {-# UNPACK #-} !Double
deriving (Show,Eq,Ord,Data,Typeable,Generic)
instance Default ContParam where
def = s
instance Binary ContParam
instance ToJSON ContParam
instance FromJSON ContParam
quantile :: G.Vector v Double
=> ContParam
-> Int
-> Int
-> v Double
-> Double
quantile param q nQ xs
| nQ < 2 = modErr "continuousBy" "At least 2 quantiles is needed"
| badQ nQ q = modErr "continuousBy" "Wrong quantile number"
| G.any isNaN xs = modErr "continuousBy" "Sample contains NaNs"
| otherwise = estimateQuantile sortedXs pk
where
pk = toPk param n q nQ
sortedXs = psort xs $ floor pk + 1
n = G.length xs
{-# INLINABLE quantile #-}
{-# SPECIALIZE
quantile :: ContParam -> Int -> Int -> U.Vector Double -> Double #-}
{-# SPECIALIZE
quantile :: ContParam -> Int -> Int -> V.Vector Double -> Double #-}
{-# SPECIALIZE
quantile :: ContParam -> Int -> Int -> S.Vector Double -> Double #-}
quantiles :: (G.Vector v Double, F.Foldable f, Functor f)
=> ContParam
-> f Int
-> Int
-> v Double
-> f Double
quantiles param qs nQ xs
| nQ < 2 = modErr "quantiles" "At least 2 quantiles is needed"
| F.any (badQ nQ) qs = modErr "quantiles" "Wrong quantile number"
| G.any isNaN xs = modErr "quantiles" "Sample contains NaNs"
| fnull qs = 0 <$ qs
| otherwise = fmap (estimateQuantile sortedXs) ks'
where
ks' = fmap (\q -> toPk param n q nQ) qs
sortedXs = psort xs $ floor (F.maximum ks') + 1
n = G.length xs
{-# INLINABLE quantiles #-}
{-# SPECIALIZE quantiles
:: (Functor f, F.Foldable f) => ContParam -> f Int -> Int -> V.Vector Double -> f Double #-}
{-# SPECIALIZE quantiles
:: (Functor f, F.Foldable f) => ContParam -> f Int -> Int -> U.Vector Double -> f Double #-}
{-# SPECIALIZE quantiles
:: (Functor f, F.Foldable f) => ContParam -> f Int -> Int -> S.Vector Double -> f Double #-}
fnull :: F.Foldable f => f a -> Bool
#if !MIN_VERSION_base(4,8,0)
fnull = F.foldr (\_ _ -> False) True
#else
fnull = null
#endif
quantilesVec :: (G.Vector v Double, G.Vector v Int)
=> ContParam
-> v Int
-> Int
-> v Double
-> v Double
quantilesVec param qs nQ xs
| nQ < 2 = modErr "quantilesVec" "At least 2 quantiles is needed"
| G.any (badQ nQ) qs = modErr "quantilesVec" "Wrong quantile number"
| G.any isNaN xs = modErr "quantilesVec" "Sample contains NaNs"
| G.null qs = G.empty
| otherwise = G.map (estimateQuantile sortedXs) ks'
where
ks' = G.map (\q -> toPk param n q nQ) qs
sortedXs = psort xs $ floor (G.maximum ks') + 1
n = G.length xs
{-# INLINABLE quantilesVec #-}
{-# SPECIALIZE quantilesVec
:: ContParam -> V.Vector Int -> Int -> V.Vector Double -> V.Vector Double #-}
{-# SPECIALIZE quantilesVec
:: ContParam -> U.Vector Int -> Int -> U.Vector Double -> U.Vector Double #-}
{-# SPECIALIZE quantilesVec
:: ContParam -> S.Vector Int -> Int -> S.Vector Double -> S.Vector Double #-}
badQ :: Int -> Int -> Bool
badQ nQ q = q < 0 || q > nQ
toPk
:: ContParam
-> Int
-> Int
-> Int
-> Double
toPk (ContParam a b) (fromIntegral -> n) q nQ
= a + p * (n + 1 - a - b)
where
p = fromIntegral q / fromIntegral nQ
estimateQuantile :: G.Vector v Double => v Double -> Double -> Double
{-# INLINE estimateQuantile #-}
estimateQuantile sortedXs k'
= (1-g) * item (k-1) + g * item k
where
(k,g) = properFraction k'
item = (sortedXs !) . clamp
clamp = max 0 . min (n - 1)
n = G.length sortedXs
psort :: G.Vector v Double => v Double -> Int -> v Double
psort xs k = partialSort (max 0 $ min (G.length xs - 1) k) xs
{-# INLINE psort #-}
cadpw :: ContParam
cadpw = ContParam 0 1
hazen :: ContParam
hazen = ContParam 0.5 0.5
spss :: ContParam
spss = ContParam 0 0
s :: ContParam
s = ContParam 1 1
medianUnbiased :: ContParam
medianUnbiased = ContParam third third
where third = 1/3
normalUnbiased :: ContParam
normalUnbiased = ContParam ta ta
where ta = 3/8
modErr :: String -> String -> a
modErr f err = error $ "Statistics.Quantile." ++ f ++ ": " ++ err
median :: G.Vector v Double
=> ContParam
-> v Double
-> Double
{-# INLINE median #-}
median p = quantile p 1 2
midspread :: G.Vector v Double =>
ContParam
-> Int
-> v Double
-> Double
midspread param k x
| G.any isNaN x = modErr "midspread" "Sample contains NaNs"
| k <= 0 = modErr "midspread" "Nonpositive number of quantiles"
| otherwise = let Pair x1 x2 = quantiles param (Pair 1 (k-1)) k x
in x2 - x1
{-# INLINABLE midspread #-}
{-# SPECIALIZE midspread :: ContParam -> Int -> U.Vector Double -> Double #-}
{-# SPECIALIZE midspread :: ContParam -> Int -> V.Vector Double -> Double #-}
{-# SPECIALIZE midspread :: ContParam -> Int -> S.Vector Double -> Double #-}
data Pair a = Pair !a !a
deriving (Functor, F.Foldable)
mad :: G.Vector v Double
=> ContParam
-> v Double
-> Double
mad p xs
= median p $ G.map (abs . subtract med) xs
where
med = median p xs
{-# INLINABLE mad #-}
{-# SPECIALIZE mad :: ContParam -> U.Vector Double -> Double #-}
{-# SPECIALIZE mad :: ContParam -> V.Vector Double -> Double #-}
{-# SPECIALIZE mad :: ContParam -> S.Vector Double -> Double #-}
continuousBy :: G.Vector v Double =>
ContParam
-> Int
-> Int
-> v Double
-> Double
continuousBy = quantile
{-# DEPRECATED continuousBy "Use quantile instead" #-}