{-# LANGUAGE MultiParamTypeClasses, FlexibleContexts #-}
module Data.Random.Distribution where

import Data.Random.Lift
import Data.Random.RVar

-- |A 'Distribution' is a data representation of a random variable's probability
-- structure.  For example, in "Data.Random.Distribution.Normal", the 'Normal'
-- distribution is defined as:
--
-- > data Normal a
-- >     = StdNormal
-- >     | Normal a a
--
-- Where the two parameters of the 'Normal' data constructor are the mean and
-- standard deviation of the random variable, respectively.  To make use of
-- the 'Normal' type, one can convert it to an 'rvar' and manipulate it or
-- sample it directly:
--
-- > x <- sample (rvar (Normal 10 2))
-- > x <- sample (Normal 10 2)
--
-- A 'Distribution' is typically more transparent than an 'RVar'
-- but less composable (precisely because of that transparency).  There are
-- several practical uses for types implementing 'Distribution':
--
--  * Typically, a 'Distribution' will expose several parameters of a standard
--    mathematical model of a probability distribution, such as mean and std deviation for
--    the normal distribution.  Thus, they can be manipulated analytically using
--    mathematical insights about the distributions they represent.  For example,
--    a collection of Bernoulli variables could be simplified into a (hopefully) smaller
--    collection of binomial variables.
--
--  * Because they are generally just containers for parameters, they can be
--    easily serialized to persistent storage or read from user-supplied
--    configurations (eg, initialization data for a simulation).
--
--  * If a type additionally implements the 'CDF' subclass, which extends
--    'Distribution' with a cumulative distribution function, an arbitrary random
--    variable 'x' can be tested against the distribution by testing
--    @fmap (cdf dist) x@ for uniformity.
--
-- On the other hand, most 'Distribution's will not be closed under all the
-- same operations as 'RVar' (which, being a monad, has a fully Turing-complete
-- internal computational model).  The sum of two uniformly-distributed
-- variables, for example, is not uniformly distributed.  To support general
-- composition, the 'Distribution' class defines a function 'rvar' to
-- construct the more-abstract and more-composable 'RVar' representation
-- of a random variable.
--
-- The two methods are defined in terms of each other by default, so an
-- instance must implement at least one of 'rvar' or 'rvarT'; an instance
-- that implements neither will loop when sampled.
class Distribution d t where
    {-# MINIMAL rvar | rvarT #-}

    -- |Return a random variable with this distribution.
    --
    -- Defaults to 'rvarT'.
    rvar :: d t -> RVar t
    rvar = rvarT

    -- |Return a random variable with the given distribution, pre-lifted to an arbitrary 'RVarT'.
    -- Any arbitrary 'RVar' can also be converted to an 'RVarT m' for an arbitrary 'm', using
    -- either 'lift' or 'sample'.
    --
    -- Defaults to 'lift'ing the result of 'rvar'.
    rvarT :: d t -> RVarT n t
    rvarT d = lift (rvar d)

-- FIXME: I am not sure about giving default instances

-- |A 'Distribution' that additionally provides 'pdf' and 'logPdf'
-- (going by the names, the probability density function and its natural
-- logarithm).
--
-- Each method defaults to a transformation of the other ('exp' of 'logPdf',
-- 'log' of 'pdf'), so an instance must implement at least one of them; an
-- instance that implements neither will loop when either is evaluated.
class Distribution d t => PDF d t where
    {-# MINIMAL pdf | logPdf #-}

    -- |Value of the density at the given point.
    --
    -- Defaults to @exp . logPdf d@.
    pdf :: d t -> t -> Double
    pdf d = exp . logPdf d

    -- |Logarithm of 'pdf' at the given point.
    --
    -- Defaults to @log . pdf d@.
    logPdf :: d t -> t -> Double
    logPdf d = log . pdf d

class Distribution d t => CDF d t where
    -- |Return the cumulative distribution function of this distribution.
    -- That is, a function taking @x :: t@ to the probability that the next
    -- sample will return a value less than or equal to x, according to some
    -- order or partial order (not necessarily an obvious one).
    --
    -- In the case where 't' is an instance of Ord, 'cdf' should correspond
    -- to the CDF with respect to that order.
    --
    -- In other cases, 'cdf' is only required to satisfy the following law:
    -- @fmap (cdf d) (rvar d)@
    -- must be uniformly distributed over (0,1).  Inclusion of either endpoint is optional,
    -- though the preferred range is (0,1].
    --
    -- Note that this definition requires that 'cdf' for a product type
    -- should _not_ be a joint CDF as commonly defined, as that definition
    -- violates both conditions.
    -- Instead, it should be a univariate CDF over the product type.  That is,
    -- it should represent the CDF with respect to the lexicographic order
    -- of the product.
    --
    -- The present specification is probably only really useful for testing
    -- conformance of a variable to its target distribution, and I am open to
    -- suggestions for more-useful specifications (especially with regard to
    -- the interaction with product types).
    cdf :: d t -> t -> Double