Copyright | (c) Edward Kmett 2010-2021 |
---|---|
License | BSD3 |
Maintainer | ekmett@gmail.com |
Stability | experimental |
Portability | GHC only |
Safe Haskell | Safe-Inferred |
Language | Haskell2010 |
- AD modes
- Gradients (Reverse Mode)
- Higher Order Gradients (Sparse-on-Reverse)
- Variadic Gradients (Sparse or Kahn)
- Jacobians (Sparse or Reverse)
- Higher Order Jacobian (Sparse-on-Reverse)
- Transposed Jacobians (Forward Mode)
- Hessian (Sparse-On-Reverse)
- Hessian Tensors (Sparse or Sparse-On-Reverse)
- Hessian Tensors (Sparse)
- Hessian Vector Products (Forward-On-Reverse)
- Derivatives (Forward Mode)
- Derivatives (Tower)
- Directional Derivatives (Forward Mode)
- Directional Derivatives (Tower)
- Taylor Series (Tower)
- Maclaurin Series (Tower)
- Gradient Descent
- Working with towers
Mixed-Mode Automatic Differentiation, specialized to doubles.
Each combinator exported from this module chooses an appropriate AD mode. The following basic operations are supported, modified as appropriate by the suffixes below:
grad
computes the gradient (partial derivatives) of a function at a point
jacobian
computes the Jacobian matrix of a function at a point
diff
computes the derivative of a function at a point
du
computes a directional derivative of a function at a point
hessian
computes the Hessian matrix (matrix of second partial derivatives) of a function at a point
The suffixes have the following meanings:
'
-- also return the answer
With
lets the user supply a function to blend the input with the output
F
is a version of the base function lifted to return a Traversable (or Functor) result
s
means the function returns all higher derivatives in a list or f-branching Stream
T
means the result is transposed with respect to the traditional formulation.
0
means that the resulting derivative list is padded with 0s at the end.
Synopsis
- data AD s a
- class (Num t, Num (Scalar t)) => Mode t where
- type family Scalar t
- grad :: Traversable f => (forall s. (Reifies s Tape, Typeable s) => f (ReverseDouble s) -> ReverseDouble s) -> f Double -> f Double
- grad' :: Traversable f => (forall s. (Reifies s Tape, Typeable s) => f (ReverseDouble s) -> ReverseDouble s) -> f Double -> (Double, f Double)
- gradWith :: Traversable f => (Double -> Double -> b) -> (forall s. (Reifies s Tape, Typeable s) => f (ReverseDouble s) -> ReverseDouble s) -> f Double -> f b
- gradWith' :: Traversable f => (Double -> Double -> b) -> (forall s. (Reifies s Tape, Typeable s) => f (ReverseDouble s) -> ReverseDouble s) -> f Double -> (Double, f b)
- grads :: Traversable f => (forall s. f (AD s SparseDouble) -> AD s SparseDouble) -> f Double -> Cofree f Double
- class Grad i o o' | i -> o o', o -> i o', o' -> i o
- vgrad :: Grad i o o' => i -> o
- vgrad' :: Grad i o o' => i -> o'
- class Grads i o | i -> o, o -> i
- vgrads :: Grads i o => i -> o
- jacobian :: (Traversable f, Functor g) => (forall s. (Reifies s Tape, Typeable s) => f (ReverseDouble s) -> g (ReverseDouble s)) -> f Double -> g (f Double)
- jacobian' :: (Traversable f, Functor g) => (forall s. (Reifies s Tape, Typeable s) => f (ReverseDouble s) -> g (ReverseDouble s)) -> f Double -> g (Double, f Double)
- jacobianWith :: (Traversable f, Functor g) => (Double -> Double -> b) -> (forall s. (Reifies s Tape, Typeable s) => f (ReverseDouble s) -> g (ReverseDouble s)) -> f Double -> g (f b)
- jacobianWith' :: (Traversable f, Functor g) => (Double -> Double -> b) -> (forall s. (Reifies s Tape, Typeable s) => f (ReverseDouble s) -> g (ReverseDouble s)) -> f Double -> g (Double, f b)
- jacobians :: (Traversable f, Functor g) => (forall s. f (AD s SparseDouble) -> g (AD s SparseDouble)) -> f Double -> g (Cofree f Double)
- jacobianT :: (Traversable f, Functor g) => (forall s. f (AD s ForwardDouble) -> g (AD s ForwardDouble)) -> f Double -> f (g Double)
- jacobianWithT :: (Traversable f, Functor g) => (Double -> Double -> b) -> (forall s. f (AD s ForwardDouble) -> g (AD s ForwardDouble)) -> f Double -> f (g b)
- hessian :: Traversable f => (forall s. (Reifies s Tape, Typeable s) => f (On (Reverse s SparseDouble)) -> On (Reverse s SparseDouble)) -> f Double -> f (f Double)
- hessian' :: Traversable f => (forall s. f (AD s SparseDouble) -> AD s SparseDouble) -> f Double -> (Double, f (Double, f Double))
- hessianF :: (Traversable f, Functor g) => (forall s. (Reifies s Tape, Typeable s) => f (On (Reverse s SparseDouble)) -> g (On (Reverse s SparseDouble))) -> f Double -> g (f (f Double))
- hessianF' :: (Traversable f, Functor g) => (forall s. f (AD s SparseDouble) -> g (AD s SparseDouble)) -> f Double -> g (Double, f (Double, f Double))
- hessianProduct :: Traversable f => (forall s. (Reifies s Tape, Typeable s) => f (On (Reverse s ForwardDouble)) -> On (Reverse s ForwardDouble)) -> f (Double, Double) -> f Double
- hessianProduct' :: Traversable f => (forall s. (Reifies s Tape, Typeable s) => f (On (Reverse s ForwardDouble)) -> On (Reverse s ForwardDouble)) -> f (Double, Double) -> f (Double, Double)
- diff :: (forall s. AD s ForwardDouble -> AD s ForwardDouble) -> Double -> Double
- diffF :: Functor f => (forall s. AD s ForwardDouble -> f (AD s ForwardDouble)) -> Double -> f Double
- diff' :: (forall s. AD s ForwardDouble -> AD s ForwardDouble) -> Double -> (Double, Double)
- diffF' :: Functor f => (forall s. AD s ForwardDouble -> f (AD s ForwardDouble)) -> Double -> f (Double, Double)
- diffs :: (forall s. AD s TowerDouble -> AD s TowerDouble) -> Double -> [Double]
- diffsF :: Functor f => (forall s. AD s TowerDouble -> f (AD s TowerDouble)) -> Double -> f [Double]
- diffs0 :: (forall s. AD s TowerDouble -> AD s TowerDouble) -> Double -> [Double]
- diffs0F :: Functor f => (forall s. AD s TowerDouble -> f (AD s TowerDouble)) -> Double -> f [Double]
- du :: Functor f => (forall s. f (AD s ForwardDouble) -> AD s ForwardDouble) -> f (Double, Double) -> Double
- du' :: Functor f => (forall s. f (AD s ForwardDouble) -> AD s ForwardDouble) -> f (Double, Double) -> (Double, Double)
- duF :: (Functor f, Functor g) => (forall s. f (AD s ForwardDouble) -> g (AD s ForwardDouble)) -> f (Double, Double) -> g Double
- duF' :: (Functor f, Functor g) => (forall s. f (AD s ForwardDouble) -> g (AD s ForwardDouble)) -> f (Double, Double) -> g (Double, Double)
- dus :: Functor f => (forall s. f (AD s TowerDouble) -> AD s TowerDouble) -> f [Double] -> [Double]
- dus0 :: Functor f => (forall s. f (AD s TowerDouble) -> AD s TowerDouble) -> f [Double] -> [Double]
- dusF :: (Functor f, Functor g) => (forall s. f (AD s TowerDouble) -> g (AD s TowerDouble)) -> f [Double] -> g [Double]
- dus0F :: (Functor f, Functor g) => (forall s. f (AD s TowerDouble) -> g (AD s TowerDouble)) -> f [Double] -> g [Double]
- taylor :: (forall s. AD s TowerDouble -> AD s TowerDouble) -> Double -> Double -> [Double]
- taylor0 :: (forall s. AD s TowerDouble -> AD s TowerDouble) -> Double -> Double -> [Double]
- maclaurin :: (forall s. AD s TowerDouble -> AD s TowerDouble) -> Double -> [Double]
- maclaurin0 :: (forall s. AD s TowerDouble -> AD s TowerDouble) -> Double -> [Double]
- conjugateGradientDescent :: Traversable f => (forall s. Chosen s => f (Or s (On (Forward ForwardDouble)) KahnDouble) -> Or s (On (Forward ForwardDouble)) KahnDouble) -> f Double -> [f Double]
- conjugateGradientAscent :: Traversable f => (forall s. Chosen s => f (Or s (On (Forward ForwardDouble)) KahnDouble) -> Or s (On (Forward ForwardDouble)) KahnDouble) -> f Double -> [f Double]
- data Jet f a = a :- (Jet f (f a))
- headJet :: Jet f a -> a
- tailJet :: Jet f a -> Jet f (f a)
- jet :: Functor f => Cofree f a -> Jet f a
Documentation
Instances
AD modes
class (Num t, Num (Scalar t)) => Mode t where Source #
Nothing
Instances
Instances
Gradients (Reverse Mode)
grad :: Traversable f => (forall s. (Reifies s Tape, Typeable s) => f (ReverseDouble s) -> ReverseDouble s) -> f Double -> f Double Source #
The grad
function calculates the gradient of a non-scalar-to-scalar function with reverse-mode AD in a single pass.
>>>
grad (\[x,y,z] -> x*y+z) [1,2,3]
[2.0,1.0,1.0]
>>>
grad (\[x,y] -> x**y) [0,2]
[0.0,NaN]
grad' :: Traversable f => (forall s. (Reifies s Tape, Typeable s) => f (ReverseDouble s) -> ReverseDouble s) -> f Double -> (Double, f Double) Source #
The grad'
function calculates the result and gradient of a non-scalar-to-scalar function with reverse-mode AD in a single pass.
>>>
grad' (\[x,y,z] -> x*y+z) [1,2,3]
(5.0,[2.0,1.0,1.0])
gradWith :: Traversable f => (Double -> Double -> b) -> (forall s. (Reifies s Tape, Typeable s) => f (ReverseDouble s) -> ReverseDouble s) -> f Double -> f b Source #
gradWith' :: Traversable f => (Double -> Double -> b) -> (forall s. (Reifies s Tape, Typeable s) => f (ReverseDouble s) -> ReverseDouble s) -> f Double -> (Double, f b) Source #
Higher Order Gradients (Sparse-on-Reverse)
grads :: Traversable f => (forall s. f (AD s SparseDouble) -> AD s SparseDouble) -> f Double -> Cofree f Double Source #
Variadic Gradients (Sparse or Kahn)
Variadic combinators for variadic mixed-mode automatic differentiation.
Unfortunately, variadicity comes at the expense of being able to use
quantification to avoid sensitivity confusion, so be careful when
counting the number of auto
calls you use when taking the gradient
of a function that takes gradients!
class Grad i o o' | i -> o o', o -> i o', o' -> i o Source #
Instances
Grad i o o' => Grad (KahnDouble -> i) (Double -> o) (Double -> o') Source # | |
Defined in Numeric.AD.Internal.Kahn.Double pack :: (KahnDouble -> i) -> [KahnDouble] -> KahnDouble Source # unpack :: (List -> List) -> Double -> o Source # unpack' :: (List -> (Double, List)) -> Double -> o' Source # |
class Grads i o | i -> o, o -> i Source #
Instances
Grads SparseDouble (Cofree List Double) Source # | |
Defined in Numeric.AD.Internal.Sparse.Double | |
Grads i o => Grads (SparseDouble -> i) (Double -> o) Source # | |
Defined in Numeric.AD.Internal.Sparse.Double packs :: (SparseDouble -> i) -> [SparseDouble] -> SparseDouble Source # unpacks :: ([Double] -> Cofree List Double) -> Double -> o Source # |
Jacobians (Sparse or Reverse)
jacobian :: (Traversable f, Functor g) => (forall s. (Reifies s Tape, Typeable s) => f (ReverseDouble s) -> g (ReverseDouble s)) -> f Double -> g (f Double) Source #
The jacobian
function calculates the jacobian of a non-scalar-to-non-scalar function with reverse AD lazily in m
passes for m
outputs.
>>>
jacobian (\[x,y] -> [y,x,x*y]) [2,1]
[[0.0,1.0],[1.0,0.0],[1.0,2.0]]
jacobian' :: (Traversable f, Functor g) => (forall s. (Reifies s Tape, Typeable s) => f (ReverseDouble s) -> g (ReverseDouble s)) -> f Double -> g (Double, f Double) Source #
The jacobian'
function calculates both the result and the Jacobian of a nonscalar-to-nonscalar function, using m
invocations of reverse AD,
where m
is the output dimensionality. Applying fmap snd
to the result will recover the result of jacobian.
An alias for gradF'.
>>>
jacobian' (\[x,y] -> [y,x,x*y]) [2,1]
[(1.0,[0.0,1.0]),(2.0,[1.0,0.0]),(2.0,[1.0,2.0])]
jacobianWith :: (Traversable f, Functor g) => (Double -> Double -> b) -> (forall s. (Reifies s Tape, Typeable s) => f (ReverseDouble s) -> g (ReverseDouble s)) -> f Double -> g (f b) Source #
jacobianWith g f calculates the Jacobian of a non-scalar-to-non-scalar function f
with reverse AD lazily in m
passes for m
outputs.
Instead of returning the Jacobian matrix, the elements of the matrix are combined with the input using the function g.
jacobian == jacobianWith (\_ dx -> dx)
jacobianWith const == (\f x -> const x <$> f x)
jacobianWith' :: (Traversable f, Functor g) => (Double -> Double -> b) -> (forall s. (Reifies s Tape, Typeable s) => f (ReverseDouble s) -> g (ReverseDouble s)) -> f Double -> g (Double, f b) Source #
jacobianWith' g f calculates both the result and the Jacobian of a nonscalar-to-nonscalar function f,
using m invocations of reverse AD,
where m is the output dimensionality. Applying fmap snd
to the result will recover the result of jacobianWith.
Instead of returning the Jacobian matrix, the elements of the matrix are combined with the input using the function g.
jacobian' == jacobianWith' (\_ dx -> dx)
Higher Order Jacobian (Sparse-on-Reverse)
jacobians :: (Traversable f, Functor g) => (forall s. f (AD s SparseDouble) -> g (AD s SparseDouble)) -> f Double -> g (Cofree f Double) Source #
Transposed Jacobians (Forward Mode)
jacobianT :: (Traversable f, Functor g) => (forall s. f (AD s ForwardDouble) -> g (AD s ForwardDouble)) -> f Double -> f (g Double) Source #
A fast, simple, transposed Jacobian computed with forward-mode AD.
jacobianWithT :: (Traversable f, Functor g) => (Double -> Double -> b) -> (forall s. f (AD s ForwardDouble) -> g (AD s ForwardDouble)) -> f Double -> f (g b) Source #
A fast, simple, transposed Jacobian computed with Forward
mode AD
that combines the output with the input.
Hessian (Sparse-On-Reverse)
hessian :: Traversable f => (forall s. (Reifies s Tape, Typeable s) => f (On (Reverse s SparseDouble)) -> On (Reverse s SparseDouble)) -> f Double -> f (f Double) Source #
Compute the Hessian via the Jacobian of the gradient. The gradient is computed in reverse mode and then the Jacobian is computed in sparse (forward) mode.
>>>
hessian (\[x,y] -> x*y) [1,2]
[[0.0,1.0],[1.0,0.0]]
hessian' :: Traversable f => (forall s. f (AD s SparseDouble) -> AD s SparseDouble) -> f Double -> (Double, f (Double, f Double)) Source #
Hessian Tensors (Sparse or Sparse-On-Reverse)
hessianF :: (Traversable f, Functor g) => (forall s. (Reifies s Tape, Typeable s) => f (On (Reverse s SparseDouble)) -> g (On (Reverse s SparseDouble))) -> f Double -> g (f (f Double)) Source #
Compute the order 3 Hessian tensor on a non-scalar-to-non-scalar function using Sparse-on-Reverse.
Hessian Tensors (Sparse)
hessianF' :: (Traversable f, Functor g) => (forall s. f (AD s SparseDouble) -> g (AD s SparseDouble)) -> f Double -> g (Double, f (Double, f Double)) Source #
Hessian Vector Products (Forward-On-Reverse)
hessianProduct :: Traversable f => (forall s. (Reifies s Tape, Typeable s) => f (On (Reverse s ForwardDouble)) -> On (Reverse s ForwardDouble)) -> f (Double, Double) -> f Double Source #
hessianProduct f wv computes the product of the hessian H
of a non-scalar-to-scalar function f
at w = fst <$> wv
with a vector v = snd <$> wv
using "Pearlmutter's method" from http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.29.6143, which states:
H v = (d/dr) grad_w (w + r v) | r = 0
Or in other words, we take the directional derivative of the gradient. The gradient is calculated in reverse mode, then the directional derivative is calculated in forward mode.
hessianProduct' :: Traversable f => (forall s. (Reifies s Tape, Typeable s) => f (On (Reverse s ForwardDouble)) -> On (Reverse s ForwardDouble)) -> f (Double, Double) -> f (Double, Double) Source #
hessianProduct' f wv computes both the gradient of a non-scalar-to-scalar function f
at w = fst <$> wv
and the product of the hessian H
at w
with a vector v = snd <$> wv
using "Pearlmutter's method". The outputs are returned wrapped in the same functor.
H v = (d/dr) grad_w (w + r v) | r = 0
Or in other words, we return the gradient and the directional derivative of the gradient. The gradient is calculated in reverse mode, then the directional derivative is calculated in forward mode.
Derivatives (Forward Mode)
diff :: (forall s. AD s ForwardDouble -> AD s ForwardDouble) -> Double -> Double Source #
diffF :: Functor f => (forall s. AD s ForwardDouble -> f (AD s ForwardDouble)) -> Double -> f Double Source #
diff' :: (forall s. AD s ForwardDouble -> AD s ForwardDouble) -> Double -> (Double, Double) Source #
diffF' :: Functor f => (forall s. AD s ForwardDouble -> f (AD s ForwardDouble)) -> Double -> f (Double, Double) Source #
Derivatives (Tower)
diffs :: (forall s. AD s TowerDouble -> AD s TowerDouble) -> Double -> [Double] Source #
diffsF :: Functor f => (forall s. AD s TowerDouble -> f (AD s TowerDouble)) -> Double -> f [Double] Source #
diffs0 :: (forall s. AD s TowerDouble -> AD s TowerDouble) -> Double -> [Double] Source #
diffs0F :: Functor f => (forall s. AD s TowerDouble -> f (AD s TowerDouble)) -> Double -> f [Double] Source #
Directional Derivatives (Forward Mode)
du :: Functor f => (forall s. f (AD s ForwardDouble) -> AD s ForwardDouble) -> f (Double, Double) -> Double Source #
Compute the directional derivative of a function given a zipped up Functor
of the input values and their derivatives
du' :: Functor f => (forall s. f (AD s ForwardDouble) -> AD s ForwardDouble) -> f (Double, Double) -> (Double, Double) Source #
Compute the answer and directional derivative of a function given a zipped up Functor
of the input values and their derivatives
duF :: (Functor f, Functor g) => (forall s. f (AD s ForwardDouble) -> g (AD s ForwardDouble)) -> f (Double, Double) -> g Double Source #
Compute a vector of directional derivatives for a function given a zipped up Functor
of the input values and their derivatives.
duF' :: (Functor f, Functor g) => (forall s. f (AD s ForwardDouble) -> g (AD s ForwardDouble)) -> f (Double, Double) -> g (Double, Double) Source #
Compute a vector of answers and directional derivatives for a function given a zipped up Functor
of the input values and their derivatives.
Directional Derivatives (Tower)
dus :: Functor f => (forall s. f (AD s TowerDouble) -> AD s TowerDouble) -> f [Double] -> [Double] Source #
dus0 :: Functor f => (forall s. f (AD s TowerDouble) -> AD s TowerDouble) -> f [Double] -> [Double] Source #
dusF :: (Functor f, Functor g) => (forall s. f (AD s TowerDouble) -> g (AD s TowerDouble)) -> f [Double] -> g [Double] Source #
dus0F :: (Functor f, Functor g) => (forall s. f (AD s TowerDouble) -> g (AD s TowerDouble)) -> f [Double] -> g [Double] Source #
Taylor Series (Tower)
taylor :: (forall s. AD s TowerDouble -> AD s TowerDouble) -> Double -> Double -> [Double] Source #
taylor0 :: (forall s. AD s TowerDouble -> AD s TowerDouble) -> Double -> Double -> [Double] Source #
Maclaurin Series (Tower)
maclaurin :: (forall s. AD s TowerDouble -> AD s TowerDouble) -> Double -> [Double] Source #
maclaurin0 :: (forall s. AD s TowerDouble -> AD s TowerDouble) -> Double -> [Double] Source #
Gradient Descent
conjugateGradientDescent :: Traversable f => (forall s. Chosen s => f (Or s (On (Forward ForwardDouble)) KahnDouble) -> Or s (On (Forward ForwardDouble)) KahnDouble) -> f Double -> [f Double] Source #
Perform a conjugate gradient descent using reverse mode automatic differentiation to compute the gradient, and using forward-on-forward mode for computing extrema.
>>>
let sq x = x * x
>>>
let rosenbrock [x,y] = sq (1 - x) + 100 * sq (y - sq x)
>>>
rosenbrock [0,0]
1
>>>
rosenbrock (conjugateGradientDescent rosenbrock [0, 0] !! 5) < 0.1
True
conjugateGradientAscent :: Traversable f => (forall s. Chosen s => f (Or s (On (Forward ForwardDouble)) KahnDouble) -> Or s (On (Forward ForwardDouble)) KahnDouble) -> f Double -> [f Double] Source #
Perform a conjugate gradient ascent using reverse mode automatic differentiation to compute the gradient.
Working with towers
A Jet
is a tower of all (higher order) partial derivatives of a function.
At each step, a Jet f
is wrapped in another layer worth of f.
a :- f a :- f (f a) :- f (f (f a)) :- ...
Instances
Foldable f => Foldable (Jet f) Source # | |
Defined in Numeric.AD.Jet fold :: Monoid m => Jet f m -> m # foldMap :: Monoid m => (a -> m) -> Jet f a -> m # foldMap' :: Monoid m => (a -> m) -> Jet f a -> m # foldr :: (a -> b -> b) -> b -> Jet f a -> b # foldr' :: (a -> b -> b) -> b -> Jet f a -> b # foldl :: (b -> a -> b) -> b -> Jet f a -> b # foldl' :: (b -> a -> b) -> b -> Jet f a -> b # foldr1 :: (a -> a -> a) -> Jet f a -> a # foldl1 :: (a -> a -> a) -> Jet f a -> a # elem :: Eq a => a -> Jet f a -> Bool # maximum :: Ord a => Jet f a -> a # minimum :: Ord a => Jet f a -> a # | |
Traversable f => Traversable (Jet f) Source # | |
Functor f => Functor (Jet f) Source # | |
(Functor f, Show (f Showable), Show a) => Show (Jet f a) Source # | |