"""
Numdifftools.nd_algopy
======================
This module provides an easy-to-use interface to derivatives calculated with
AlgoPy. AlgoPy stands for Algorithmic Differentiation in Python.
The purpose of AlgoPy is the evaluation of higher-order derivatives in the
forward and reverse modes of Algorithmic Differentiation (AD) of functions
that are implemented as Python programs. A particular focus is on functions
that contain numerical linear algebra operations, as they often appear in
statistically motivated computations. The intended use of AlgoPy is easy
prototyping at reasonable execution speeds. More precisely, for a typical
program a directional derivative takes on the order of 10 times as much time
as the function evaluation itself. This is approximately also true for the
gradient.
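
A small usage sketch (assuming AlgoPy is installed); forward and reverse
mode should agree to machine precision:

>>> import numpy as np
>>> import numdifftools.nd_algopy as nda
>>> fun = lambda x: x[0]**2 + x[1]**2
>>> g_fwd = nda.Gradient(fun)                    # forward mode (default)
>>> g_rev = nda.Gradient(fun, method='reverse')  # reverse mode
>>> np.allclose(g_fwd([1., 2.]), g_rev([1., 2.]))
True
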
Algorithmic differentiation
===========================

Algorithmic differentiation (AD) is a set of techniques to numerically
evaluate the derivative of a function specified by a computer program. AD
exploits the fact that every computer program, no matter how complicated,
executes a sequence of elementary arithmetic operations (addition,
subtraction, multiplication, division, etc.) and elementary functions
(exp, log, sin, cos, etc.). By applying the chain rule repeatedly to these
operations, derivatives of arbitrary order can be computed automatically,
accurately to working precision, and using at most a small constant factor
more arithmetic operations than the original program.

Algorithmic differentiation is neither symbolic differentiation nor numerical
differentiation (the method of finite differences). These classical methods
run into problems: symbolic differentiation leads to inefficient code (unless
carefully done) and faces the difficulty of converting a computer program
into a single expression, while numerical differentiation introduces
round-off errors in the discretization process and suffers from cancellation.
Both classical methods have problems with calculating higher derivatives,
where complexity and errors increase. Finally, both classical methods are
slow at computing the partial derivatives of a function with respect to many
inputs, as is needed for gradient-based optimization algorithms. Algorithmic
differentiation solves all of these problems.
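
As a minimal illustrative sketch (not part of AlgoPy), forward-mode AD can be
implemented with dual numbers: each value carries its derivative along, and
every elementary operation updates both through the chain rule:

>>> class Dual(object):
...     def __init__(self, val, der):
...         self.val, self.der = val, der
...     def __mul__(self, other):
...         # product rule: (u*v)' = u'*v + u*v'
...         return Dual(self.val * other.val,
...                     self.der * other.val + self.val * other.der)
>>> x = Dual(3.0, 1.0)  # seed with dx/dx = 1
>>> y = x * x           # y = x**2, hence dy/dx = 2*x = 6
>>> y.val, y.der
(9.0, 6.0)
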
References
----------
Sebastian F. Walter and Lutz Lehmann 2013,
"Algorithmic differentiation in Python with AlgoPy",
in Journal of Computational Science, vol 4, no 5, pp 334 - 344,
http://www.sciencedirect.com/science/article/pii/S1877750311001013
https://en.wikipedia.org/wiki/Automatic_differentiation
https://pythonhosted.org/algopy/index.html
"""
from __future__ import absolute_import, division
from collections import namedtuple
from scipy import special
import numpy as np
try:
import algopy
from algopy import UTPM
except ImportError:
UTPM = algopy = None
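# AlgoPy is an optional dependency: without it the module can still be
# imported, but any attempt to differentiate will raise at call time.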
EPS = np.finfo(float).eps
_cmn_doc = """
    Calculate %(derivative)s with the Algorithmic Differentiation method
Parameters
----------
fun: function
function of one array fun(x, `*args`, `**kwds`)%(extra_parameter)s
method: string, optional {'forward', 'reverse'}
        defines the method used in the approximation
Methods
-------
__call__: callable with the following parameters:
x: array_like
value at which function derivative is evaluated
args: tuple
Arguments for function `fun`.
kwds: dict
Keyword arguments for function `fun`.
%(returns)s
Notes
-----
Algorithmic differentiation is a set of techniques to numerically
evaluate the derivative of a function specified by a computer program.
AD exploits the fact that every computer program, no matter how
complicated, executes a sequence of elementary arithmetic operations
(addition, subtraction, multiplication, division, etc.) and elementary
functions (exp, log, sin, cos, etc.). By applying the chain rule
repeatedly to these operations, derivatives of arbitrary order can be
computed automatically, accurately to working precision, and using at
most a small constant factor more arithmetic operations than the original
program.
%(extra_note)s
References
----------
Sebastian F. Walter and Lutz Lehmann 2013,
"Algorithmic differentiation in Python with AlgoPy",
in Journal of Computational Science, vol 4, no 5, pp 334 - 344,
http://www.sciencedirect.com/science/article/pii/S1877750311001013
https://en.wikipedia.org/wiki/Automatic_differentiation
%(example)s
%(see_also)s
"""
class _Derivative(object):
"""Base class"""
info = namedtuple('info', ['error_estimate', 'final_step', 'index'])
def __init__(self, fun, n=1, method='forward', full_output=False):
self.fun = fun
self.method = method
self.n = n
self.full_output = full_output
@property
def fun(self):
return self._fun
@fun.setter
def fun(self, fun):
self._fun = fun
self._computational_graph = None
    def computational_graph(self, x, *args, **kwds):
        if self._computational_graph is None:
            # Trace the function evaluation once: every operation performed
            # on the algopy.Function instance is recorded in the graph.
            cg = algopy.CGraph()
            tmp = algopy.Function(x)
            y = self.fun(tmp, *args, **kwds)
            cg.trace_off()
            cg.independentFunctionList = [tmp]
            cg.dependentFunctionList = [y]
            # Cache the graph so repeated calls reuse the same trace.
            self._computational_graph = cg
        return self._computational_graph
    def _get_function(self):
        if self.n == 0:
            return self.fun
        # 'backward' is accepted as an alias for 'reverse'
        name = '_' + dict(backward='reverse').get(self.method, self.method)
        return getattr(self, name)
    def __call__(self, x, *args, **kwds):
        fun = self._get_function()
        x0 = np.asarray(x, dtype=float)
        df = fun(x0, *args, **kwds)
        if self.full_output:
            # AD is exact to machine precision, so report a conservative
            # error estimate of a few EPS instead of a truncation error.
            return df, self.info(np.maximum(10 * EPS * np.abs(df), EPS),
                                 EPS, 0)
        return df
class Derivative(_Derivative):
__doc__ = _cmn_doc % dict(
derivative='n-th derivative',
extra_parameter="""
n: int, optional
Order of the derivative.""",
extra_note="", returns="""
Returns
-------
der: ndarray
array of derivatives
""", example="""
Examples
--------
    # 1st and 5th derivatives of exp(x), at x == 1
>>> import numpy as np
>>> import numdifftools.nd_algopy as nda
    >>> fd = nda.Derivative(np.exp)  # 1st derivative
>>> np.allclose(fd(1), 2.718281828459045)
True
    >>> fd5 = nda.Derivative(np.exp, n=5)  # 5th derivative
>>> np.allclose(fd5(1), 2.718281828459045)
True
    # 1st derivative of x**3 + x**4, at x = [0, 1]
>>> fun = lambda x: x**3 + x**4
>>> fd3 = nda.Derivative(fun)
>>> np.allclose(fd3([0,1]), [ 0., 7.])
True
""", see_also="""
See also
--------
Gradient,
Hessdiag,
Hessian,
Jacobian
""")
    def _forward(self, x, *args, **kwds):
        x0 = np.asarray(x)
        shape = x0.shape
        P = 1  # number of directions
        # Propagate a degree-n truncated Taylor polynomial through fun,
        # seeded with x(t) = x0 + t.
        x = UTPM(np.zeros((self.n + 1, P) + shape))
        x.data[0, 0] = x0
        x.data[1, 0] = 1
        z = self.fun(x, *args, **kwds)
        y = UTPM.as_utpm(z)
        # The n-th Taylor coefficient is f^(n)(x0) / n!, so scale back.
        return y.data[self.n, 0] * special.factorial(self.n)
def _reverse(self, x, *args, **kwds):
if self.n != 1:
raise NotImplementedError('Derivative reverse not implemented'
' for n>1')
c_graph = self.computational_graph(np.asarray(1), *args, **kwds)
shape0 = x.shape
y = np.array([c_graph.gradient(xi) for xi in x.ravel()])
return y.reshape(shape0)
class Gradient(_Derivative):
__doc__ = _cmn_doc % dict(
derivative='Gradient',
extra_parameter="",
extra_note="", returns="""
Returns
-------
grad: array
gradient
""", example="""
Examples
--------
>>> import numpy as np
>>> import numdifftools.nd_algopy as nda
>>> fun = lambda x: np.sum(x**2)
>>> df = nda.Gradient(fun, method='reverse')
>>> np.allclose(df([1,2,3]), [ 2., 4., 6.])
True
    # At [x, y] = [1, 1], compute the gradient
    # of the function sin(x-y) + y*exp(x)
>>> sin = np.sin; exp = np.exp
>>> z = lambda xy: sin(xy[0]-xy[1]) + xy[1]*exp(xy[0])
>>> dz = nda.Gradient(z)
>>> grad2 = dz([1, 1])
>>> np.allclose(grad2, [ 3.71828183, 1.71828183])
True
    # At the global minimizer (1, 1) of the Rosenbrock function,
    # compute the gradient. It should be essentially zero.
>>> rosen = lambda x : (1-x[0])**2 + 105.*(x[1]-x[0]**2)**2
>>> rd = nda.Gradient(rosen)
>>> grad3 = rd([1,1])
>>> np.allclose(grad3, [ 0., 0.])
True
""", see_also="""
See also
--------
Derivative
Jacobian,
Hessdiag,
Hessian,
""")
def _forward(self, x, *args, **kwds):
# forward mode without building the computational graph
tmp = UTPM.init_jacobian(x)
y = self.fun(tmp, *args, **kwds)
return UTPM.extract_jacobian(y)
def _reverse(self, x, *args, **kwds):
c_graph = self.computational_graph(x, *args, **kwds)
return c_graph.gradient(x)
class Jacobian(Gradient):
__doc__ = _cmn_doc % dict(
derivative='Jacobian',
extra_parameter="",
extra_note="", returns="""
Returns
-------
jacob: array
Jacobian
""", example="""
Examples
--------
>>> import numpy as np
>>> import numdifftools.nd_algopy as nda
    # Nonlinear least squares:
>>> xdata = np.arange(0,1,0.1)
>>> ydata = 1+2*np.exp(0.75*xdata)
>>> fun = lambda c: (c[0]+c[1]*np.exp(c[2]*xdata) - ydata)**2
Jfun = nda.Jacobian(fun) # Todo: This does not work
Jfun([1,2,0.75]).T # should be numerically zero
array([[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
>>> Jfun2 = nda.Jacobian(fun, method='reverse')
>>> res = Jfun2([1,2,0.75]).T # should be numerically zero
>>> np.allclose(res,
... [[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
... [ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
... [ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
True
>>> f2 = lambda x : x[0]*x[1]*x[2]**2
>>> Jfun2 = nda.Jacobian(f2)
>>> np.allclose(Jfun2([1., 2., 3.]), [[ 18., 9., 12.]])
True
>>> Jfun21 = nda.Jacobian(f2, method='reverse')
>>> np.allclose(Jfun21([1., 2., 3.]), [[ 18., 9., 12.]])
True
>>> def fun3(x):
... n = int(np.prod(np.shape(x[0])))
... out = nda.algopy.zeros((2, n), dtype=x)
... out[0] = x[0]*x[1]*x[2]**2
... out[1] = x[0]*x[1]*x[2]
... return out
>>> Jfun3 = nda.Jacobian(fun3)
>>> np.allclose(Jfun3([1., 2., 3.]), [[[18., 9., 12.]], [[6., 3., 2.]]])
True
>>> np.allclose(Jfun3([4., 5., 6.]), [[[180., 144., 240.]],
... [[30., 24., 20.]]])
True
>>> np.allclose(Jfun3(np.array([[1.,2.,3.], [4., 5., 6.]]).T),
... [[[18., 0., 9., 0., 12., 0.],
... [0., 180., 0., 144., 0., 240.]],
... [[6., 0., 3., 0., 2., 0.],
... [0., 30., 0., 24., 0., 20.]]])
True
""", see_also="""
See also
--------
Derivative
Gradient,
Hessdiag,
Hessian,
""")
def _forward(self, x, *args, **kwds):
return np.atleast_2d(super(Jacobian, self)._forward(x, *args, **kwds))
def _reverse(self, x, *args, **kwds):
x = np.atleast_1d(x)
c_graph = self.computational_graph(x, *args, **kwds)
return c_graph.jacobian(x)
class Hessian(_Derivative):
__doc__ = _cmn_doc % dict(
derivative='Hessian',
extra_parameter="",
returns="""
Returns
-------
hess : ndarray
array of partial second derivatives, Hessian
""", extra_note='', example="""
Examples
--------
>>> import numpy as np
>>> import numdifftools.nd_algopy as nda
# Rosenbrock function, minimized at [1,1]
>>> rosen = lambda x : (1.-x[0])**2 + 105*(x[1]-x[0]**2)**2
>>> Hf = nda.Hessian(rosen)
>>> h = Hf([1, 1]) # h =[ 842 -420; -420, 210];
>>> np.allclose(h, [[ 842., -420.],
... [-420., 210.]])
True
# cos(x-y), at (0,0)
>>> cos = np.cos
>>> fun = lambda xy : cos(xy[0]-xy[1])
>>> Hfun2 = nda.Hessian(fun)
>>> h2 = Hfun2([0, 0]) # h2 = [-1 1; 1 -1]
>>> np.allclose(h2, [[-1., 1.],
... [ 1., -1.]])
True
>>> Hfun3 = nda.Hessian(fun, method='reverse')
>>> h3 = Hfun3([0, 0]) # h2 = [-1, 1; 1, -1];
>>> np.allclose(h3, [[-1., 1.],
... [ 1., -1.]])
True
""", see_also="""
See also
--------
Derivative
Gradient,
Jacobian,
Hessdiag,
""")
    def __init__(self, f, method='forward', full_output=False):
super(Hessian, self).__init__(f, n=2, method=method,
full_output=full_output)
def _forward(self, x, *args, **kwds):
x = np.atleast_1d(x)
tmp = UTPM.init_hessian(x)
y = self.fun(tmp, *args, **kwds)
return UTPM.extract_hessian(len(x), y)
def _reverse(self, x, *args, **kwds):
x = np.atleast_1d(np.asarray(x, dtype=float))
c_graph = self.computational_graph(x, *args, **kwds)
return c_graph.hessian(x)
class Hessdiag(Hessian):
__doc__ = _cmn_doc % dict(
derivative='Hessian diagonal',
extra_parameter="",
returns="""
Returns
-------
hessdiag : ndarray
Hessian diagonal array of partial second order derivatives.
""", extra_note='', example="""
Examples
--------
>>> import numpy as np
>>> import numdifftools.nd_algopy as nda
# Rosenbrock function, minimized at [1,1]
>>> rosen = lambda x : (1.-x[0])**2 + 105*(x[1]-x[0]**2)**2
>>> Hfun = nda.Hessdiag(rosen)
>>> h = Hfun([1, 1]) # h =[ 842, 210]
>>> np.allclose(h, [ 842., 210.])
True
# cos(x-y), at (0,0)
>>> cos = np.cos
>>> fun = lambda xy : cos(xy[0]-xy[1])
>>> Hfun2 = nda.Hessdiag(fun)
>>> h2 = Hfun2([0, 0]) # h2 = [-1, -1]
>>> np.allclose(h2, [-1., -1.])
True
>>> Hfun3 = nda.Hessdiag(fun, method='reverse')
>>> h3 = Hfun3([0, 0]) # h2 = [-1, -1];
>>> np.allclose(h3, [-1., -1.])
True
""", see_also="""
See also
--------
Derivative
Gradient,
Jacobian,
Hessian,
""")
    def _forward(self, x, *args, **kwds):
        # Propagate one second-order Taylor polynomial per coordinate
        # direction and read off the diagonal second derivatives.
        d, n = 2 + 1, x.size
        p = n  # one direction per coordinate
        y = UTPM(np.zeros((d, p, n)))
        y.data[0, :] = x.ravel()
        y.data[1, :] = np.eye(n)  # seed direction i with the unit vector e_i
        z0 = self.fun(y, *args, **kwds)
        z = UTPM.as_utpm(z0)
        # The second Taylor coefficient is f''(x) / 2!, so scale back by 2.
        H = z.data[2, ...] * 2
        return H
def _reverse(self, x, *args, **kwds):
return np.diag(super(Hessdiag, self)._reverse(x, *args, **kwds))
def directionaldiff(f, x0, vec, **options):
"""
Return directional derivative of a function of n variables
Parameters
----------
    f: callable
analytical function to differentiate.
x0: array
        vector location at which to differentiate f. If x0 is an n-by-m
        array, then f is assumed to be a function of n*m variables.
vec: array
vector defining the line along which to take the derivative. It should
be the same size as x0, but need not be a vector of unit length.
**options:
optional arguments to pass on to Derivative.
Returns
-------
dder: scalar
        estimate of the first derivative of f in the specified direction.
Examples
--------
At the global minimizer (1,1) of the Rosenbrock function,
compute the directional derivative in the direction [1 2]
>>> import numpy as np
>>> import numdifftools.nd_algopy as nda
>>> vec = np.r_[1, 2]
>>> rosen = lambda x: (1-x[0])**2 + 105*(x[1]-x[0]**2)**2
>>> dd = nda.directionaldiff(rosen, [1, 1], vec)
>>> np.allclose(dd, 0)
True
See also
--------
Derivative,
Gradient
"""
x0 = np.asarray(x0)
vec = np.asarray(vec)
    if x0.size != vec.size:
        raise ValueError('vec and x0 must be the same size')
vec = np.reshape(vec / np.linalg.norm(vec.ravel()), x0.shape)
return Derivative(lambda t: f(x0 + t * vec), **options)(0)
class Taylor(object):
"""
Return Taylor coefficients of function using algorithmic differentiation
Parameters
----------
fun: callable
function to differentiate
z0: real or complex array
at which to evaluate the derivatives
    n: scalar integer, default 1
        Number of Taylor coefficients to compute.

    Returns
    -------
    coefs: ndarray
        array of Taylor coefficients

    Examples
    --------
    Compute the first 6 Taylor coefficients of 1 + 2*z + 3*z**2 expanded
    around z0 = 0:
>>> import numpy as np
>>> import numdifftools.nd_algopy as nda
>>> c = nda.Taylor(lambda x: 1+2*x+3*x**2, n=6)(z0=0)
>>> np.allclose(c, [1, 2, 3, 0, 0, 0])
True
"""
def __init__(self, fun, n=1):
self.fun = fun
self.n = n
    def __call__(self, z0=0):
        z = np.atleast_1d(z0).ravel()
        # Seed a truncated Taylor series x(t) = z0 + t and propagate it
        # through fun; at least two coefficients are needed for the seed,
        # so pad the allocation when n == 1 and slice the result back.
        d = max(self.n, 2)
        x = UTPM(np.zeros((d, 1, z.size), dtype=z.dtype))
        x.data[0, 0, :] = z
        x.data[1, 0, :] = 1
        y = self.fun(x)
        return np.squeeze(y.data[:self.n])
if __name__ == '__main__':
from numdifftools.testing import test_docstrings
test_docstrings()