#!/usr/bin/env python
"""numerical differentiation functions:

Derivative, Gradient, Jacobian, and Hessian

Author:      Per A. Brodtkorb
Created:     01.08.2008
Copyright:   (c) pab 2008
Licence:     New BSD
"""

from __future__ import absolute_import, division, print_function
from collections import namedtuple

import numpy as np
from numdifftools.extrapolation import Richardson, dea3  # @UnusedImport
from numdifftools.step_generators import MaxStepGenerator, MinStepGenerator
from numdifftools.limits import _Limit
from numdifftools.finite_difference import (LogRule,
                                            LogHessdiagRule,
                                            LogHessianRule,
                                            LogJacobianRule,
                                            )


__all__ = ('dea3', 'Derivative', 'Jacobian', 'Gradient', 'Hessian', 'Hessdiag',
           'MinStepGenerator', 'MaxStepGenerator', 'Richardson',
           'directionaldiff')
FD_RULES = {}
_SQRT_J = (1j + 1.0) / np.sqrt(2.0)  # = 1j**0.5


def _assert(cond, msg):
    if not cond:
        raise ValueError(msg)


_CMN_DOC = """
    Calculate %(derivative)s with finite difference approximation

    Parameters
    ----------
    fun : function
       function of one array fun(x, `*args`, `**kwds`)
    step : float, array-like or StepGenerator object, optional
        Defines the spacing used in the approximation.
        Default is MinStepGenerator(**step_options) if method is 'complex' or
        'multicomplex', otherwise MaxStepGenerator(**step_options).
        The results are extrapolated if the StepGenerator generates more than
        3 steps.
    method : {'central', 'complex', 'multicomplex', 'forward', 'backward'}
        defines the method used in the approximation%(extra_parameter)s
    richardson_terms : scalar integer, default 2.
        number of terms used in the Richardson extrapolation.
    full_output : bool, optional
        If `full_output` is False, only the derivative is returned.
        If `full_output` is True, then (der, r) is returned, where `der` is the
        derivative and `r` is a Results object.
    **step_options:
        options to pass on to the step generator used (MinStepGenerator or
        MaxStepGenerator).

    Methods
    -------
    __call__ : callable with the following parameters:
        x : array_like
            value at which function derivative is evaluated
        args : tuple
            Arguments for function `fun`.
        kwds : dict
            Keyword arguments for function `fun`.
    %(returns)s
    Notes
    -----
    Complex methods are usually the most accurate, provided the function to
    differentiate is analytic. The complex-step methods also require fewer
    steps than the other methods and can work very close to the support of
    a function.
    The complex-step derivative has truncation error O(steps**2) for `n=1` and
    O(steps**4) for larger `n`, so the truncation error can be eliminated by
    choosing very small steps.
    In particular, the first-order complex-step derivative avoids the problem
    of round-off error with small steps because there is no subtraction.
    However, this method fails if fun(x) does not support complex numbers or
    involves non-analytic functions such as abs, max or min.
    Central difference methods are almost as accurate and have no restriction
    on the type of function. For this reason the 'central' method is the
    default method, but sometimes one can only allow evaluation in the forward
    or backward direction.

    For all methods one should be careful not to decrease the step size too
    much, because round-off errors will then dominate.
    %(extra_note)s
    References
    ----------
    Ridout, M.S. (2009) Statistical applications of the complex-step method
        of numerical differentiation. The American Statistician, 63, 66-74

    K.-L. Lai, J.L. Crassidis, Y. Cheng, J. Kim (2005), New complex step
        derivative approximations with application to second-order
        Kalman filtering, AIAA Guidance, Navigation and Control Conference,
        San Francisco, California, August 2005, AIAA-2005-5944.

    Lyness, J. M., Moler, C. B. (1966). Vandermonde Systems and Numerical
        Differentiation. *Numerische Mathematik*.

    Lyness, J. M., Moler, C. B. (1969). Generalized Romberg Methods for
        Integrals of Derivatives. *Numerische Mathematik*.
    %(example)s
    %(see_also)s
    """


class Derivative(_Limit):
    __doc__ = _CMN_DOC % dict(
        derivative='n-th derivative',
        extra_parameter="""
    order : int, optional
        defines the order of the error term in the Taylor approximation used.
        For 'central' and 'complex' methods, it must be an even number.
    n : int, optional
        Order of the derivative.""",
        extra_note="""
    Higher order approximation methods will generally be more accurate, but
    may also suffer more from numerical problems. First order methods are
    usually not recommended.
    """,
        returns="""
    Returns
    -------
    der : ndarray
        array of derivatives
    """,
        example="""
    Examples
    --------
    >>> import numpy as np
    >>> import numdifftools as nd

    # 1st derivative of exp(x), at x == 1

    >>> fd = nd.Derivative(np.exp)
    >>> np.allclose(fd(1), 2.71828183)
    True

    >>> d2 = fd([1, 2])
    >>> np.allclose(d2, [2.71828183, 7.3890561])
    True

    >>> def f(x):
    ...     return x**3 + x**2

    >>> df = nd.Derivative(f)
    >>> np.allclose(df(1), 5)
    True
    >>> ddf = nd.Derivative(f, n=2)
    >>> np.allclose(ddf(1), 8)
    True
    """,
        see_also="""
    See also
    --------
    Gradient, Hessian
    """)

    _fd_rule = LogRule

    info = namedtuple('info',
                      ['f_value', 'error_estimate', 'final_step', 'index'])

    def __init__(self, fun, step=None, method='central', order=2, n=1,
                 **options):
        self.richardson_terms = options.pop('richardson_terms', 2)
        self.full_output = options.pop('full_output', False)
        self.fun = fun
        self.fd_rule = self._fd_rule(n=n, method=method, order=order)

        super(Derivative, self).__init__(step=step, **options)
        self._set_derivative()

    @property
    def n(self):
        """Order of the derivative."""
        return self.fd_rule.n

    @n.setter
    def n(self, value):
        self.fd_rule.n = value
        self._set_derivative()

    @property
    def order(self):
        """Defines the order of the error term in the Taylor approximation used."""
        return self.fd_rule.order

    @order.setter
    def order(self, order):
        self.fd_rule.order = order

    @property
    def method(self):
        """Defines the method used in the finite difference approximation."""
        return self.fd_rule.method

    @method.setter
    def method(self, method):
        self.fd_rule.method = method

    @property
    def method_order(self):
        """Defines the leading order of the error term in the Richardson extrapolation method."""
        return self.fd_rule.method_order

    def _step_generator(self, step, options):
        if hasattr(step, '__call__'):
            return step
        if step is None and self.method not in ['complex', 'multicomplex']:
            return MaxStepGenerator(**options)
        if 'step_nom' not in options and step is not None:
            options['step_nom'] = 1.0
        return MinStepGenerator(base_step=step, **options)

    def _set_derivative(self):
        if self.n == 0:
            self._derivative = self._derivative_zero_order
        else:
            self._derivative = self._derivative_nonzero_order

    def _derivative_zero_order(self, x_i, args, kwds):
        steps = [np.zeros_like(x_i)]
        results = [self.fun(x_i, *args, **kwds)]
        self.set_richardson_rule(2, 0)
        return self._vstack(results, steps), results[0]

    def _derivative_nonzero_order(self, x_i, args, kwds):
        diff, f = self._get_functions(args, kwds)
        steps, step_ratio = self._get_steps(x_i)
        fxi = self._eval_first(f, x_i)
        results = [diff(f, fxi, x_i, h) for h in steps]

        self.set_richardson_rule(step_ratio, self.richardson_terms)

        return self.fd_rule.apply(results, steps, step_ratio), fxi

    def set_richardson_rule(self, step_ratio, num_terms=2):
        """Set Richardson extrapolation options."""
        order = self.method_order
        step = self.fd_rule.richardson_step
        self.richardson = Richardson(step_ratio=step_ratio,
                                     step=step, order=order,
                                     num_terms=num_terms)

    def _get_functions(self, args, kwds):
        fun = self.fun

        def export_fun(x):
            return fun(x, *args, **kwds)

        return self.fd_rule.diff, export_fun

    def _get_steps(self, x_i):
        method, n, order = self.method, self.n, self.method_order
        # pylint: disable=no-member
        step_gen = self.step.step_generator_function(x_i, method, n, order)
        return list(step_gen()), step_gen.step_ratio

    def _raise_error_if_any_is_complex(self, x, f_x):
        msg = ('The {} step derivative method only works on a real-valued '
               'analytic function of a real variable!'.format(self.method))
        _assert(not np.any(np.iscomplex(x)),
                msg + ' But a complex variable was given!')
        _assert(not np.any(np.iscomplex(f_x)),
                msg + ' But the function given is complex valued!')

    def _eval_first(self, f, x):
        if self.method in ['complex', 'multicomplex']:
            f_x = f(x)
            self._raise_error_if_any_is_complex(x, f_x)
            return f_x
        if self.fd_rule.eval_first_condition or self.full_output:
            return f(x)
        return 0.0

    def __call__(self, x, *args, **kwds):
        x_i = np.asarray(x)
        with np.errstate(divide='ignore', invalid='ignore'):
            results, f_xi = self._derivative(x_i, args, kwds)
            derivative, info = self._extrapolate(*results)
        if self.full_output:
            return derivative, self.info(f_xi, *info)
        return derivative


def directionaldiff(f, x0, vec, **options):
    """
    Return directional derivative of a function of n variables

    Parameters
    ----------
    f : function
        analytical function to differentiate.
    x0 : array
        vector location at which to differentiate 'f'. If x0 is an n x m
        array, then 'f' is assumed to be a function of n * m variables.
    vec : array
        vector defining the line along which to take the derivative. It
        should be the same size as x0, but need not be a vector of unit
        length.
    **options :
        optional arguments to pass on to Derivative.

    Returns
    -------
    dder : scalar
        estimate of the first derivative of 'f' in the specified direction.

    Examples
    --------
    At the global minimizer (1, 1) of the Rosenbrock function, compute the
    directional derivative in the direction [1, 2]:

    >>> import numpy as np
    >>> import numdifftools as nd
    >>> vec = np.r_[1, 2]
    >>> rosen = lambda x: (1-x[0])**2 + 105*(x[1]-x[0]**2)**2
    >>> dd, info = nd.directionaldiff(rosen, [1, 1], vec, full_output=True)
    >>> np.allclose(dd, 0)
    True
    >>> np.abs(info.error_estimate) < 1e-14
    True

    See also
    --------
    Derivative, Gradient
    """
    x0 = np.asarray(x0)
    vec = np.asarray(vec)
    _assert(x0.size == vec.size, 'vec and x0 must be the same size')
    vec = np.reshape(vec / np.linalg.norm(vec.ravel()), x0.shape)
    return Derivative(lambda t: f(x0 + t * vec), **options)(0)
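
# Sanity-check sketch (illustrative only, not part of the test suite): the
# directional derivative above equals the gradient dotted with the normalized
# direction. For f(x) = x[0]**2 + 3*x[1] at x0 = [1, 2] along vec = [3, 4],
# the exact value is [2, 3] . [0.6, 0.8] = 3.6:
#
#     f = lambda x: x[0]**2 + 3 * x[1]
#     dd = directionaldiff(f, [1.0, 2.0], [3.0, 4.0])
#     np.allclose(dd, 3.6)   # True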


class Jacobian(Derivative):
    __doc__ = _CMN_DOC % dict(
        derivative='Jacobian',
        extra_parameter="""
    order : int, optional
        defines the order of the error term in the Taylor approximation used.
        For 'central' and 'complex' methods, it must be an even number.""",
        returns="""
    Returns
    -------
    jacob : array
        Jacobian
    """,
        extra_note="""
    Higher order approximation methods will generally be more accurate, but
    may also suffer more from numerical problems. First order methods are
    usually not recommended.

    If fun returns a 1d array, it returns a Jacobian. If a 2d array is
    returned by fun (e.g., with a value for each observation), it returns a 3d
    array with the Jacobian of each observation with shape xk x nobs x xk.
    I.e., the Jacobian of the first observation would be [:, 0, :]
    """,
        example="""
    Examples
    --------
    >>> import numpy as np
    >>> import numdifftools as nd

    # (nonlinear least squares)

    >>> xdata = np.arange(0, 1, 0.1)
    >>> ydata = 1 + 2 * np.exp(0.75 * xdata)
    >>> fun = lambda c: (c[0] + c[1] * np.exp(c[2] * xdata) - ydata)**2
    >>> np.allclose(fun([1, 2, 0.75]).shape, (10,))
    True

    >>> jfun = nd.Jacobian(fun)
    >>> val = jfun([1, 2, 0.75])
    >>> np.allclose(val, np.zeros((10, 3)))
    True

    >>> fun2 = lambda x: x[0] * x[1] * x[2]**2
    >>> jfun2 = nd.Jacobian(fun2)
    >>> np.allclose(jfun2([1., 2., 3.]), [[18., 9., 12.]])
    True

    >>> fun3 = lambda x: np.vstack((x[0] * x[1] * x[2]**2, x[0] * x[1] * x[2]))
    >>> jfun3 = nd.Jacobian(fun3)
    >>> np.allclose(jfun3([1., 2., 3.]), [[[18.], [9.], [12.]], [[6.], [3.], [2.]]])
    True
    >>> np.allclose(jfun3([4., 5., 6.]), [[[180.], [144.], [240.]], [[30.], [24.], [20.]]])
    True
    >>> np.allclose(jfun3(np.array([[1., 2., 3.]]).T), [[[18.], [9.], [12.]], [[6.], [3.], [2.]]])
    True
    """,
        see_also="""
    See also
    --------
    Derivative, Hessian, Gradient
    """)

    # n = property(fget=lambda cls: 1,
    #              fset=lambda cls, val: cls._set_derivative())  # @UnusedVariable

    _fd_rule = LogJacobianRule

    @staticmethod
    def _expand_steps(steps, x_i, fxi):
        if np.size(fxi) == 1:
            return steps
        n = len(x_i)
        one = np.ones_like(fxi)
        return [np.array([one * h[i] for i in range(n)]) for h in steps]

    def _derivative_nonzero_order(self, x_i, args, kwds):
        diff, f = self._get_functions(args, kwds)
        steps, step_ratio = self._get_steps(x_i)
        fxi = f(x_i)
        results = [diff(f, fxi, x_i, h) for h in steps]

        steps2 = self._expand_steps(steps, x_i, fxi)

        self.set_richardson_rule(step_ratio, self.richardson_terms)

        return self.fd_rule.apply(results, steps2, step_ratio), fxi

    def __call__(self, x, *args, **kwds):
        return super(Jacobian, self).__call__(np.atleast_1d(x), *args, **kwds)


class Gradient(Jacobian):
    __doc__ = _CMN_DOC % dict(
        derivative='Gradient',
        extra_parameter="""
    order : int, optional
        defines the order of the error term in the Taylor approximation used.
        For 'central' and 'complex' methods, it must be an even number.""",
        returns="""
    Returns
    -------
    grad : array
        gradient
    """,
        extra_note="""
    Higher order approximation methods will generally be more accurate, but
    may also suffer more from numerical problems. First order methods are
    usually not recommended.

    If x0 is an n x m array, then fun is assumed to be a function of n * m
    variables.
    """,
        example="""
    Examples
    --------
    >>> import numpy as np
    >>> import numdifftools as nd
    >>> fun = lambda x: np.sum(x**2)
    >>> dfun = nd.Gradient(fun)
    >>> np.allclose(dfun([1, 2, 3]), [2., 4., 6.])
    True

    # At [x,y] = [1,1], compute the numerical gradient
    # of the function sin(x-y) + y*exp(x)

    >>> sin = np.sin; exp = np.exp
    >>> x, y = 1, 1
    >>> z = lambda xy: sin(xy[0]-xy[1]) + xy[1]*exp(xy[0])
    >>> dz = nd.Gradient(z)
    >>> dz_dx, dz_dy = dz([x, y])
    >>> np.allclose([dz_dx, dz_dy],
    ...             [3.7182818284590686, 1.7182818284590162])
    True

    # At the global minimizer (1,1) of the Rosenbrock function,
    # compute the gradient. It should be essentially zero.

    >>> rosen = lambda x: (1-x[0])**2 + 105.*(x[1]-x[0]**2)**2
    >>> grad_rosen = nd.Gradient(rosen)
    >>> df_dx, df_dy = grad_rosen([x, y])
    >>> np.allclose([df_dx, df_dy], [0, 0])
    True""",
        see_also="""
    See also
    --------
    Derivative, Hessian, Jacobian
    """)

    def __call__(self, x, *args, **kwds):
        result = super(Gradient, self).__call__(np.atleast_1d(x).ravel(),
                                                *args, **kwds)
        if self.full_output:
            return result[0].squeeze(), result[1]
        return result.squeeze()


class Hessdiag(Derivative):
    __doc__ = _CMN_DOC % dict(
        derivative='Hessian diagonal',
        extra_parameter="""
    order : int, optional
        defines the order of the error term in the Taylor approximation used.
        For 'central' and 'complex' methods, it must be an even number.""",
        returns="""
    Returns
    -------
    hessdiag : array
        hessian diagonal
    """,
        extra_note="""
    Higher order approximation methods will generally be more accurate, but
    may also suffer more from numerical problems. First order methods are
    usually not recommended.
    """,
        example="""
    Examples
    --------
    >>> import numpy as np
    >>> import numdifftools as nd
    >>> fun = lambda x: x[0] + x[1]**2 + x[2]**3
    >>> Hfun = nd.Hessdiag(fun, full_output=True)
    >>> hd, info = Hfun([1, 2, 3])
    >>> np.allclose(hd, [0., 2., 18.])
    True

    >>> np.all(info.error_estimate < 1e-11)
    True
    """,
        see_also="""
    See also
    --------
    Derivative, Hessian, Jacobian, Gradient
    """)

    _fd_rule = LogHessdiagRule

    def __init__(self, f, step=None, method='central', order=2, **options):
        options.pop('n', None)
        super(Hessdiag, self).__init__(f, step=step, method=method, n=2,
                                       order=order, **options)

    def __call__(self, x, *args, **kwds):
        return super(Hessdiag, self).__call__(np.atleast_1d(x), *args, **kwds)


class Hessian(Hessdiag):
    __doc__ = _CMN_DOC % dict(
        derivative='Hessian',
        extra_parameter="",
        returns=r"""
    Returns
    -------
    hess : ndarray
        array of partial second derivatives, Hessian
    """,
        extra_note=r"""
    Computes the Hessian according to method as:
    'forward' :eq:`7`, 'central' :eq:`9` and 'complex' :eq:`10`:

    .. math::
        \quad ((f(x + d_j e_j + d_k e_k) + f(x) - f(x + d_j e_j) -
               f(x + d_k e_k))) / (d_j d_k)
        :label: 7

    .. math::
        \quad ((f(x + d_j e_j + d_k e_k) - f(x + d_j e_j - d_k e_k)) -
               (f(x - d_j e_j + d_k e_k) - f(x - d_j e_j - d_k e_k))) /
               (4 d_j d_k)
        :label: 9

    .. math::
        imag(f(x + i d_j e_j + d_k e_k) - f(x + i d_j e_j - d_k e_k)) /
            (2 d_j d_k)
        :label: 10

    where :math:`e_j` is a vector with element :math:`j` equal to one and the
    rest zero, and :math:`d_j` is a scalar spacing :math:`steps_j`.
    """,
        example="""
    Examples
    --------
    >>> import numpy as np
    >>> import numdifftools as nd

    # Rosenbrock function, minimized at [1,1]

    >>> rosen = lambda x: (1.-x[0])**2 + 105*(x[1]-x[0]**2)**2
    >>> Hfun = nd.Hessian(rosen)
    >>> h = Hfun([1, 1])
    >>> h
    array([[ 842., -420.],
           [-420.,  210.]])

    # cos(x-y), at (0,0)

    >>> cos = np.cos
    >>> fun = lambda xy: cos(xy[0]-xy[1])
    >>> Hfun2 = nd.Hessian(fun)
    >>> h2 = Hfun2([0, 0])
    >>> h2
    array([[-1.,  1.],
           [ 1., -1.]])""",
        see_also="""
    See also
    --------
    Derivative, Hessian
    """)

    _fd_rule = LogHessianRule

    def __init__(self, f, step=None, method='central', order=None, **options):
        if order is None:
            order = dict(backward=1, forward=1).get(method, 2)
        super(Hessian, self).__init__(f, step=step, method=method,
                                      order=order, **options)
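
# Illustrative sketch (plain numpy, independent of the class machinery above)
# of the central cross-difference :eq:`9` used by Hessian: for f(x, y) = x*y
# the mixed partial d2f/dxdy is 1, and with d_j = d_k = d:
#
#     f = lambda x: x[0] * x[1]
#     d = 1e-4
#     h_xy = ((f([d, d]) - f([d, -d])) -
#             (f([-d, d]) - f([-d, -d]))) / (4 * d * d)   # -> 1.0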
if __name__ == "__main__": from numdifftools.testing import test_docstrings test_docstrings(__file__)