Source code for numdifftools.core

# !/usr/bin/env python
"""numerical differentiation functions:

Derivative, Gradient, Jacobian, and Hessian

Author:      Per A. Brodtkorb
Created:     01.08.2008
Copyright:   (c) pab 2008
Licence:     New BSD
"""

from __future__ import absolute_import, division, print_function
from collections import namedtuple

import numpy as np
from numdifftools.extrapolation import Richardson, dea3  # @UnusedImport
from numdifftools.step_generators import MaxStepGenerator, MinStepGenerator
from numdifftools.limits import _Limit
from numdifftools.finite_difference import (LogRule,
                                            LogHessdiagRule,
                                            LogHessianRule,
                                            LogJacobianRule,
                                            )


__all__ = ('dea3', 'Derivative', 'Jacobian', 'Gradient', 'Hessian', 'Hessdiag',
           'MinStepGenerator', 'MaxStepGenerator', 'Richardson',
           'directionaldiff')
FD_RULES = {}
_SQRT_J = (1j + 1.0) / np.sqrt(2.0)  # = 1j**0.5


def _assert(cond, msg):
    if not cond:
        raise ValueError(msg)


_CMN_DOC = """
    Calculate %(derivative)s with finite difference approximation

    Parameters
    ----------
    fun : function
       function of one array fun(x, `*args`, `**kwds`)
    step : float, array-like or StepGenerator object, optional
        Defines the spacing used in the approximation.
        Default is MinStepGenerator(**step_options) if method in in ['complex', 'multicomplex'],
        otherwise
            MaxStepGenerator(**step_options)
        The results are extrapolated if the StepGenerator generate more than 3
        steps.
    method : {'central', 'complex', 'multicomplex', 'forward', 'backward'}
        defines the method used in the approximation%(extra_parameter)s
    richardson_terms: scalar integer, default 2.
        number of terms used in the Richardson extrapolation.
    full_output : bool, optional
        If `full_output` is False, only the derivative is returned.
        If `full_output` is True, then (der, r) is returned `der` is the
        derivative, and `r` is a Results object.
    **step_options:
        options to pass on to the XXXStepGenerator used.

    Methods
    -------
    __call__ : callable with the following parameters:
        x : array_like
            value at which function derivative is evaluated
        args : tuple
            Arguments for function `fun`.
        kwds : dict
            Keyword arguments for function `fun`.
    %(returns)s
    Notes
    -----
    Complex methods are usually the most accurate provided the function to
    differentiate is analytic. The complex-step methods also requires fewer
    steps than the other methods and can work very close to the support of
    a function.
    The complex-step derivative has truncation error O(steps**2) for `n=1` and
    O(steps**4) for `n` larger, so truncation error can be eliminated by
    choosing steps to be very small.
    Especially the first order complex-step derivative avoids the problem of
    round-off error with small steps because there is no subtraction. However,
    this method fails if fun(x) does not support complex numbers or involves
    non-analytic functions such as e.g.: abs, max, min.
    Central difference methods are almost as accurate and has no restriction on
    type of function. For this reason the 'central' method is the default
    method, but sometimes one can only allow evaluation in forward or backward
    direction.

    For all methods one should be careful in decreasing the step size too much
    due to round-off errors.
    %(extra_note)s
    References
    ----------
    Ridout, M.S. (2009) Statistical applications of the complex-step method
        of numerical differentiation. The American Statistician, 63, 66-74

    K.-L. Lai, J.L. Crassidis, Y. Cheng, J. Kim (2005), New complex step
        derivative approximations with application to second-order
        kalman filtering, AIAA Guidance, Navigation and Control Conference,
        San Francisco, California, August 2005, AIAA-2005-5944.

    Lyness, J. M., Moler, C. B. (1966). Vandermonde Systems and Numerical
                     Differentiation. *Numerische Mathematik*.

    Lyness, J. M., Moler, C. B. (1969). Generalized Romberg Methods for
                     Integrals of Derivatives. *Numerische Mathematik*.
    %(example)s
    %(see_also)s
    """


[docs]class Derivative(_Limit):

    __doc__ = _CMN_DOC % dict(
        derivative='n-th derivative',
        extra_parameter="""
    order : int, optional
        defines the order of the error term in the Taylor approximation used.
        For 'central' and 'complex' methods, it must be an even number.
    n : int, optional
        Order of the derivative.""",
        extra_note="""
    Higher order approximation methods will generally be more accurate, but may
    also suffer more from numerical problems. First order methods is usually
    not recommended.
    """, returns="""
    Returns
    -------
    der : ndarray
       array of derivatives
    """, example="""
    Examples
    --------
    >>> import numpy as np
    >>> import numdifftools as nd

    # 1'st derivative of exp(x), at x == 1

    >>> fd = nd.Derivative(np.exp)
    >>> np.allclose(fd(1), 2.71828183)
    True

    >>> d2 = fd([1, 2])
    >>> np.allclose(d2, [ 2.71828183,  7.3890561 ])
    True

    >>> def f(x):
    ...     return x**3 + x**2

    >>> df = nd.Derivative(f)
    >>> np.allclose(df(1), 5)
    True
    >>> ddf = nd.Derivative(f, n=2)
    >>> np.allclose(ddf(1), 8)
    True
    """, see_also="""
    See also
    --------
    Gradient,
    Hessian
    """)

    _fd_rule = LogRule
    info = namedtuple('info', ['f_value', 'error_estimate', 'final_step', 'index'])

[docs]    def __init__(self, fun, step=None, method='central', order=2, n=1, **options):
        self.richardson_terms = options.pop('richardson_terms', 2)
        self.full_output = options.pop('full_output', False)

        self.fun = fun

        self.fd_rule = self._fd_rule(n=n, method=method, order=order)

        super(Derivative, self).__init__(step=step,  **options)
        self._set_derivative()

    @property
    def n(self):
        """Order of the derivative."""
        return self.fd_rule.n

    @n.setter
    def n(self, value):
        self.fd_rule.n = value
        self._set_derivative()

    @property
    def order(self):
        """Defines the order of the error term in the Taylor approximation used."""
        return self.fd_rule.order

    @order.setter
    def order(self, order):
        self.fd_rule.order = order

    @property
    def method(self):
        """Defines the method used in the finite difference approximation."""
        return self.fd_rule.method

    @method.setter
    def method(self, method):
        self.fd_rule.method = method

    @property
    def method_order(self):
        """Defines the leading order of the error term in the Richardson extrapolation method."""
        return self.fd_rule.method_order

    def _step_generator(self, step, options):
        if hasattr(step, '__call__'):
            return step

        if step is None and self.method not in ['complex', 'multicomplex']:
            return MaxStepGenerator(**options)
        if 'step_nom' not in options and step is not None:
            options['step_nom'] = 1.0
        return MinStepGenerator(base_step=step, **options)

    def _set_derivative(self):
        if self.n == 0:
            self._derivative = self._derivative_zero_order
        else:
            self._derivative = self._derivative_nonzero_order

    def _derivative_zero_order(self, x_i, args, kwds):
        steps = [np.zeros_like(x_i)]
        results = [self.fun(x_i, *args, **kwds)]
        self.set_richardson_rule(2, 0)
        return self._vstack(results, steps), results[0]

    def _derivative_nonzero_order(self, x_i, args, kwds):
        diff, f = self._get_functions(args, kwds)
        steps, step_ratio = self._get_steps(x_i)
        fxi = self._eval_first(f, x_i)
        results = [diff(f, fxi, x_i, h) for h in steps]

        self.set_richardson_rule(step_ratio, self.richardson_terms)

        return self.fd_rule.apply(results, steps, step_ratio), fxi

[docs]    def set_richardson_rule(self, step_ratio, num_terms=2):
        """Set Richardson exptrapolation options"""
        order = self.method_order
        step = self.fd_rule.richardson_step
        self.richardson = Richardson(step_ratio=step_ratio,
                                     step=step, order=order,
                                     num_terms=num_terms)

    def _get_functions(self, args, kwds):

        fun = self.fun

        def export_fun(x):
            return fun(x, *args, **kwds)

        return self.fd_rule.diff, export_fun

    def _get_steps(self, x_i):
        method, n, order = self.method, self.n, self.method_order
        # pylint: disable=no-member
        step_gen = self.step.step_generator_function(x_i, method, n, order)
        return list(step_gen()), step_gen.step_ratio

    def _raise_error_if_any_is_complex(self, x, f_x):
        msg = ('The {} step derivative method does only work on a real valued analytic '
               'function of a real variable!'.format(self.method))
        _assert(not np.any(np.iscomplex(x)),
                msg + ' But a complex variable was given!')

        _assert(not np.any(np.iscomplex(f_x)),
                msg + ' But the function given is complex valued!')

    def _eval_first(self, f, x):
        if self.method in ['complex', 'multicomplex']:
            f_x = f(x)
            self._raise_error_if_any_is_complex(x, f_x)
            return f_x
        if self.fd_rule.eval_first_condition or self.full_output:
            return f(x)
        return 0.0

    def __call__(self, x, *args, **kwds):
        x_i = np.asarray(x)
        with np.errstate(divide='ignore', invalid='ignore'):
            results, f_xi = self._derivative(x_i, args, kwds)
            derivative, info = self._extrapolate(*results)
        if self.full_output:
            return derivative, self.info(f_xi, *info)
        return derivative


[docs]def directionaldiff(f, x0, vec, **options):
    """
    Return directional derivative of a function of n variables

    Parameters
    ----------
    f: function
        analytical function to differentiate.
    x0: array
        vector location at which to differentiate 'f'. If x0 is an nXm array,
        then 'f' is assumed to be a function of n*m variables.
    vec: array
        vector defining the line along which to take the derivative. It should
        be the same size as x0, but need not be a vector of unit length.
    **options:
        optional arguments to pass on to Derivative.

    Returns
    -------
    dder:  scalar
        estimate of the first derivative of 'f' in the specified direction.

    Examples
    --------
    At the global minimizer (1,1) of the Rosenbrock function,
    compute the directional derivative in the direction [1 2]

    >>> import numpy as np
    >>> import numdifftools as nd
    >>> vec = np.r_[1, 2]
    >>> rosen = lambda x: (1-x[0])**2 + 105*(x[1]-x[0]**2)**2
    >>> dd, info = nd.directionaldiff(rosen, [1, 1], vec, full_output=True)
    >>> np.allclose(dd, 0)
    True
    >>> np.abs(info.error_estimate)<1e-14
    True

    See also
    --------
    Derivative,
    Gradient
    """
    x0 = np.asarray(x0)
    vec = np.asarray(vec)
    _assert(x0.size == vec.size, 'vec and x0 must be the same shapes')
    vec = np.reshape(vec / np.linalg.norm(vec.ravel()), x0.shape)
    return Derivative(lambda t: f(x0 + t * vec), **options)(0)


[docs]class Jacobian(Derivative):

    __doc__ = _CMN_DOC % dict(
        derivative='Jacobian',
        extra_parameter="""
    order : int, optional
        defines the order of the error term in the Taylor approximation used.
        For 'central' and 'complex' methods, it must be an even number.""",
        returns="""
    Returns
    -------
    jacob : array
        Jacobian
    """, extra_note="""
    Higher order approximation methods will generally be more accurate, but may
    also suffer more from numerical problems. First order methods is usually
    not recommended.

    If fun returns a 1d array, it returns a Jacobian. If a 2d array is returned
    by fun (e.g., with a value for each observation), it returns a 3d array
    with the Jacobian of each observation with shape xk x nobs x xk. I.e.,
    the Jacobian of the first observation would be [:, 0, :]
    """, example="""
    Examples
    --------
    >>> import numpy as np
    >>> import numdifftools as nd

    #(nonlinear least squares)

    >>> xdata = np.arange(0,1,0.1)
    >>> ydata = 1+2*np.exp(0.75*xdata)
    >>> fun = lambda c: (c[0]+c[1]*np.exp(c[2]*xdata) - ydata)**2
    >>> np.allclose(fun([1, 2, 0.75]).shape,  (10,))
    True

    >>> jfun = nd.Jacobian(fun)
    >>> val = jfun([1, 2, 0.75])
    >>> np.allclose(val, np.zeros((10,3)))
    True

    >>> fun2 = lambda x : x[0]*x[1]*x[2]**2
    >>> jfun2 = nd.Jacobian(fun2)
    >>> np.allclose(jfun2([1.,2.,3.]), [[18., 9., 12.]])
    True

    >>> fun3 = lambda x : np.vstack((x[0]*x[1]*x[2]**2, x[0]*x[1]*x[2]))
    >>> jfun3 = nd.Jacobian(fun3)

    >>> np.allclose(jfun3([1., 2., 3.]), [[[18.], [9.], [12.]], [[6.], [3.], [2.]]])
    True
    >>> np.allclose(jfun3([4., 5., 6.]), [[[180.], [144.], [240.]], [[30.], [24.], [20.]]])
    True
    >>> np.allclose(jfun3(np.array([[1.,2.,3.]]).T), [[[18.], [9.], [12.]], [[6.], [3.], [2.]]])
    True

    """, see_also="""
    See also
    --------
    Derivative, Hessian, Gradient
    """)

#     n = property(fget=lambda cls: 1,
#                  fset=lambda cls, val: cls._set_derivative())  # @UnusedVariable

    _fd_rule = LogJacobianRule

    @staticmethod
    def _expand_steps(steps, x_i, fxi):
        if np.size(fxi) == 1:
            return steps
        n = len(x_i)
        one = np.ones_like(fxi)
        return [np.array([one * h[i] for i in range(n)]) for h in steps]

    def _derivative_nonzero_order(self, x_i, args, kwds):
        diff, f = self._get_functions(args, kwds)
        steps, step_ratio = self._get_steps(x_i)
        fxi = f(x_i)
        results = [diff(f, fxi, x_i, h) for h in steps]

        steps2 = self._expand_steps(steps, x_i, fxi)

        self.set_richardson_rule(step_ratio, self.richardson_terms)
        return self.fd_rule.apply(results, steps2, step_ratio), fxi

    def __call__(self, x, *args, **kwds):
        return super(Jacobian, self).__call__(np.atleast_1d(x), *args, **kwds)


[docs]class Gradient(Jacobian):

    __doc__ = _CMN_DOC % dict(
        derivative='Gradient',
        extra_parameter="""
    order : int, optional
        defines the order of the error term in the Taylor approximation used.
        For 'central' and 'complex' methods, it must be an even number.""",
        returns="""
    Returns
    -------
    grad : array
        gradient
    """, extra_note="""
    Higher order approximation methods will generally be more accurate, but may
    also suffer more from numerical problems. First order methods is usually
    not recommended.

    If x0 is an n x m array, then fun is assumed to be a function of n * m
    variables.
    """, example="""
    Examples
    --------
    >>> import numpy as np
    >>> import numdifftools as nd
    >>> fun = lambda x: np.sum(x**2)
    >>> dfun = nd.Gradient(fun)
    >>> np.allclose(dfun([1,2,3]), [ 2.,  4.,  6.])
    True

    # At [x,y] = [1,1], compute the numerical gradient
    # of the function sin(x-y) + y*exp(x)

    >>> sin = np.sin; exp = np.exp
    >>> x, y = 1, 1
    >>> z = lambda xy: sin(xy[0]-xy[1]) + xy[1]*exp(xy[0])
    >>> dz = nd.Gradient(z)
    >>> dz_dx, dz_dy = dz([x, y])
    >>> np.allclose([dz_dx, dz_dy],
    ...             [ 3.7182818284590686, 1.7182818284590162])
    True

    # At the global minimizer (1,1) of the Rosenbrock function,
    # compute the gradient. It should be essentially zero.

    >>> rosen = lambda x : (1-x[0])**2 + 105.*(x[1]-x[0]**2)**2
    >>> grad_rosen = nd.Gradient(rosen)
    >>> df_dx, df_dy = grad_rosen([x, y])
    >>> np.allclose([df_dx, df_dy], [0, 0])
    True""", see_also="""
    See also
    --------
    Derivative, Hessian, Jacobian
    """)

    def __call__(self, x, *args, **kwds):
        result = super(Gradient, self).__call__(np.atleast_1d(x).ravel(), *args, **kwds)
        if self.full_output:
            return result[0].squeeze(), result[1]
        return result.squeeze()


[docs]class Hessdiag(Derivative):

    __doc__ = _CMN_DOC % dict(
        derivative='Hessian diagonal',
        extra_parameter="""order : int, optional
        defines the order of the error term in the Taylor approximation used.
        For 'central' and 'complex' methods, it must be an even number.""",
        returns="""
    Returns
    -------
    hessdiag : array
        hessian diagonal
    """, extra_note="""
    Higher order approximation methods will generally be more accurate, but may
    also suffer more from numerical problems. First order methods is usually
    not recommended.
    """, example="""
    Examples
    --------
    >>> import numpy as np
    >>> import numdifftools as nd
    >>> fun = lambda x : x[0] + x[1]**2 + x[2]**3
    >>> Hfun = nd.Hessdiag(fun, full_output=True)
    >>> hd, info = Hfun([1,2,3])
    >>> np.allclose(hd, [0.,   2.,  18.])
    True

    >>> np.all(info.error_estimate < 1e-11)
    True
    """, see_also="""
    See also
    --------
    Derivative, Hessian, Jacobian, Gradient
    """)

    _fd_rule = LogHessdiagRule

[docs]    def __init__(self, f, step=None, method='central', order=2, **options):
        options.pop('n', None)
        super(Hessdiag, self).__init__(f, step=step, method=method, n=2, order=order, **options)

    def __call__(self, x, *args, **kwds):
        return super(Hessdiag, self).__call__(np.atleast_1d(x), *args, **kwds)


[docs]class Hessian(Hessdiag):

    __doc__ = _CMN_DOC % dict(
        derivative='Hessian',
        extra_parameter="",
        returns=r"""
    Returns
    -------
    hess : ndarray
       array of partial second derivatives, Hessian
    """, extra_note=r"""
    Computes the Hessian according to method as:
    'forward' :eq:`7`, 'central' :eq:`9` and 'complex' :eq:`10`:

    .. math::
        \quad ((f(x + d_j e_j + d_k e_k) + f(x) - f(x + d_j e_j) - f(x + d_k e_k))) / (d_j d_k)
        :label: 7

    .. math::
        \quad  ((f(x + d_j e_j + d_k e_k) - f(x + d_j e_j - d_k e_k)) -
                (f(x - d_j e_j + d_k e_k) - f(x - d_j e_j - d_k e_k)) /
                (4 d_j d_k)
        :label: 9

    .. math::
        imag(f(x + i d_j e_j + d_k e_k) - f(x + i d_j e_j - d_k e_k)) /
            (2 d_j d_k)
        :label: 10

    where :math:`e_j` is a vector with element :math:`j` is one and the rest
    are zero and :math:`d_j` is a scalar spacing :math:`steps_j`.
    """, example="""
    Examples
    --------
    >>> import numpy as np
    >>> import numdifftools as nd

    # Rosenbrock function, minimized at [1,1]

    >>> rosen = lambda x : (1.-x[0])**2 + 105*(x[1]-x[0]**2)**2
    >>> Hfun = nd.Hessian(rosen)
    >>> h = Hfun([1, 1])
    >>> h
    array([[ 842., -420.],
           [-420.,  210.]])

    # cos(x-y), at (0,0)

    >>> cos = np.cos
    >>> fun = lambda xy : cos(xy[0]-xy[1])
    >>> Hfun2 = nd.Hessian(fun)
    >>> h2 = Hfun2([0, 0])
    >>> h2
    array([[-1.,  1.],
           [ 1., -1.]])""", see_also="""
    See also
    --------
    Derivative, Hessian
    """)

    _fd_rule = LogHessianRule

[docs]    def __init__(self, f, step=None, method='central', order=None, **options):
        if order is None:
            order = dict(backward=1, forward=1).get(method, 2)
        super(Hessian, self).__init__(f, step=step, method=method, order=order, **options)


if __name__ == "__main__":
    from numdifftools.testing import test_docstrings
    test_docstrings(__file__)