Source code for fides.trust_region

"""
Trust Region Steps
------------------
This module provides the machinery to compute different trust-region(
-reflective) step proposals and select among them based on to their
performance according to the quadratic approximation of the objective function
"""

import numpy as np
import scipy.linalg as linalg

from numpy.linalg import norm
from scipy.sparse import csc_matrix

from typing import Tuple

from .logging import logger
from .subproblem import (
    solve_1d_trust_region_subproblem, solve_nd_trust_region_subproblem
)
from .constants import SubSpaceDim


[docs]def normalize(v: np.ndarray) -> None:
    """
    Inplace normalization of a vector

    :param v:
        vector to be normalized
    """
    nv = norm(v)
    if nv > 0:
        v[:] = v/nv  # change inplace


[docs]class Step:
    """
    Base class for the computation of a proposal step

    :ivar x: current state of optimization variables
    :ivar s: proposed step
    :ivar sc: coefficients in the 1D/2D subspace that defines the affine
        transformed step ss: ss = subspace * sc
    :ivar ss: affine transformed step: s = scaling * ss
    :ivar og_s: s without step back
    :ivar og_sc: st without step back
    :ivar og_ss: ss without step back
    :ivar sg: rescaled gradient scaling * g
    :ivar hess: hessian of the objective function at x
    :ivar g_dscaling: diag(g) * dscaling
    :ivar delta: trust region radius in the transformed space defined by
        scaling matrix
    :ivar theta: controls step back, fraction of step to take if full
        step would reach breakpoint
    :ivar lb: lower boundaries for x
    :ivar ub: upper boundaries for x
    :ivar minbr: maximal fraction of step s that can be taken to reach
        first breakpoint
    :ivar ipt: index of x that specifies the variable that will hit the
        breakpoint if a step minbr * s is taken
    :ivar qpval0: value to the quadratic subproblem at x
    :ivar qpval: value of the quadratic subproblem for the proposed step
    :ivar shess: matrix of the full quadratic problem
    :ivar cg: projection of the g_hat to the subspace
    :ivar chess: projection of the B to the subspace
    """
[docs]    def __init__(self,
                 x: np.ndarray,
                 sg: np.ndarray,
                 hess: np.ndarray,
                 scaling: csc_matrix,
                 g_dscaling: csc_matrix,
                 delta: float,
                 theta: float,
                 ub: np.ndarray,
                 lb: np.ndarray):
        """

        :param x:
            Reference point
        :param sg:
            Gradient in rescaled coordinates
        :param hess:
            Hessian in unscaled coordinates
        :param scaling:
            Matrix that defines scaling transformation
        :param g_dscaling:
            Unscaled gradient multiplied by derivative of scaling
            transformation
        :param delta:
            Trust region Radius in scaled coordinates
        :param theta:
            Stepback parameter that controls how close steps are allowed to
            get to the boundary
        :param ub:
            Upper boundary
        :param lb:
            Lower boundary
        """
        self.x = x

        self.s = None
        self.sc = None
        self.ss = None

        self.og_s = None
        self.og_sc = None
        self.og_ss = None

        self.sg = sg
        self.scaling = scaling

        self.delta = delta
        self.theta = theta

        self.lb = lb
        self.ub = ub

        self.minbr = 1.0
        self.alpha = 1.0
        self.ipt = 0

        self.qpval0 = 0.0
        self.qpval = 0.0

        self.shess = scaling * hess * scaling + g_dscaling

        self.cg = None
        self.chess = None
        self.subspace = None

        self.s0 = np.zeros((hess.shape[0],))
        self.ss0 = np.zeros((hess.shape[0],))

[docs]    def step_back(self):
        """
        This function truncates the step based on the distance of the
        current point to the boundary.
        """
        # create copies of the calculated step
        self.og_s = self.s.copy()
        self.og_ss = self.ss.copy()
        self.og_sc = self.sc.copy()

        nonzero = np.abs(self.s) > 0
        if np.any(nonzero):
            # br quantifies the distance to the boundary normalized
            # by the proposed step, this indicates the fraction of the step
            # that would put the respective variable at the boundary
            # This is defined in [Coleman-Li1996] (3.1)
            br = np.max(np.vstack([
                (self.ub[nonzero] - self.x[nonzero])/self.s[nonzero],
                (self.lb[nonzero] - self.x[nonzero])/self.s[nonzero]
            ]), axis=0)
            self.ipt = np.argmin(br)
            if np.isscalar(br):
                self.minbr = br
            else:
                self.minbr = br[self.ipt]
            # compute the minimum of the step
            self.alpha = np.min([1, self.theta * self.minbr])

        self.s *= self.alpha
        self.sc *= self.alpha
        self.ss *= self.alpha

[docs]    def reduce_to_subspace(self) -> None:
        """
        This function projects the matrix shess and the vector sg to the
        subspace
        """
        self.chess = self.subspace.T.dot(self.shess.dot(self.subspace))
        self.cg = self.subspace.T.dot(self.sg)

[docs]    def compute_step(self) -> None:
        """
        Compute the step as solution to the trust region subproblem. Special
        code is used for the special case 1-dimensional subspace case
        """
        if self.subspace.shape[1] > 1:
            self.sc, _ = solve_nd_trust_region_subproblem(self.chess, self.cg,
                                                          self.delta)
        else:
            self.sc = solve_1d_trust_region_subproblem(self.shess, self.sg,
                                                       self.subspace[:, 0],
                                                       self.delta)
        self.ss = self.subspace.dot(np.real(self.sc)) + self.ss0
        self.s = self.scaling.dot(self.ss) + self.s0

[docs]    def calculate(self):
        """
        Calculates step and the expected objective function value according to
        the quadratic approximation
        """
        self.reduce_to_subspace()
        self.compute_step()
        self.step_back()
        self.qpval = self.qpval0 + self.cg.dot(self.sc) + \
            .5 * (self.sc.dot(self.chess).dot(self.sc))[0, 0]


[docs]class TRStepFull(Step):
    """
    This class provides the machinery to compute an exact solution of
    the trust region subproblem.
    """

    type = 'trnd'

[docs]    def __init__(self, x, sg, hess, scaling, g_dscaling, delta, theta,
                 ub, lb):
        super().__init__(x, sg, hess, scaling, g_dscaling, delta, theta,
                         ub, lb)
        self.subspace = np.eye(hess.shape[0])


[docs]class TRStep2D(Step):
    """
    This class provides the machinery to compute an approximate solution of
    the trust region subproblem according to a 2D subproblem
    """

    type = 'tr2d'

[docs]    def __init__(self, x, sg, hess, scaling, g_dscaling, delta, theta,
                 ub, lb, subspace):
        """
        :param subspace:
            Precomputed subspace
        """
        super().__init__(x, sg, hess, scaling, g_dscaling, delta, theta,
                         ub, lb)
        self.subspace = subspace
        if self.subspace is None:
            n = len(sg)

            s_newt = linalg.solve(hess, sg)
            posdef = s_newt.dot(hess.dot(s_newt)) > 0
            normalize(s_newt)

            if n > 1:
                if not posdef:
                    # in this case we are in Case 2 of Fig 12 in
                    # [Coleman-Li1994]
                    logger.debug('Newton direction did not have negative '
                                 'curvature adding scaling * np.sign(sg) to '
                                 '2D subspace.')
                    s_grad = scaling * np.sign(sg) + (sg == 0)
                else:
                    s_grad = sg.copy()

                # orthonormalize, this ensures that S.T.dot(S) = I and we
                # can use S/S.T for transformation
                s_grad = s_grad - s_newt * (s_newt.dot(s_grad))
                normalize(s_grad)
                # if non-zero, add s_grad to subspace
                if np.any(s_grad != 0):
                    self.subspace = np.vstack([s_newt, s_grad]).T
                    return
                else:
                    logger.debug('Singular subspace, continuing with 1D '
                                 'subspace.')

            self.subspace = np.expand_dims(s_newt, 1)


[docs]class TRStepReflected(Step):
    """
    This class provides the machinery to compute a reflected step based on
    trust region subproblem solution thathit the boundaries.
    """

    type = 'trr'

[docs]    def __init__(self, x, sg, hess, scaling, g_dscaling, delta, theta,
                 ub, lb, tr_step):
        """
        :param tr_step:
            Trust-region step that is reflected
        """
        super().__init__(x, sg, hess, scaling, g_dscaling, delta, theta,
                         ub, lb)

        self.s0 = tr_step.minbr * tr_step.og_s
        self.ss0 = tr_step.minbr * tr_step.og_ss
        # update x and at breakpoint
        self.x = x + self.s0

        # reflect the transformed step at the boundary
        nss = tr_step.og_ss.copy()
        nss[tr_step.ipt] *= -1
        normalize(nss)
        self.subspace = np.expand_dims(nss, 1)

        self.qpval0 = tr_step.qpval


[docs]class GradientStep(Step):
    """
    This class provides the machinery to compute a gradient step.
    """

    type = 'g'

[docs]    def __init__(self, x, sg, hess, scaling, g_dscaling, delta, theta,
                 ub, lb):
        super().__init__(x, sg, hess, scaling, g_dscaling, delta, theta,
                         ub, lb)
        s_grad = sg.copy()
        normalize(s_grad)
        self.subspace = np.expand_dims(s_grad, 1)


[docs]def trust_region_reflective(x: np.ndarray,
                            g: np.ndarray,
                            hess: np.ndarray,
                            scaling: csc_matrix,
                            tr_subspace: np.ndarray,
                            delta: float,
                            dv: np.ndarray,
                            theta: float,
                            lb: np.ndarray,
                            ub: np.ndarray,
                            subspace_dim: SubSpaceDim) -> Tuple[np.ndarray,
                                                                np.ndarray,
                                                                float,
                                                                np.ndarray,
                                                                str]:
    """
    Compute a step according to the solution of the trust-region subproblem.
    If step-back is necessary, gradient and reflected trust region step are
    also evaluated in terms of their performance according to the local
    quadratic approximation

    :param x:
        Current values of the optimization variables
    :param g:
        Objective function gradient at x
    :param hess:
        (Approximate) objective function Hessian at x
    :param scaling:
        Scaling transformation according to distance to boundary
    :param tr_subspace:
        Precomputed subspace from previous iteration for reuse if proposed
        step was not accepted
    :param delta:
        Trust region radius, note that this applies after scaling
        transformation
    :param dv:
        derivative of scaling transformation
    :param theta:
        parameter regulating stepback
    :param lb:
        lower optimization variable boundaries
    :param ub:
        upper optimization variable boundaries
    :param subspace_dim:
        Subspace dimension in which the subproblem will be solved. Larger
        subspaces require more compute time but can yield higher quality step
        proposals.

    :return:
        s: proposed step,
        ss: rescaled proposed step,
        qpval: expected function value according to local quadratic
        approximation,
        subspace: computed subspace for reuse if proposed step is not accepted,
        steptype: type of step that was selected for proposal
    """
    sg = scaling.dot(g)
    g_dscaling = csc_matrix(np.diag(np.abs(g) * dv))

    if subspace_dim == SubSpaceDim.TWO:
        tr_step = TRStep2D(
            x, sg, hess, scaling, g_dscaling, delta, theta, ub, lb, tr_subspace
        )
    elif subspace_dim == SubSpaceDim.FULL:
        tr_step = TRStepFull(
            x, sg, hess, scaling, g_dscaling, delta, theta, ub, lb,
        )
    else:
        raise ValueError('Invalid choice of subspace dimension.')
    tr_step.calculate()

    # in case of truncation, we hit the boundary and we check both the
    # gradient and the reflected step, either of which could be better than the
    # TR step

    step = tr_step
    if tr_step.alpha < 1 and len(g) > 1:
        rtr_step = TRStepReflected(x, g, hess, scaling, g_dscaling, delta,
                                   theta, ub, lb, tr_step)
        rtr_step.calculate()

        g_step = GradientStep(x, sg, hess, scaling, g_dscaling, delta,
                              theta, ub, lb)
        g_step.calculate()

        if g_step.qpval < min(tr_step.qpval, rtr_step.qpval):
            step = g_step
        elif rtr_step.qpval < min(g_step.qpval, tr_step.qpval):
            step = rtr_step

        logger.debug(' | '.join([
            f'{step.type}: [qp: {step.qpval:.2E}, a: {step.alpha:.2E}]'
            for step in [tr_step, g_step, rtr_step]
        ]))

    return step.s, step.ss, step.qpval, tr_step.subspace, step.type