Source code for fides.trust_region

"""
Trust Region Steps
------------------
This module provides the machinery to compute different trust-region(
-reflective) step proposals and select among them based on to their
performance according to the quadratic approximation of the objective function
"""

import numpy as np
import scipy.linalg as linalg

from numpy.linalg import norm
from scipy.sparse import csc_matrix

from .logging import logger
from .subproblem import (
    solve_1d_trust_region_subproblem, solve_nd_trust_region_subproblem
)
from .constants import SubSpaceDim, StepBackStrategy

from typing import List


[docs]def normalize(v: np.ndarray) -> None:
    """
    Inplace normalization of a vector

    :param v:
        vector to be normalized
    """
    nv = norm(v)
    if nv > 0:
        v[:] = v/nv  # change inplace


[docs]class Step:
    """
    Base class for the computation of a proposal step

    :ivar x: Current state of optimization variables
    :ivar s: Proposed step
    :ivar sc: Coefficients in the 1D/2D subspace that defines the affine
        transformed step ss: `ss = subspace * sc`
    :ivar ss: Affine transformed step: `s = scaling * ss`
    :ivar og_s: `s` without step back
    :ivar og_sc: `st` without step back
    :ivar og_ss: `ss` without step back
    :ivar sg: Rescaled gradient `scaling * g`
    :ivar hess: Hessian of the objective function at `x`
    :ivar g_dscaling: `diag(g) * dscaling`
    :ivar delta: Trust region radius in the transformed space defined by
        scaling matrix
    :ivar theta: Controls step back, fraction of step to take if full
        step would reach breakpoint
    :ivar lb: Lower boundaries for x
    :ivar ub: Upper boundaries for x
    :ivar minbr: Maximal fraction of step s that can be taken to reach
        first breakpoint
    :ivar ipt: Index of x that specifies the variable that will hit the
        breakpoint if a step minbr * s is taken
    :ivar qpval0: Value to the quadratic subproblem at x
    :ivar qpval: Value of the quadratic subproblem for the proposed step
    :ivar shess: Matrix of the full quadratic problem
    :ivar cg: Projection of the g_hat to the subspace
    :ivar chess: Projection of the B to the subspace
    :ivar reflection_count: Number of reflections that were applied to
        obtain this step
    :ivar truncation_count: Number of reflections that were applied to
        obtain this step

    :cvar type: Identifier that allows identification of subclasses
    """
    type = 'step'

[docs]    def __init__(self,
                 x: np.ndarray,
                 sg: np.ndarray,
                 hess: np.ndarray,
                 scaling: csc_matrix,
                 g_dscaling: csc_matrix,
                 delta: float,
                 theta: float,
                 ub: np.ndarray,
                 lb: np.ndarray):
        """

        :param x:
            Reference point
        :param sg:
            Gradient in rescaled coordinates
        :param hess:
            Hessian in unscaled coordinates
        :param scaling:
            Matrix that defines scaling transformation
        :param g_dscaling:
            Unscaled gradient multiplied by derivative of scaling
            transformation
        :param delta:
            Trust region Radius in scaled coordinates
        :param theta:
            Stepback parameter that controls how close steps are allowed to
            get to the boundary
        :param ub:
            Upper boundary
        :param lb:
            Lower boundary

        """
        self.x = x

        self.s = None
        self.sc = None
        self.ss = None

        self.og_s = None
        self.og_sc = None
        self.og_ss = None

        self.sg = sg
        self.scaling = scaling

        self.delta = delta
        self.theta = theta

        self.lb = lb
        self.ub = ub

        self.br = np.ones(sg.shape)
        self.minbr = 1.0
        self.alpha = 1.0
        self.ipt = 0

        self.qpval0 = 0.0
        self.qpval = 0.0

        self.shess = scaling * hess * scaling + g_dscaling

        self.cg = None
        self.chess = None
        self.subspace = None

        self.s0 = np.zeros(sg.shape)
        self.ss0 = np.zeros(sg.shape)

        self.reflection_count = 0
        self.truncation_count = 0

[docs]    def step_back(self):
        """
        This function truncates the step based on the distance of the
        current point to the boundary.
        """
        # create copies of the calculated step
        self.og_s = self.s.copy()
        self.og_ss = self.ss.copy()
        self.og_sc = self.sc.copy()

        nonzero = np.abs(self.s) > 0
        self.br = np.inf * np.ones(self.s.shape)
        if np.any(nonzero):
            # br quantifies the distance to the boundary normalized
            # by the proposed step, this indicates the fraction of the step
            # that would put the respective variable at the boundary
            # This is defined in [Coleman-Li1996] (3.1)
            self.br[nonzero] = np.max(np.vstack([
                (self.ub[nonzero] - self.x[nonzero])/self.s[nonzero],
                (self.lb[nonzero] - self.x[nonzero])/self.s[nonzero]
            ]), axis=0)
        self.minbr = np.min(self.br)
        self.ipt = np.where(self.br == self.minbr)[0]
        # compute the minimum of the step
        self.alpha = np.min([1, self.theta * self.minbr])

        self.s *= self.alpha
        self.sc *= self.alpha
        self.ss *= self.alpha

[docs]    def reduce_to_subspace(self) -> None:
        """
        This function projects the matrix shess and the vector sg to the
        subspace
        """
        self.chess = self.subspace.T.dot(self.shess.dot(self.subspace))
        self.cg = self.subspace.T.dot(self.sg)

[docs]    def compute_step(self) -> None:
        """
        Compute the step as solution to the trust region subproblem. Special
        code is used for the special case 1-dimensional subspace case
        """
        if self.subspace.shape[1] > 1:
            self.sc, _ = solve_nd_trust_region_subproblem(
                self.chess, self.cg,
                np.sqrt(max(self.delta ** 2 - norm(self.ss0) ** 2, 0.0))
            )
        else:
            self.sc = solve_1d_trust_region_subproblem(
                self.shess, self.sg, self.subspace[:, 0], self.delta, self.ss0
            )
        self.ss = self.subspace.dot(np.real(self.sc))
        self.s = self.scaling.dot(self.ss)

[docs]    def calculate(self):
        """
        Calculates step and the expected objective function value according to
        the quadratic approximation
        """
        self.reduce_to_subspace()
        self.compute_step()
        self.step_back()
        self.qpval = self.qpval0 + self.sg.dot(self.ss) + \
            .5 * (self.ss.dot(self.shess).dot(self.ss))[0, 0]


[docs]class TRStepFull(Step):
    """
    This class provides the machinery to compute an exact solution of
    the trust region subproblem.
    """

    type = 'trnd'

[docs]    def __init__(self, x, sg, hess, scaling, g_dscaling, delta, theta,
                 ub, lb):
        super().__init__(x, sg, hess, scaling, g_dscaling, delta, theta,
                         ub, lb)
        self.subspace = np.eye(hess.shape[0])


[docs]class TRStep2D(Step):
    """
    This class provides the machinery to compute an approximate solution of
    the trust region subproblem according to a 2D subproblem
    """

    type = 'tr2d'

[docs]    def __init__(self, x, sg, hess, scaling, g_dscaling, delta, theta,
                 ub, lb):
        super().__init__(x, sg, hess, scaling, g_dscaling, delta, theta,
                         ub, lb)
        n = len(sg)

        s_newt = linalg.solve(hess, sg)
        posdef = s_newt.dot(hess.dot(s_newt)) > 0
        normalize(s_newt)

        if n > 1:
            if not posdef:
                # in this case we are in Case 2 of Fig 12 in
                # [Coleman-Li1994]
                logger.debug('Newton direction did not have negative '
                             'curvature adding scaling * np.sign(sg) to '
                             '2D subspace.')
                s_grad = scaling * np.sign(sg) + (sg == 0)
            else:
                s_grad = sg.copy()

            # orthonormalize, this ensures that S.T.dot(S) = I and we
            # can use S/S.T for transformation
            s_grad = s_grad - s_newt * (s_newt.dot(s_grad))
            normalize(s_grad)
            # if non-zero, add s_grad to subspace
            if np.any(s_grad != 0):
                self.subspace = np.vstack([s_newt, s_grad]).T
                return
            else:
                logger.debug('Singular subspace, continuing with 1D '
                             'subspace.')

        self.subspace = np.expand_dims(s_newt, 1)


[docs]class TRStepReflected(Step):
    """
    This class provides the machinery to compute a reflected step based on
    trust region subproblem solution that hit the boundaries.
    """

    type = 'trr'

[docs]    def __init__(self, x, sg, hess, scaling, g_dscaling, delta, theta,
                 ub, lb, step: Step):
        """
        :param step:
            Trust-region step that is reflected
        """
        super().__init__(x, sg, hess, scaling, g_dscaling, delta, theta,
                         ub, lb)

        alpha = min(step.minbr, 1)
        self.s0 = alpha * step.og_s + step.s0
        self.ss0 = alpha * step.og_ss + step.ss0
        # update x and at breakpoint
        self.x = x + self.s0

        # reflect the transformed step at the boundary
        nss = step.og_ss.copy()
        nss[step.ipt] *= -1
        normalize(nss)
        self.subspace = np.expand_dims(nss, 1)

        self.qpval0 = step.qpval
        self.reflection_count = step.reflection_count + 1


[docs]class TRStepTruncated(Step):
    """
    This class provides the machinery to compute a reduced step based on
    trust region subproblem solution that hit the boundaries.
    """

    type = 'trt'

[docs]    def __init__(self, x, sg, hess, scaling, g_dscaling, delta, theta,
                 ub, lb, step: Step):
        """
        :param step:
            Trust-region step that is reduced
        """
        super().__init__(x, sg, hess, scaling, g_dscaling, delta, theta,
                         ub, lb)

        self.s0 = step.s0.copy()
        self.ss0 = step.ss0.copy()
        self.s0[step.ipt] += step.s[step.ipt]
        self.ss0[step.ipt] += step.ss[step.ipt]
        # update x and at breakpoint
        self.x = x + self.s0

        subspace = step.subspace.copy()
        subspace[step.ipt, :] = 0
        # reduce subspace
        subspace = subspace[:, (subspace != 0).any(axis=0)]
        # normalize subspace
        for ix in range(subspace.shape[1]):
            normalize(subspace[:, ix])
        self.subspace = subspace

        self.qpval0 = step.qpval
        self.truncation_count = step.truncation_count + 1


[docs]class GradientStep(Step):
    """
    This class provides the machinery to compute a gradient step.
    """

    type = 'g'

[docs]    def __init__(self, x, sg, hess, scaling, g_dscaling, delta, theta,
                 ub, lb):
        super().__init__(x, sg, hess, scaling, g_dscaling, delta, theta,
                         ub, lb)
        s_grad = sg.copy()
        normalize(s_grad)
        self.subspace = np.expand_dims(s_grad, 1)


[docs]def trust_region_reflective(x: np.ndarray,
                            g: np.ndarray,
                            hess: np.ndarray,
                            scaling: csc_matrix,
                            delta: float,
                            dv: np.ndarray,
                            theta: float,
                            lb: np.ndarray,
                            ub: np.ndarray,
                            subspace_dim: SubSpaceDim,
                            stepback_strategy: StepBackStrategy) -> Step:
    """
    Compute a step according to the solution of the trust-region subproblem.
    If step-back is necessary, gradient and reflected trust region step are
    also evaluated in terms of their performance according to the local
    quadratic approximation

    :param x:
        Current values of the optimization variables
    :param g:
        Objective function gradient at x
    :param hess:
        (Approximate) objective function Hessian at x
    :param scaling:
        Scaling transformation according to distance to boundary
    :param delta:
        Trust region radius, note that this applies after scaling
        transformation
    :param dv:
        derivative of scaling transformation
    :param theta:
        parameter regulating stepback
    :param lb:
        lower optimization variable boundaries
    :param ub:
        upper optimization variable boundaries
    :param subspace_dim:
        Subspace dimension in which the subproblem will be solved. Larger
        subspaces require more compute time but can yield higher quality step
        proposals.
    :param stepback_strategy:
        Strategy that is applied when the proposed step exceeds the
        optimization boundary.

    :return:
        s: proposed step,
        ss: rescaled proposed step,
        qpval: expected function value according to local quadratic
        approximation,
        subspace: computed subspace for reuse if proposed step is not accepted,
        steptype: type of step that was selected for proposal
    """
    sg = scaling.dot(g)
    g_dscaling = csc_matrix(np.diag(np.abs(g) * dv))

    if subspace_dim == SubSpaceDim.TWO:
        tr_step = TRStep2D(
            x, sg, hess, scaling, g_dscaling, delta, theta, ub, lb
        )
    elif subspace_dim == SubSpaceDim.FULL:
        tr_step = TRStepFull(
            x, sg, hess, scaling, g_dscaling, delta, theta, ub, lb,
        )
    else:
        raise ValueError('Invalid choice of subspace dimension.')
    tr_step.calculate()

    # in case of truncation, we hit the boundary and we check both the
    # gradient and the reflected step, either of which could be better than the
    # TR step

    steps = [tr_step]
    if tr_step.alpha < 1 and len(g) > 1:
        if stepback_strategy == StepBackStrategy.REFLECT:
            steps.extend(stepback_reflect(tr_step, x, sg, hess, scaling,
                                          g_dscaling, delta, theta, ub, lb))
        elif stepback_strategy == StepBackStrategy.TRUNCATE:
            steps.extend(stepback_truncate(tr_step, x, sg, hess, scaling,
                                           g_dscaling, delta, theta, ub, lb))
        else:
            ValueError('Invalid value for stepback_strategy!')

    if len(steps) > 1:
        rcountstrs = [str(step.reflection_count)
                      * int(step.reflection_count > 0)
                      for step in steps]
        logger.debug(' | '.join([
            f'{step.type + rcountstr}: [qp:'
            f' {step.qpval:.2E}, '
            f'a: {step.alpha:.2E}]'
            for rcountstr, step in zip(rcountstrs, steps)
        ]))

    qpvals = [step.qpval for step in steps]
    return steps[np.argmin(qpvals)]


[docs]def stepback_reflect(tr_step: Step,
                     x: np.ndarray,
                     sg: np.ndarray,
                     hess: np.ndarray,
                     scaling: csc_matrix,
                     g_dscaling: csc_matrix,
                     delta: float,
                     theta: float,
                     ub: np.ndarray,
                     lb: np.ndarray) -> List[Step]:
    """
    Compute new proposal steps according to a reflection strategy.

    :param tr_step:
        Reference trust region step that will be reflect
    :param x:
        Current values of the optimization variables
    :param sg:
        Rescaled objective function gradient at x
    :param hess:
        (Approximate) objective function Hessian at x
    :param g_dscaling:
        Unscaled gradient multiplied by derivative of scaling
        transformation
    :param scaling:
        Scaling transformation according to distance to boundary
    :param delta:
        Trust region radius, note that this applies after scaling
        transformation
    :param theta:
        parameter regulating stepback
    :param lb:
        lower optimization variable boundaries
    :param ub:
        upper optimization variable boundaries

    :return:
        New proposal steps
    """
    g_step = GradientStep(x, sg, hess, scaling, g_dscaling, delta,
                          theta, ub, lb)
    g_step.calculate()

    steps = [g_step]

    rtr_step = TRStepReflected(x, sg, hess, scaling, g_dscaling, delta,
                               theta, ub, lb, tr_step)
    rtr_step.calculate()
    steps.append(rtr_step)
    for ireflection in range(len(x) - 1):
        if rtr_step.alpha == 1:
            break
        # recursively add more reflections
        rtr_old = rtr_step
        rtr_step = TRStepReflected(x, sg, hess, scaling, g_dscaling, delta,
                                   theta, ub, lb, rtr_old)
        rtr_step.calculate()
        steps.append(rtr_step)

    return steps


[docs]def stepback_truncate(tr_step: Step,
                      x: np.ndarray,
                      sg: np.ndarray,
                      hess: np.ndarray,
                      scaling: csc_matrix,
                      g_dscaling: csc_matrix,
                      delta: float,
                      theta: float,
                      ub: np.ndarray,
                      lb: np.ndarray) -> List[Step]:
    """
    Compute new proposal steps according to a truncation strategy.

    :param tr_step:
        Reference trust region step that will be reflect
    :param x:
        Current values of the optimization variables
    :param sg:
        Rescaled objective function gradient at x
    :param hess:
        (Approximate) objective function Hessian at x
    :param g_dscaling:
        Unscaled gradient multiplied by derivative of scaling
        transformation
    :param scaling:
        Scaling transformation according to distance to boundary
    :param delta:
        Trust region radius, note that this applies after scaling
        transformation
    :param theta:
        parameter regulating stepback
    :param lb:
        lower optimization variable boundaries
    :param ub:
        upper optimization variable boundaries

    :return:
        New proposal steps
    """
    rtt_step = TRStepTruncated(x, sg, hess, scaling, g_dscaling, delta,
                               theta, ub, lb, tr_step)
    steps = [rtt_step]
    rtt_step.calculate()
    steps.append(rtt_step)
    for itruncation in range(len(x) - 1):
        if rtt_step.alpha == 1:
            break
        rtt_step = TRStepTruncated(x, sg, hess, scaling, g_dscaling, delta,
                                   theta, ub, lb, rtt_step)
        rtt_step.calculate()
        steps.append(rtt_step)

    return steps