Source code for fides.hessian_approximation

"""
Hessian Update Strategies
-------------------------
This module provides various generic Hessian approximation strategies that
can be employed when the calculating the exact Hessian or an approximation
is computationally too demanding.
"""


import warnings
from typing import Optional

import numpy as np
from numpy.linalg import norm


[docs]class HessianApproximation: """ Abstract class from which Hessian update strategies should subclass """
[docs] def __init__(self, init_with_hess: Optional[bool] = False): """ Create a Hessian update strategy instance :param init_with_hess: Whether the hybrid update strategy should be initialized according to the user-provided objective function """ self._hess: np.ndarray = np.empty(0) self._diff: np.ndarray = np.empty(0) self.init_with_hess = init_with_hess
[docs] def init_mat(self, dim: int, hess: Optional[np.ndarray] = None) -> None: """ Initializes this approximation instance and checks the dimensionality :param dim: dimension of optimization variables :param hess: user provided initialization """ if hess is None or not self.init_with_hess: self._hess = np.eye(dim) else: if hess.shape[0] != dim: raise ValueError( 'Initial approximation had inconsistent ' f'dimension, was {hess.shape[0]}, ' f'but should be {dim}.' ) self._hess = hess.copy() self._diff = np.zeros_like(self._hess)
[docs] def get_mat(self) -> np.ndarray: """ Getter for the Hessian approximation :return: Hessian approximation """ return self._hess.copy()
[docs] def get_diff(self) -> np.ndarray: """ Getter for the Hessian approximation update :return: Hessian approximation update """ return self._diff.copy()
[docs] def set_mat(self, mat: np.ndarray) -> None: """ Setter for the Hessian approximation :param mat: Hessian approximation """ if mat.shape != self._hess.shape: raise ValueError( 'Passed matrix had inconsistent ' f'shape, was {mat.shape}, ' f'but should be {self._hess.shape}.' ) self._hess = mat.copy()
@property def requires_resfun(self): return False # pragma: no cover @property def requires_hess(self): return False # pragma: no cover def _update_hess_and_store_diff(self, hess): self._diff = hess - self._hess self._hess = hess.copy()
[docs]class IterativeHessianApproximation(HessianApproximation): """ Iterative update schemes that only use s and y values for update. """
[docs] def update(self, s: np.ndarray, y: np.ndarray) -> None: """ Update the Hessian approximation :param s: step in optimization variables :param y: step in gradient """ self._compute_update(s, y) self._apply_update()
def _compute_update(self, s: np.ndarray, y: np.ndarray) -> None: raise NotImplementedError() # pragma: no cover def _apply_update(self) -> None: self._hess += self._diff
[docs]class Broyden(IterativeHessianApproximation): """ BroydenClass Update scheme as described in [Nocedal & Wright]( http://dx.doi.org/10.1007/b98874) Chapter 6.3. This is a generalization of BFGS/DFP methods where the parameter :math:`phi` controls the convex combination between the two. This is a rank 2 update strategy that preserves positive-semidefiniteness and symmetry (if :math:`\\phi \\in [0,1]`). This scheme only works with a function that returns (fval, grad) :parameter phi: convex combination parameter interpolating between BFGS (phi==0) and DFP (phi==1). :parameter enforce_curv_cond: boolean that controls whether the employed broyden class update should attempt to preserve positive definiteness. If set to True, updates from steps that violate the curvature condition will be discarded. """
[docs] def __init__( self, phi: float, init_with_hess: Optional[bool] = False, enforce_curv_cond: Optional[bool] = True, ): self.phi = phi self.enforce_curv_cond = enforce_curv_cond if phi < 0 or phi > 1: warnings.warn( 'Setting phi to values outside the interval [0, 1]' 'will not guarantee that positive definiteness is ' 'preserved during updating.' ) super(Broyden, self).__init__(init_with_hess)
def _compute_update(self, s: np.ndarray, y: np.ndarray): self._diff = broyden_class_update( y, s, self._hess, self.phi, self.enforce_curv_cond )
[docs]class BFGS(Broyden): """ Broyden-Fletcher-Goldfarb-Shanno update strategy. This is a rank 2 update strategy that preserves symmetry and positive-semidefiniteness. This scheme only works with a function that returns (fval, grad) """
[docs] def __init__( self, init_with_hess: Optional[bool] = False, enforce_curv_cond: Optional[bool] = True, ): super(BFGS, self).__init__( phi=0.0, init_with_hess=init_with_hess, enforce_curv_cond=enforce_curv_cond, )
[docs]class DFP(Broyden): """ Davidon-Fletcher-Powell update strategy. This is a rank 2 update strategy that preserves symmetry and positive-semidefiniteness. This scheme only works with a function that returns (fval, grad) """
[docs] def __init__( self, init_with_hess: Optional[bool] = False, enforce_curv_cond: Optional[bool] = True, ): super(DFP, self).__init__( phi=1.0, init_with_hess=init_with_hess, enforce_curv_cond=enforce_curv_cond, )
[docs]class SR1(IterativeHessianApproximation): """ Symmetric Rank 1 update strategy as described in [Nocedal & Wright](http://dx.doi.org/10.1007/b98874) Chapter 6.2. This is a rank 1 update strategy that preserves symmetry but does not preserve positive-semidefiniteness. This scheme only works with a function that returns (fval, grad) """ def _compute_update(self, s: np.ndarray, y: np.ndarray): z = y - self._hess.dot(s) d = z.T.dot(s) # [NocedalWright2006] (6.26) reject if update degenerate if np.abs(d) >= np.sqrt(np.spacing(1)) * norm(s) * norm(z): self._diff = np.outer(z, z.T) / d else: self._diff = np.zeros_like(self._hess)
[docs]class BG(IterativeHessianApproximation): """ Broydens "good" method as introduced in [Broyden 1965](https://doi.org/10.1090%2FS0025-5718-1965-0198670-6). This is a rank 1 update strategy that does not preserve symmetry or positive definiteness. This scheme only works with a function that returns (fval, grad) """ def _compute_update(self, s: np.ndarray, y: np.ndarray): z = y - self._hess.dot(s) self._diff = np.outer(z, s.T) / s.T.dot(s)
[docs]class BB(IterativeHessianApproximation): """ Broydens "bad" method as introduced in [Broyden 1965](https://doi.org/10.1090%2FS0025-5718-1965-0198670-6). This is a rank 1 update strategy that does not preserve symmetry or positive definiteness. This scheme only works with a function that returns (fval, grad) """
[docs] def update(self, s: np.ndarray, y: np.ndarray) -> None: b = y.T.dot(s) z = y - self._hess.dot(s) if b <= 0: self._diff = np.zeros_like(self._hess) else: self._diff = np.outer(z, s.T) / b
[docs]class HybridApproximation(HessianApproximation):
[docs] def __init__(self, happ: IterativeHessianApproximation = BFGS()): """ Create a Hybrid Hessian update strategy that switches between an iterative approximation and a dynamic approximation :param happ: Iterative Hessian Approximation """ self.hessian_update = happ super(HybridApproximation, self).__init__()
[docs] def init_mat(self, dim: int, hess: Optional[np.ndarray] = None): self.hessian_update.init_mat(dim, hess) super(HybridApproximation, self).init_mat(dim, hess)
def requires_hess(self): return True # pragma: no cover
[docs]class HybridSwitchApproximation(HybridApproximation): def _switched_update(self, s: np.ndarray, y: np.ndarray, hess: np.ndarray): self.hessian_update.update(s, y) if self._switched: new_hess = self.hessian_update.get_mat() else: new_hess = hess self._update_hess_and_store_diff(new_hess)
[docs]class HybridFixed(HybridSwitchApproximation):
[docs] def __init__( self, happ: IterativeHessianApproximation = BFGS(), switch_iteration: Optional[int] = 20, ): """ Switch from a dynamic approximation to the user provided iterative scheme after a fixed number of successive iterations without trust-region update. The switching is non-reversible. The iterative scheme is initialized and updated rom the beginning, but only employed after the specified number of iterations. This scheme only works with a function that returns (fval, grad, hess) :param switch_iteration: Number of iterations without trust region update after which switch occurs. """ self.switch_iteration: int = switch_iteration super(HybridFixed, self).__init__(happ) self._switched = False
def update( self, s: np.ndarray, y: np.ndarray, hess: np.ndarray, iter_since_tr_update: int, ): self._switched_update(s, y, hess) if not self._switched: self._switched = iter_since_tr_update >= self.switch_iteration
[docs]class HybridFraction(HybridSwitchApproximation):
[docs] def __init__( self, happ: IterativeHessianApproximation = BFGS(), switch_threshold: Optional[float] = 0.8, ): """ Switch from a dynamic approximation to the user provided iterative scheme as soon as the fraction of iterations where the step is accepted but the trust region is not update exceeds the user provided threshold.Threshold check is only performed after 25 iterations. The switching is non-reversible. The iterative scheme is initialized and updated rom the beginning, but only employed after the switching. This scheme only works with a function that returns (fval, grad, hess) :param switch_threshold: Threshold for fraction of iterations where step is accepted but trust region is not updated, which when exceeded triggers switch of approximation. """ self.switch_threshold: float = switch_threshold super(HybridFraction, self).__init__(happ) self._switched = False
def update( self, s: np.ndarray, y: np.ndarray, hess: np.ndarray, tr_nonupdates: int, iterations: int, ): self._switched_update(s, y, hess) if not self._switched and iterations > 25: self._switched = tr_nonupdates / iterations > self.switch_threshold
[docs]class FX(HybridApproximation):
[docs] def __init__( self, happ: IterativeHessianApproximation = BFGS(), hybrid_tol: Optional[float] = 0.2, ): r""" Hybrid method HY2 as introduced by [Fletcher & Xu 1986](https://doi.org/10.1093/imanum/7.3.371). This approximation scheme employs a dynamic approximation as long as function values satisfy :math:`\frac{f_k - f_{k+1}}{f_k} < \epsilon` and employs the iterative scheme applied to the last dynamic approximation if not. This scheme only works with a function that returns (fval, grad, hess) :param hybrid_tol: switch tolerance :math:`\epsilon` """ self.hybrid_tol = hybrid_tol super(FX, self).__init__(happ)
[docs] def update( self, delta: np.ndarray, gamma: np.ndarray, r: np.ndarray, rprev: np.ndarray, hess: np.ndarray, ) -> None: """ Update the Hessian approximation :param delta: step in optimization variables :param gamma: step in gradient :param r: residuals after current step :param rprev: residuals befor current step :param hess: user-provided (Gauss-Newton) Hessian approximation """ # Equation (3.5) ratio = (rprev.dot(rprev) - r.dot(r)) / rprev.dot(rprev) if ratio >= self.hybrid_tol: self._diff = hess - self.hessian_update.get_mat() self.hessian_update.set_mat(hess) else: self.hessian_update.update(delta, gamma) self._diff = self.hessian_update.get_diff()
[docs] def get_mat(self) -> np.ndarray: return self.hessian_update.get_mat()
@property def requires_resfun(self) -> bool: return True # pragma: no cover
[docs]class StructuredApproximation(HessianApproximation):
[docs] def __init__( self, phi: Optional[float] = 0.0, enforce_curv_cond: Optional[bool] = True, ): """ This is the base class for structured secant methods (SSM). SSMs approximate the hessian by combining the Gauss-Newton component C(x) and an iteratively updated component that approximates the difference S to the true Hessian. :parameter phi: convex combination parameter interpolating between BFGS (phi==0) and DFP (phi==1) update schemes. :parameter enforce_curv_cond: boolean that controls whether the employed broyden class update should attempt to preserve positive definiteness. If set to True, updates from steps that violate the curvature condition will be discarded. """ self.A: np.ndarray = np.empty(0) self.phi = phi self.enforce_curv_cond = enforce_curv_cond self._structured_diff = np.empty(0) if phi < 0 or phi > 1: warnings.warn( 'Setting phi to values outside the interval [0, 1]' 'will not guarantee that positive definiteness is ' 'preserved during updating.' ) super(StructuredApproximation, self).__init__(init_with_hess=True)
[docs] def init_mat(self, dim: int, hess: Optional[np.ndarray] = None): self.A = np.eye(dim) * np.spacing(1) self._structured_diff = np.zeros_like(self.A) super(StructuredApproximation, self).init_mat(dim, hess)
[docs] def update( self, s: np.ndarray, y: np.ndarray, r: np.ndarray, hess: np.ndarray, yb: np.ndarray, ) -> None: """ Update the structured approximation :param s: step in optimization parameters :param y: step in gradient parameters :param r: residual vector :param hess: user-provided (Gauss-Newton) Hessian approximation :param yb: approximation to A*s, where A is structured approximation matrix """ raise NotImplementedError() # pragma: no cover
@property def requires_resfun(self): return True # pragma: no cover @property def requires_hess(self): return True # pragma: no cover def get_structured_diff(self) -> np.ndarray: return self._structured_diff
[docs]class SSM(StructuredApproximation): """ Structured Secant Method as introduced by [Dennis et al 1989](https://doi.org/10.1007/BF00962795), which is compatible with BFGS, DFP update schemes. This scheme only works with a function that returns (res, sres) """
[docs] def update( self, s: np.ndarray, y: np.ndarray, r: np.ndarray, hess: np.ndarray, yb: np.ndarray, ) -> None: # B^S = A + C(x_+) Bs = hess + self.A # y^S = y^# + C(x_+)*s ys = yb + hess.dot(s) # Equation (13) self._structured_diff = broyden_class_update( ys, s, Bs, phi=self.phi, enforce_curv_cond=self.enforce_curv_cond ) self.A += self._structured_diff # B_+ = C(x_+) + A + BFGS update A (=A_+) self._update_hess_and_store_diff(hess + self.A)
[docs]class TSSM(StructuredApproximation): """ Totally Structured Secant Method as introduced by [Huschens 1994](https://doi.org/10.1137/0804005), which uses a self-adjusting update method for the second order term. This scheme only works with a function that returns (res, sres) """
[docs] def update( self, s: np.ndarray, y: np.ndarray, r: np.ndarray, hess: np.ndarray, yb: np.ndarray, ) -> None: # Equation (2.7) Bs = hess + norm(r) * self.A # Equation (2.6) ys = hess.dot(s) + yb # Equation (2.10) self._structured_diff = broyden_class_update( ys, s, Bs, phi=self.phi, enforce_curv_cond=self.enforce_curv_cond ) / norm(r) self.A += self._structured_diff # Equation (2.9) self._update_hess_and_store_diff(hess + norm(r) * self.A)
[docs]class GNSBFGS(StructuredApproximation):
[docs] def __init__( self, hybrid_tol: float = 1e-6, enforce_curv_cond: bool = True ): """ Hybrid Gauss-Newton Structured BFGS method as introduced by [Zhou & Chen 2010](https://doi.org/10.1137/090748470), which combines ideas of hybrid switching methods and structured secant methods. This scheme only works with a function that returns (res, sres) :parameter hybrid_tol: switching tolerance that controls switching between update methods """ self.hybrid_tol: float = hybrid_tol super(GNSBFGS, self).__init__( phi=0.0, enforce_curv_cond=enforce_curv_cond )
[docs] def update( self, s: np.ndarray, y: np.ndarray, r: np.ndarray, hess: np.ndarray, yb: np.ndarray, ) -> None: # Equation (2.1) ratio = yb.T.dot(s) / s.dot(s) if ratio > self.hybrid_tol: # Equation (2.3) self._structured_diff = broyden_class_update( yb, s, self.A, phi=self.phi, enforce_curv_cond=self.enforce_curv_cond, ) self.A += self._structured_diff # Equation (2.2) self._update_hess_and_store_diff(hess + self.A) else: # Equation (2.2) self._structured_diff = np.zeros_like(self.A) self._update_hess_and_store_diff(hess + norm(r) * np.eye(len(y)))
[docs]def broyden_class_update( y: np.ndarray, s: np.ndarray, mat: np.ndarray, phi: float = 0.0, enforce_curv_cond: bool = True, ) -> np.ndarray: """ Scale free implementation of the broyden class update scheme. :param y: difference in gradient :param s: search direction in previous step :param mat: current hessian approximation :param phi: convex combination parameter, interpolates between BFGS (phi=0) and DFP (phi=1). :parameter enforce_curv_cond: boolean that controls whether the employed broyden class update should attempt to preserve positive definiteness. If set to True, updates from steps that violate the curvature condition will be discarded. """ u = mat.dot(s) c = u.T.dot(s) b = y.T.dot(s) if b <= 0 and enforce_curv_cond: return np.zeros_like(mat) update = np.outer(y, y.T) / b - np.outer(u, u.T) / c if phi != 0.0: v = y / b - u / c update += phi * c * np.outer(v, v.T) return update