Source code for fides.subproblem

"""
Subproblem Solvers
------------------
This module provides the machinery to solve 1- and N-dimensional
trust-region subproblems.
"""

import logging
import math
from typing import Tuple, Union

import numpy as np
from numpy.linalg import norm
from scipy import linalg
from scipy.optimize import brentq, newton


[docs]def quadratic_form(Q: np.ndarray, p: np.ndarray, x: np.ndarray) -> float: """ Computes the quadratic form :math:`x^TQx + x^Tp` :param Q: Matrix :param p: Vector :param x: Input :return: Value of form """ return 0.5 * x.T.dot(Q).dot(x) + p.T.dot(x)
[docs]def solve_1d_trust_region_subproblem( B: np.ndarray, g: np.ndarray, s: np.ndarray, delta: float, s0: np.ndarray ) -> np.ndarray: """ Solves the special case of a one-dimensional subproblem :param B: Hessian of the quadratic subproblem :param g: Gradient of the quadratic subproblem :param s: Vector defining the one-dimensional search direction :param delta: Norm boundary for the solution of the quadratic subproblem :param s0: reference point from where search is started, also counts towards norm of step :return: Proposed step-length """ if delta == 0.0: return delta * np.ones((1,)) if np.array_equal(s, np.zeros_like(s)): return np.zeros((1,)) a = 0.5 * B.dot(s).dot(s) if not isinstance(a, float): a = a[0, 0] b = s.T.dot(B.dot(s0) + g) minq = -b / (2 * a) if a > 0 and norm(minq * s + s0) <= delta: # interior solution tau = minq else: tau = get_1d_trust_region_boundary_solution(B, g, s, s0, delta) return tau * np.ones((1,))
[docs]def get_1d_trust_region_boundary_solution(B, g, s, s0, delta): a = np.dot(s, s) b = 2 * np.dot(s0, s) c = np.dot(s0, s0) - delta**2 aux = b + math.copysign(np.sqrt(b**2 - 4 * a * c), b) ts = [-aux / (2 * a), -2 * c / aux] qs = [quadratic_form(B, g, s0 + t * s) for t in ts] return ts[np.argmin(qs)]
[docs]def solve_nd_trust_region_subproblem( B: np.ndarray, g: np.ndarray, delta: float, logger: Union[logging.Logger, None] = None, ) -> Tuple[np.ndarray, str]: r""" This function exactly solves the n-dimensional subproblem. :math:`argmin_s\{s^T B s + s^T g = 0: ||s|| <= \Delta, s \in \mathbb{ R}^n\}` The solution is characterized by the equation :math:`-(B + \lambda I)s = g`. If B is positive definite, the solution can be obtained by :math:`\lambda = 0`$` if :math:`Bs = -g` satisfies :math:`||s|| <= \Delta`. If B is indefinite or :math:`Bs = -g` satisfies :math:`||s|| > \Delta` and an appropriate :math:`\lambda` has to be identified via 1D rootfinding of the secular equation :math:`\phi(\lambda) = \frac{1}{||s(\lambda)||} - \frac{1}{\Delta} = 0` with :math:`s(\lambda)` computed according to an eigenvalue decomposition of B. The eigenvalue decomposition, although being more expensive than a cholesky decomposition, has the advantage that eigenvectors are invariant to changes in :math:`\lambda` and eigenvalues are linear in :math:`\lambda`, so factorization only has to be performed once. We perform the linesearch via Newton's algorithm and Brent-Q as fallback. The hard case is treated separately and serves as general fallback. :param B: Hessian of the quadratic subproblem :param g: Gradient of the quadratic subproblem :param delta: Norm boundary for the solution of the quadratic subproblem :param logger: Logger instance to be used for logging :return: s: Selected step, step_type: Type of solution that was obtained """ if logger is None: logger = logging.getLogger('fides') if delta == 0: return np.zeros(g.shape), 'zero' # See Nocedal & Wright 2006 for details # INITIALIZATION # instead of a cholesky factorization, we go with an eigenvalue # decomposition, which works pretty well for n=2 eigvals, eigvecs = linalg.eig(B) eigvals = np.real(eigvals) eigvecs = np.real(eigvecs) w = -eigvecs.T.dot(g) jmin = eigvals.argmin() mineig = eigvals[jmin] # since B symmetric eigenvecs V are orthonormal # B + lambda I = V * (E + lambda I) * V.T # inv(B + lambda I) = V * inv(E + lambda I) * V.T # w = V.T * g # s(lam) = V * w./(eigvals + lam) # ds(lam) = - V * w./((eigvals + lam)**2) # \phi(lam) = 1/||s(lam)|| - 1/delta # \phi'(lam) = - s(lam).T*ds(lam)/||s(lam)||^3 # POSITIVE DEFINITE if mineig > 0: # positive definite s = np.real(slam(0, w, eigvals, eigvecs)) # s = - self.cB\self.cg_hat if norm(s) <= delta + np.sqrt(np.spacing(1)): # CASE 0 logger.debug('Interior subproblem solution') return s, 'posdef' else: laminit = 0 else: laminit = -mineig # INDEFINITE CASE # note that this includes what Nocedal calls the "hard case" but with # ||s|| > delta, so the provided formula is not applicable, # the respective w should be close to 0 anyways if secular(laminit, w, eigvals, eigvecs, delta) < 0: maxiter = 100 try: r = newton( secular, laminit, dsecular, tol=1e-12, maxiter=maxiter, args=(w, eigvals, eigvecs, delta), ) s = slam(r, w, eigvals, eigvecs) if norm(s) <= delta + 1e-12: logger.debug('Found boundary subproblem solution via newton') return s, 'indef' except RuntimeError: pass try: xa = laminit xb = (laminit + np.sqrt(np.spacing(1))) * 10 # search to the right for a change of sign while secular(xb, w, eigvals, eigvecs, delta) < 0 and maxiter > 0: xa = xb xb = xb * 10 maxiter -= 1 if maxiter > 0: r = brentq( secular, xa, xb, xtol=1e-12, maxiter=maxiter, args=(w, eigvals, eigvecs, delta), ) s = slam(r, w, eigvals, eigvecs) if norm(s) <= delta + np.sqrt(np.spacing(1)): logger.debug( 'Found boundary subproblem solution via brentq' ) return s, 'indef' except RuntimeError: pass # may end up here due to ill-conditioning, treat as hard case # HARD CASE (gradient is orthogonal to eigenvector to smallest eigenvalue) w[(eigvals - mineig) == 0] = 0 s = slam(-mineig, w, eigvals, eigvecs) # we know that ||s(lam) + sigma*v_jmin|| = delta, since v_jmin is # orthonormal, we can just substract the difference in norm to get # the right length. sigma = np.sqrt(max(delta**2 - norm(s) ** 2, 0)) s = s + sigma * eigvecs[:, jmin] logger.debug('Found boundary 2D subproblem solution via hard case') return s, 'hard'
[docs]def slam( lam: float, w: np.ndarray, eigvals: np.ndarray, eigvecs: np.ndarray ) -> np.ndarray: r""" Computes the solution :math:`s(\lambda)` as subproblem solution according to :math:`-(B + \lambda I)s = g` :param lam: :math:`\lambda` :param w: precomputed eigenvector coefficients for -g :param eigvals: precomputed eigenvalues of B :param eigvecs: precomputed eigenvectors of B :return: :math:`s(\lambda)` """ c = w.copy() el = eigvals + lam c[el != 0] /= el[el != 0] return eigvecs.dot(c)
[docs]def dslam(lam: float, w: np.ndarray, eigvals: np.ndarray, eigvecs: np.ndarray): r""" Computes the derivative of the solution :math:`s(\lambda)` with respect to lambda, where :math:`s` is the subproblem solution according to :math:`-(B + \lambda I)s = g` :param lam: :math:`\lambda` :param w: precomputed eigenvector coefficients for -g :param eigvals: precomputed eigenvalues of B :param eigvecs: precomputed eigenvectors of B :return: :math:`\frac{\partial s(\lambda)}{\partial \lambda}` """ c = w.copy() el = eigvals + lam c[el != 0] /= -np.power(el[el != 0], 2) c[(el == 0) & (c != 0)] = np.inf return eigvecs.dot(c)
[docs]def secular( lam: float, w: np.ndarray, eigvals: np.ndarray, eigvecs: np.ndarray, delta: float, ): r""" Secular equation :math:`\phi(\lambda) = \frac{1}{||s||} - \frac{1}{\Delta}` Subproblem solutions are given by the roots of this equation :param lam: :math:`\lambda` :param w: precomputed eigenvector coefficients for -g :param eigvals: precomputed eigenvalues of B :param eigvecs: precomputed eigenvectors of B :param delta: trust region radius :math:`\Delta` :return: :math:`\phi(\lambda)` """ if lam < -np.min(eigvals): return np.inf # safeguard to implement boundary s = slam(lam, w, eigvals, eigvecs) sn = norm(s) if sn > 0: return 1 / sn - 1 / delta else: return np.inf
[docs]def dsecular( lam: float, w: np.ndarray, eigvals: np.ndarray, eigvecs: np.ndarray, delta: float, ): r""" Derivative of the secular equation :math:`\phi(\lambda) = \frac{1}{||s||} - \frac{1}{\Delta}` with respect to :math:`\lambda` :param lam: :math:`\lambda` :param w: precomputed eigenvector coefficients for -g :param eigvals: precomputed eigenvalues of B :param eigvecs: precomputed eigenvectors of B :param delta: trust region radius :math:`\Delta` :return: :math:`\frac{\partial \phi(\lambda)}{\partial \lambda}` """ s = slam(lam, w, eigvals, eigvecs) ds = dslam(lam, w, eigvals, eigvecs) sn = norm(s) if sn > 0: return -s.T.dot(ds) / (norm(s) ** 3) else: return np.inf