Source code for fides.subproblem

"""
Subproblem Solvers
------------------
This module provides the machinery to solve 1- and N-dimensional
trust-region subproblems.
"""

import numpy as np
from numpy.linalg import norm

from scipy import linalg
from scipy.optimize import newton, brentq

from typing import Tuple

from .logging import logger


[docs]def solve_1d_trust_region_subproblem(B: np.ndarray, g: np.ndarray, s: np.ndarray, delta: float, s0: np.ndarray) -> np.ndarray: """ Solves the special case of a one-dimensional subproblem :param B: Hessian of the quadratic subproblem :param g: Gradient of the quadratic subproblem :param s: Vector defining the one-dimensional search direction :param delta: Norm boundary for the solution of the quadratic subproblem :param s0: reference point from where search is started, also counts towards norm of step :return: Proposed step-length """ if delta == 0.0: return delta * np.ones((1,)) a = 0.5 * B.dot(s).dot(s) if not isinstance(a, float): a = a[0, 0] b = s.T.dot(g) minq = - b / (2 * a) if a > 0 and norm(minq * s + s0) <= delta: # interior solution tau = minq else: nrms0 = norm(s0) if nrms0 == 0: tau = - delta * np.sign(b) elif nrms0 >= delta: tau = 0 else: tau = brentq(lambda q: 1/norm(q * s + s0) - 1/delta, a=0, b=2*delta, xtol=1e-12, maxiter=100) return tau * np.ones((1,))
[docs]def solve_nd_trust_region_subproblem(B: np.ndarray, g: np.ndarray, delta: float) -> Tuple[np.ndarray, str]: r""" This function exactly solves the n-dimensional subproblem. :math:`argmin_s\{s^T B s + s^T g = 0: ||s|| <= \Delta, s \in \mathbb{ R}^n\}` The solution to is characterized by the equation :math:`-(B + \lambda I)s = g`. If B is positive definite, the solution can be obtained by :math:`\lambda = 0`$` if :math:`Bs = -g` satisfies :math:`||s|| <= \Delta`. If B is indefinite or :math:`Bs = -g` satisfies :math:`||s|| > \Delta` and an approppriate :math:`\lambda` has to be identified via 1D rootfinding of the secular equation :math:`\phi(\lambda) = \frac{1}{||s(\lambda)||} - \frac{1}{\Delta} = 0` with :math:`s(\lambda)` computed according to an eigenvalue decomposition of B. The eigenvalue decomposition, although being more expensive than a cholesky decomposition, has the advantage that eigenvectors are invariant to changes in :math:`\lambda` and eigenvalues are linear in :math:`\lambda`, so factorization only has to be performed once. We perform the linesearch via Newton's algorithm and Brent-Q as fallback. The hard case is treated seperately and serves as general fallback. :param B: Hessian of the quadratic subproblem :param g: Gradient of the quadratic subproblem :param delta: Norm boundary for the solution of the quadratic subproblem :return: s: Selected step, step_type: Type of solution that was obtained """ if delta == 0: return np.zeros(g.shape), 'zero' # See Nocedal & Wright 2006 for details # INITIALIZATION # instead of a cholesky factorization, we go with an eigenvalue # decomposition, which works pretty well for n=2 eigvals, eigvecs = linalg.eig(B) eigvals = np.real(eigvals) eigvecs = np.real(eigvecs) w = - eigvecs.T.dot(g) jmin = eigvals.argmin() mineig = eigvals[jmin] # since B symmetric eigenvecs V are orthonormal # B + lambda I = V * (E + lambda I) * V.T # inv(B + lambda I) = V * inv(E + lambda I) * V.T # w = V.T * g # s(lam) = V * w./(eigvals + lam) # ds(lam) = - V * w./((eigvals + lam)**2) # \phi(lam) = 1/||s(lam)|| - 1/delta # \phi'(lam) = - s(lam).T*ds(lam)/||s(lam)||^3 # POSITIVE DEFINITE if mineig > np.sqrt(np.spacing(1)): # positive definite s = np.real(slam(0, w, eigvals, eigvecs)) # s = - self.cB\self.cg_hat if norm(s) <= delta + np.sqrt(np.spacing(1)): # CASE 0 logger.debug('Interior subproblem solution') return s, 'posdef' else: laminit = 0 else: # add a little wiggle room so we don't end up at the pole laminit = -mineig + np.sqrt(np.spacing(1)) # INDEFINITE CASE ( # note that this includes what Nocedal calls the "hard case" but with # ||s|| > delta, so the provided formula is not applicable, # the respective w should be close to 0 anyways if secular(laminit, w, eigvals, eigvecs, delta) < 0: maxiter = 100 try: r = newton(secular, laminit, dsecular, tol=1e-12, maxiter=maxiter, args=(w, eigvals, eigvecs, delta),) s = slam(r, w, eigvals, eigvecs) if norm(s) <= delta + 1e-12: logger.debug('Found boundary subproblem solution via newton') return s, 'indef' except RuntimeError: pass try: xa = laminit xb = (laminit + np.sqrt(np.spacing(1))) * 10 # search to the right for a change of sign while secular(xb, w, eigvals, eigvecs, delta) < 0 and \ maxiter > 0: xa = xb xb = xb * 10 maxiter -= 1 if maxiter > 0: r = brentq(secular, xa, xb, xtol=1e-12, maxiter=maxiter, args=(w, eigvals, eigvecs, delta)) s = slam(r, w, eigvals, eigvecs) if norm(s) <= delta + np.sqrt(np.spacing(1)): logger.debug( 'Found boundary subproblem solution via brentq' ) return s, 'indef' except RuntimeError: pass # may end up here due to ill-conditioning, treat as hard case # HARD CASE (gradient is orthogonal to eigenvector to smallest eigenvalue) w[(eigvals-mineig) == 0] = 0 s = slam(-mineig, w, eigvals, eigvecs) # we know that ||s(lam) + sigma*v_jmin|| = delta, since v_jmin is # orthonormal, we can just substract the difference in norm to get # the right length. sigma = np.sqrt(max(delta ** 2 - norm(s) ** 2, 0)) s = s + sigma * eigvecs[:, jmin] logger.debug('Found boundary 2D subproblem solution via hard case') return s, 'hard'
[docs]def slam(lam: float, w: np.ndarray, eigvals: np.ndarray, eigvecs: np.ndarray) -> np.ndarray: r""" Computes the solution :math:`s(\lambda)` as subproblem solution according to :math:`-(B + \lambda I)s = g` :param lam: :math:`\lambda` :param w: precomputed eigenvector coefficients for -g :param eigvals: precomputed eigenvalues of B :param eigvecs: precomputed eigenvectors of B :return: :math:`s(\lambda)` """ c = w.copy() el = eigvals + lam c[el != 0] /= el[el != 0] return eigvecs.dot(c)
[docs]def dslam(lam: float, w: np.ndarray, eigvals: np.ndarray, eigvecs: np.ndarray): r""" Computes the derivative of the solution :math:`s(\lambda)` with respect to lambda, where :math:`s` is the ubproblem solution according to :math:`-(B + \lambda I)s = g` :param lam: :math:`\lambda` :param w: precomputed eigenvector coefficients for -g :param eigvals: precomputed eigenvalues of B :param eigvecs: precomputed eigenvectors of B :return: :math:`\frac{\partial s(\lambda)}{\partial \lambda}` """ c = w.copy() el = eigvals + lam c[el != 0] /= - np.power(el[el != 0], 2) c[(el == 0) & (c != 0)] = np.inf return eigvecs.dot(c)
[docs]def secular(lam: float, w: np.ndarray, eigvals: np.ndarray, eigvecs: np.ndarray, delta: float): r""" Secular equation :math:`\phi(\lambda) = \frac{1}{||s||} - \frac{1}{\Delta}` Subproblem solutions are given by the roots of this equation :param lam: :math:`\lambda` :param w: precomputed eigenvector coefficients for -g :param eigvals: precomputed eigenvalues of B :param eigvecs: precomputed eigenvectors of B :param delta: trust region radius :math:`\Delta` :return: :math:`\phi(\lambda)` """ if lam < -np.min(eigvals): return np.inf # safeguard to implement boundary s = slam(lam, w, eigvals, eigvecs) sn = norm(s) if sn > 0: return 1 / sn - 1 / delta else: return np.inf
[docs]def dsecular(lam: float, w: np.ndarray, eigvals: np.ndarray, eigvecs: np.ndarray, delta: float): r""" Derivative of the secular equation :math:`\phi(\lambda) = \frac{1}{||s||} - \frac{1}{\Delta}` with respect to :math:`\lambda` :param lam: :math:`\lambda` :param w: precomputed eigenvector coefficients for -g :param eigvals: precomputed eigenvalues of B :param eigvecs: precomputed eigenvectors of B :param delta: trust region radius :math:`\Delta` :return: :math:`\frac{\partial \phi(\lambda)}{\partial \lambda}` """ s = slam(lam, w, eigvals, eigvecs) ds = dslam(lam, w, eigvals, eigvecs) sn = norm(s) if sn > 0: return - s.T.dot(ds) / (norm(s) ** 3) else: return np.inf