from kona.linalg.matrices.hessian.basic import BaseHessian, QuasiNewtonApprox
[docs]class ReducedHessian(BaseHessian):
"""
Reduced-space approximation of the Hessian-vector product using a 2nd
order adjoint formulation.
.. note::
Insert inexact-Hessian paper reference here
Attributes
----------
product_fact : float
Solution tolerance for 2nd order adjoints.
lamb : float
???
scale : float
???
quasi_newton : QuasiNewtonApproximation -like
QN Hessian object to be used as preconditioner.
"""
def __init__(self, vector_factories, optns=None):
super(ReducedHessian, self).__init__(vector_factories, optns)
# read reduced options
self.product_tol = get_opt(self.optns, 1e-6, 'product_tol')
self.lamb = get_opt(self.optns, 0.0, 'lambda')
self.scale = get_opt(self.optns, 1.0, 'scale')
self.nu = get_opt(self.optns, 0.95, 'nu')
self.dynamic_tol = get_opt(self.optns, False, 'dynamic_tol')
# preconditioner and solver settings
self.precond = get_opt(self.optns, None, 'precond')
self.quasi_newton = None
self.krylov = None
# reset the linearization flag
self._allocated = False
# request vector memory for future allocation
self.primal_factory.request_num_vectors(4)
self.state_factory.request_num_vectors(7)
# initialize abtract jacobians
self.dRdX = dRdX()
self.dRdU = dRdU()
[docs] def set_krylov_solver(self, krylov_solver):
if isinstance(krylov_solver, KrylovSolver):
self.krylov = krylov_solver
else:
raise TypeError('Solver is not a valid KrylovSolver')
[docs] def set_quasi_newton(self, quasi_newton):
if isinstance(quasi_newton, QuasiNewtonApprox):
self.quasi_newton = quasi_newton
else:
raise TypeError('Object is not a valid QuasiNewtonApprox')
[docs] def linearize(self, at_design, at_state, at_adjoint, scale=1.0):
"""
An abstracted "linearization" method for the matrix.
This method does not actually factor any real matrices. It also does
not perform expensive linear or non-linear solves. It is used to update
internal vector references and perform basic calculations using only
cheap matrix-vector products.
Parameters
----------
at_design : DesignVector
Design point at which the product is evaluated.
at_state : StateVector
State point at which the product is evaluated.
at_dual : DualVector
Lagrange multipliers at which the product is evaluated.
at_adjoint : StateVector
1st order adjoint variables at which the product is evaluated.
"""
# store the linearization point
self.at_design = at_design
self.primal_norm = self.at_design.norm2
self.at_state = at_state
self.state_norm = self.at_state.norm2
self.at_adjoint = at_adjoint
self.scale = scale
# if this is the first ever linearization...
if not self._allocated:
# generate state vectors
self.adjoint_res = self.state_factory.generate()
self.w_adj = self.state_factory.generate()
self.lambda_adj = self.state_factory.generate()
self.state_work = []
for i in xrange(4):
self.state_work.append(self.state_factory.generate())
# generate primal vectors
self.pert_design = self.primal_factory.generate()
self.reduced_grad = self.primal_factory.generate()
self.primal_work = []
for i in xrange(2):
self.primal_work.append(self.primal_factory.generate())
self._allocated = True
# compute adjoint residual at the linearization
self.adjoint_res.equals_objective_partial(
self.at_design, self.at_state, scale=self.scale)
self.dRdU.linearize(self.at_design, self.at_state)
self.dRdU.T.product(self.at_adjoint, self.state_work[0])
self.adjoint_res.plus(self.state_work[0])
# compute reduced gradient at the linearization
self.reduced_grad.equals_objective_partial(
self.at_design, self.at_state, scale=self.scale)
self.dRdX.linearize(self.at_design, self.at_state)
self.dRdX.T.product(self.at_adjoint, self.primal_work[0])
self.reduced_grad.plus(self.primal_work[0])
[docs] def product(self, in_vec, out_vec):
"""
Matrix-vector product for the reduced KKT system.
Parameters
----------
in_vec : ReducedKKTVector
Vector to be multiplied with the KKT matrix.
out_vec : ReducedKKTVector
Result of the operation.
"""
# calculate perturbation
# epsilon_fd = calc_epsilon(self.primal_norm, in_vec.norm2)
epsilon_fd = 1e-5
# perturb the design vector
self.pert_design.equals_ax_p_by(1.0, self.at_design, epsilon_fd, in_vec)
# compute total gradient at the perturbed design
out_vec.equals_objective_partial(self.pert_design, self.at_state,
scale=self.scale)
self.dRdX.linearize(self.pert_design, self.at_state)
self.dRdX.T.product(self.at_adjoint, self.primal_work[0])
out_vec.plus(self.primal_work[0])
# take the difference between perturbed and unperturbed gradient
out_vec.minus(self.reduced_grad)
# divide it by the perturbation
out_vec.divide_by(epsilon_fd)
# first adjoint system
####################################
# build RHS
self.dRdX.linearize(self.at_design, self.at_state)
self.dRdX.product(in_vec, self.state_work[0])
self.state_work[0].times(-1.0)
# solve the first 2nd order adjoint
rel_tol = self.product_tol/max(self.state_work[0].norm2, EPS)
self.dRdU.linearize(self.at_design, self.at_state)
self.dRdU.solve(
self.state_work[0], self.w_adj, rel_tol=rel_tol)
# second adjoint system
#####################################
# calculate total (dg/dx)^T*w using FD
self.state_work[0].equals_objective_partial(
self.pert_design, self.at_state, scale=self.scale)
self.dRdU.linearize(self.pert_design, self.at_state)
self.dRdU.T.product(self.at_adjoint, self.state_work[1])
self.state_work[0].plus(self.state_work[1])
self.state_work[0].minus(self.adjoint_res)
self.state_work[0].divide_by(epsilon_fd)
# multiply by -1 to use it as RHS
self.state_work[0].times(-1.0)
# perform state perturbation
self.state_work[1].equals_ax_p_by(
1.0, self.at_state, epsilon_fd, self.w_adj)
# calculate total (dS/du)^T*z using FD
self.state_work[2].equals_objective_partial(
self.at_design, self.state_work[1], scale=self.scale)
self.state_work[3].equals_ax_p_by(
-1./epsilon_fd, self.state_work[2], 1./epsilon_fd, self.adjoint_res)
self.dRdU.linearize(self.at_design, self.state_work[1])
self.dRdU.T.product(self.at_adjoint, self.state_work[2])
self.state_work[3].equals_ax_p_by(
1., self.state_work[3], -1./epsilon_fd, self.state_work[2])
# assemble RHS
self.state_work[0].plus(self.state_work[3])
# solve the second 2nd order adjoint
rel_tol = self.product_tol/max(self.state_work[0].norm2, EPS)
self.dRdU.linearize(self.at_design, self.at_state)
self.dRdU.T.solve(
self.state_work[0], self.lambda_adj, rel_tol=rel_tol)
# assemble the Hessian-vector product using 2nd order adjoints
##############################################################
# apply lambda_adj to the design part of the jacobian
self.dRdX.linearize(self.at_design, self.at_state)
self.dRdX.T.product(self.lambda_adj, self.primal_work[0])
out_vec.plus(self.primal_work[0])
# apply w_adj to the cross-derivative part of the jacobian
self.primal_work[0].equals_objective_partial(
self.at_design, self.state_work[1], scale=self.scale)
self.dRdX.linearize(self.at_design, self.state_work[1])
self.dRdX.T.product(self.at_adjoint, self.primal_work[1])
self.primal_work[0].plus(self.primal_work[1])
self.primal_work[0].equals_ax_p_by(
1./epsilon_fd, self.primal_work[0],
-1./epsilon_fd, self.reduced_grad)
out_vec.plus(self.primal_work[0])
# update quasi-Newton method if necessary
if self.quasi_newton is not None:
self.quasi_newton.add_correction(in_vec, out_vec)
# add globalization if necessary
if self.lamb > numpy.finfo(float).eps:
out_vec.equals_ax_p_by(
1.-self.lamb, out_vec, self.lamb*self.scale, in_vec)
[docs] def solve(self, rhs, solution, rel_tol=None):
"""
Solve the linear system defined by this matrix using the embedded
krylov solver.
Parameters
----------
rhs : DesignVector
Right hand side vector for the system.
solution : PrimalVector
Solution of the system.
rel_tol : float, optional
Relative tolerance for the krylov solver.
"""
# make sure we have a krylov solver
if self.krylov is None:
raise AttributeError('krylov solver not set')
# define the preconditioner
if self.precond is not None:
if self.quasi_newton is not None:
precond = self.quasi_newton.solve
else:
raise AttributeError('preconditioner is specified but not set')
else:
eye = IdentityMatrix()
precond = eye.product
# update the solution tolerance if necessary
if isinstance(rel_tol, float):
self.krylov.rel_tol = rel_tol
# trigger the solution
return self.krylov.solve(self.product, rhs, solution, precond)
# imports here to prevent circular errors
import numpy
from kona.options import get_opt
from kona.linalg.vectors.common import DesignVector, StateVector
from kona.linalg.matrices.common import dRdX, dRdU, IdentityMatrix
from kona.linalg.solvers.krylov.basic import KrylovSolver
from kona.linalg.solvers.util import calc_epsilon, EPS