Source code for kona.linalg.matrices.hessian.reduced_hessian

from kona.linalg.matrices.hessian.basic import BaseHessian, QuasiNewtonApprox

[docs]class ReducedHessian(BaseHessian): """ Reduced-space approximation of the Hessian-vector product using a 2nd order adjoint formulation. .. note:: Insert inexact-Hessian paper reference here Attributes ---------- product_fact : float Solution tolerance for 2nd order adjoints. lamb : float ??? scale : float ??? quasi_newton : QuasiNewtonApproximation -like QN Hessian object to be used as preconditioner. """ def __init__(self, vector_factories, optns=None): super(ReducedHessian, self).__init__(vector_factories, optns) # read reduced options self.product_tol = get_opt(self.optns, 1e-6, 'product_tol') self.lamb = get_opt(self.optns, 0.0, 'lambda') self.scale = get_opt(self.optns, 1.0, 'scale') self.nu = get_opt(self.optns, 0.95, 'nu') self.dynamic_tol = get_opt(self.optns, False, 'dynamic_tol') # preconditioner and solver settings self.precond = get_opt(self.optns, None, 'precond') self.quasi_newton = None self.krylov = None # reset the linearization flag self._allocated = False # request vector memory for future allocation self.primal_factory.request_num_vectors(4) self.state_factory.request_num_vectors(7) # initialize abtract jacobians self.dRdX = dRdX() self.dRdU = dRdU()
[docs] def set_krylov_solver(self, krylov_solver): if isinstance(krylov_solver, KrylovSolver): self.krylov = krylov_solver else: raise TypeError('Solver is not a valid KrylovSolver')
[docs] def set_quasi_newton(self, quasi_newton): if isinstance(quasi_newton, QuasiNewtonApprox): self.quasi_newton = quasi_newton else: raise TypeError('Object is not a valid QuasiNewtonApprox')
[docs] def linearize(self, at_design, at_state, at_adjoint, scale=1.0): """ An abstracted "linearization" method for the matrix. This method does not actually factor any real matrices. It also does not perform expensive linear or non-linear solves. It is used to update internal vector references and perform basic calculations using only cheap matrix-vector products. Parameters ---------- at_design : DesignVector Design point at which the product is evaluated. at_state : StateVector State point at which the product is evaluated. at_dual : DualVector Lagrange multipliers at which the product is evaluated. at_adjoint : StateVector 1st order adjoint variables at which the product is evaluated. """ # store the linearization point self.at_design = at_design self.primal_norm = self.at_design.norm2 self.at_state = at_state self.state_norm = self.at_state.norm2 self.at_adjoint = at_adjoint self.scale = scale # if this is the first ever linearization... if not self._allocated: # generate state vectors self.adjoint_res = self.state_factory.generate() self.w_adj = self.state_factory.generate() self.lambda_adj = self.state_factory.generate() self.state_work = [] for i in xrange(4): self.state_work.append(self.state_factory.generate()) # generate primal vectors self.pert_design = self.primal_factory.generate() self.reduced_grad = self.primal_factory.generate() self.primal_work = [] for i in xrange(2): self.primal_work.append(self.primal_factory.generate()) self._allocated = True # compute adjoint residual at the linearization self.adjoint_res.equals_objective_partial( self.at_design, self.at_state, scale=self.scale) self.dRdU.linearize(self.at_design, self.at_state) self.dRdU.T.product(self.at_adjoint, self.state_work[0]) self.adjoint_res.plus(self.state_work[0]) # compute reduced gradient at the linearization self.reduced_grad.equals_objective_partial( self.at_design, self.at_state, scale=self.scale) self.dRdX.linearize(self.at_design, self.at_state) self.dRdX.T.product(self.at_adjoint, self.primal_work[0]) self.reduced_grad.plus(self.primal_work[0])
[docs] def product(self, in_vec, out_vec): """ Matrix-vector product for the reduced KKT system. Parameters ---------- in_vec : ReducedKKTVector Vector to be multiplied with the KKT matrix. out_vec : ReducedKKTVector Result of the operation. """ # calculate perturbation # epsilon_fd = calc_epsilon(self.primal_norm, in_vec.norm2) epsilon_fd = 1e-5 # perturb the design vector self.pert_design.equals_ax_p_by(1.0, self.at_design, epsilon_fd, in_vec) # compute total gradient at the perturbed design out_vec.equals_objective_partial(self.pert_design, self.at_state, scale=self.scale) self.dRdX.linearize(self.pert_design, self.at_state) self.dRdX.T.product(self.at_adjoint, self.primal_work[0]) out_vec.plus(self.primal_work[0]) # take the difference between perturbed and unperturbed gradient out_vec.minus(self.reduced_grad) # divide it by the perturbation out_vec.divide_by(epsilon_fd) # first adjoint system #################################### # build RHS self.dRdX.linearize(self.at_design, self.at_state) self.dRdX.product(in_vec, self.state_work[0]) self.state_work[0].times(-1.0) # solve the first 2nd order adjoint rel_tol = self.product_tol/max(self.state_work[0].norm2, EPS) self.dRdU.linearize(self.at_design, self.at_state) self.dRdU.solve( self.state_work[0], self.w_adj, rel_tol=rel_tol) # second adjoint system ##################################### # calculate total (dg/dx)^T*w using FD self.state_work[0].equals_objective_partial( self.pert_design, self.at_state, scale=self.scale) self.dRdU.linearize(self.pert_design, self.at_state) self.dRdU.T.product(self.at_adjoint, self.state_work[1]) self.state_work[0].plus(self.state_work[1]) self.state_work[0].minus(self.adjoint_res) self.state_work[0].divide_by(epsilon_fd) # multiply by -1 to use it as RHS self.state_work[0].times(-1.0) # perform state perturbation self.state_work[1].equals_ax_p_by( 1.0, self.at_state, epsilon_fd, self.w_adj) # calculate total (dS/du)^T*z using FD self.state_work[2].equals_objective_partial( self.at_design, self.state_work[1], scale=self.scale) self.state_work[3].equals_ax_p_by( -1./epsilon_fd, self.state_work[2], 1./epsilon_fd, self.adjoint_res) self.dRdU.linearize(self.at_design, self.state_work[1]) self.dRdU.T.product(self.at_adjoint, self.state_work[2]) self.state_work[3].equals_ax_p_by( 1., self.state_work[3], -1./epsilon_fd, self.state_work[2]) # assemble RHS self.state_work[0].plus(self.state_work[3]) # solve the second 2nd order adjoint rel_tol = self.product_tol/max(self.state_work[0].norm2, EPS) self.dRdU.linearize(self.at_design, self.at_state) self.dRdU.T.solve( self.state_work[0], self.lambda_adj, rel_tol=rel_tol) # assemble the Hessian-vector product using 2nd order adjoints ############################################################## # apply lambda_adj to the design part of the jacobian self.dRdX.linearize(self.at_design, self.at_state) self.dRdX.T.product(self.lambda_adj, self.primal_work[0]) out_vec.plus(self.primal_work[0]) # apply w_adj to the cross-derivative part of the jacobian self.primal_work[0].equals_objective_partial( self.at_design, self.state_work[1], scale=self.scale) self.dRdX.linearize(self.at_design, self.state_work[1]) self.dRdX.T.product(self.at_adjoint, self.primal_work[1]) self.primal_work[0].plus(self.primal_work[1]) self.primal_work[0].equals_ax_p_by( 1./epsilon_fd, self.primal_work[0], -1./epsilon_fd, self.reduced_grad) out_vec.plus(self.primal_work[0]) # update quasi-Newton method if necessary if self.quasi_newton is not None: self.quasi_newton.add_correction(in_vec, out_vec) # add globalization if necessary if self.lamb > numpy.finfo(float).eps: out_vec.equals_ax_p_by( 1.-self.lamb, out_vec, self.lamb*self.scale, in_vec)
[docs] def solve(self, rhs, solution, rel_tol=None): """ Solve the linear system defined by this matrix using the embedded krylov solver. Parameters ---------- rhs : DesignVector Right hand side vector for the system. solution : PrimalVector Solution of the system. rel_tol : float, optional Relative tolerance for the krylov solver. """ # make sure we have a krylov solver if self.krylov is None: raise AttributeError('krylov solver not set') # define the preconditioner if self.precond is not None: if self.quasi_newton is not None: precond = self.quasi_newton.solve else: raise AttributeError('preconditioner is specified but not set') else: eye = IdentityMatrix() precond = eye.product # update the solution tolerance if necessary if isinstance(rel_tol, float): self.krylov.rel_tol = rel_tol # trigger the solution return self.krylov.solve(self.product, rhs, solution, precond)
# imports here to prevent circular errors import numpy from kona.options import get_opt from kona.linalg.vectors.common import DesignVector, StateVector from kona.linalg.matrices.common import dRdX, dRdU, IdentityMatrix from kona.linalg.solvers.krylov.basic import KrylovSolver from kona.linalg.solvers.util import calc_epsilon, EPS