Source code for kona.linalg.matrices.hessian.lagrangian_hessian

from kona.linalg.matrices.hessian.basic import BaseHessian

[docs]class LagrangianHessian(BaseHessian): """ Matrix object for the Hessian block of the reduced KKT matrix. Uses the same 2nd order adjoint formulation as ReducedKKTMatrix, but only for the diagonal Hessian block, :math:`\mathsf{W} = \\nabla^2_x \\mathcal{L}`. If slack terms are present, it will also perform a product with the slack derivative of the Lagrangian. """ def __init__(self, vector_factories, optns=None): super(LagrangianHessian, self).__init__(vector_factories, optns) # request vector allocation self.primal_factory.request_num_vectors(3) self.state_factory.request_num_vectors(6) if self.eq_factory is not None: self.eq_factory.request_num_vectors(4) # set misc flags self._approx = False self._transposed = False self._allocated = False # get 2nd order adjoint tolerances self.product_tol = get_opt(self.optns, 1e-6, 'product_tol') # initialize the internal krylov method krylov_optns = { 'out_file' : get_opt(self.optns, 'kona_tangent_stcg.dat', 'out_file'), 'subspace_size' : get_opt(self.optns, 20, 'subspace_size'), 'proj_cg' : True, 'check_res' : get_opt(self.optns, True, 'check_res'), 'rel_tol' : get_opt(self.optns, 1e-3, 'rel_tol'), } self.krylov = STCG( self.primal_factory, optns=krylov_optns, dual_factory=self.ineq_factory) @property def approx(self): self._approx = True return self
[docs] def set_projector(self, proj_cg): if isinstance(proj_cg, AugmentedKKTMatrix): self.proj_cg = proj_cg else: raise TypeError('Invalid null-space projector!')
[docs] def linearize(self, X, at_state, at_adjoint, obj_scale=1.0, cnstr_scale=1.0): # store references to the evaluation point if isinstance(X.primal, CompositePrimalVector): self.at_design = X.primal.design self.at_slack = X.primal.slack else: self.at_design = X.primal self.at_slack = None self.at_dual = X.dual self.at_state = at_state self.at_adjoint = at_adjoint # if this is the first linearization, produce some work vectors if not self._allocated: self.design_work = self.primal_factory.generate() self.reduced_grad = self.primal_factory.generate() self.pert_design = self.primal_factory.generate() self.state_work = self.state_factory.generate() self.adjoint_work = self.state_factory.generate() self.forward_adjoint = self.state_factory.generate() self.reverse_adjoint = self.state_factory.generate() self.adjoint_res = self.state_factory.generate() self.pert_state = self.state_factory.generate() if self.eq_factory is not None and self.ineq_factory is not None: self.dual_in = CompositeDualVector( self.eq_factory.generate(), self.ineq_factory.generate()) self.dual_out = CompositeDualVector( self.eq_factory.generate(), self.ineq_factory.generate()) self.dual_work = CompositeDualVector( self.eq_factory.generate(), self.ineq_factory.generate()) self.slack_term = self.ineq_factory.generate() elif self.eq_factory is not None: self.dual_in = self.eq_factory.generate() self.dual_out = self.eq_factory.generate() self.dual_work = self.eq_factory.generate() self.slack_term = None elif self.ineq_factory is not None: self.dual_in = self.ineq_factory.generate() self.dual_out = self.ineq_factory.generate() self.dual_work = self.ineq_factory.generate() self.slack_term = self.ineq_factory.generate() else: self.dual_work = None self.slack_term = None self.dual_in = None self.dual_out = None self._allocated = True # reset radius self.radius = None # store scales self.obj_scale = obj_scale self.cnstr_scale = cnstr_scale # compute adjoint residual at the linearization self.adjoint_res.equals_objective_partial( self.at_design, self.at_state, scale=self.obj_scale) dRdU(self.at_design, self.at_state).T.product( self.at_adjoint, self.state_work) self.adjoint_res.plus(self.state_work) dCdU(self.at_design, self.at_state).T.product( self.at_dual, self.state_work) self.state_work.times(self.cnstr_scale) self.adjoint_res.plus(self.state_work) # compute reduced gradient at the linearization self.reduced_grad.equals_objective_partial( self.at_design, self.at_state, scale=self.obj_scale) dRdX(self.at_design, self.at_state).T.product( self.at_adjoint, self.design_work) self.reduced_grad.plus(self.design_work) dCdX(self.at_design, self.at_state).T.product( self.at_dual, self.design_work) self.design_work.times(self.cnstr_scale) self.reduced_grad.plus(self.design_work) # if slacks exist, compute the slack term if self.at_slack is not None: self.slack_term.equals(self.at_slack) self.slack_term.pow(-1.) self.slack_term.times(self.at_dual)
[docs] def multiply_W(self, in_vec, out_vec): # calculate the FD perturbation for the design epsilon_fd = calc_epsilon(self.at_design.norm2, in_vec.norm2) # perturb the design variables self.pert_design.equals_ax_p_by( 1.0, self.at_design, epsilon_fd, in_vec) # compute partial (d^2 L/dx^2)*in_vec and store in out_vec out_vec.equals_objective_partial( self.pert_design, self.at_state, scale=self.obj_scale) dRdX(self.pert_design, self.at_state).T.product( self.at_adjoint, self.design_work) out_vec.plus(self.design_work) dCdX(self.pert_design, self.at_state).T.product( self.at_dual, self.design_work) self.design_work.times(self.cnstr_scale) out_vec.plus(self.design_work) out_vec.minus(self.reduced_grad) out_vec.divide_by(epsilon_fd) # build RHS for first adjoint system and solve for forward adjoint dRdX(self.at_design, self.at_state).product(in_vec, self.state_work) self.state_work.times(-1.) if self._approx: dRdU(self.at_design, self.at_state).precond( self.state_work, self.forward_adjoint) else: rel_tol = self.product_tol/max(self.state_work.norm2, EPS) dRdU(self.at_design, self.at_state).solve( self.state_work, self.forward_adjoint, rel_tol=rel_tol) # compute the FD perturbation for the states epsilon_fd = calc_epsilon( self.at_state.norm2, self.forward_adjoint.norm2) self.pert_state.equals_ax_p_by( 1.0, self.at_state, epsilon_fd, self.forward_adjoint) # build RHS for second adjoint system # STEP 1: perturb design, evaluate adjoint residual, take difference self.adjoint_work.equals_objective_partial( self.pert_design, self.at_state, scale=self.obj_scale) dRdU(self.pert_design, self.at_state).T.product( self.at_adjoint, self.state_work) self.adjoint_work.plus(self.state_work) dCdU(self.pert_design, self.at_state).T.product( self.at_dual, self.state_work) self.state_work.times(self.cnstr_scale) self.adjoint_work.plus(self.state_work) self.adjoint_work.minus(self.adjoint_res) self.adjoint_work.divide_by(epsilon_fd) # STEP 2: perturb state, evaluate adjoint residual, take difference self.reverse_adjoint.equals_objective_partial( self.at_design, self.pert_state, scale=self.obj_scale) dRdU(self.at_design, self.pert_state).T.product( self.at_adjoint, self.state_work) self.reverse_adjoint.plus(self.state_work) dCdU(self.at_design, self.pert_state).T.product( self.at_dual, self.state_work) self.state_work.times(self.cnstr_scale) self.reverse_adjoint.plus(self.state_work) self.reverse_adjoint.minus(self.adjoint_res) self.reverse_adjoint.divide_by(epsilon_fd) # STEP 3: assemble the final RHS and solve the adjoint system self.adjoint_work.plus(self.reverse_adjoint) self.adjoint_work.times(-1.) if self._approx: dRdU(self.at_design, self.at_state).T.precond( self.adjoint_work, self.reverse_adjoint) else: rel_tol = self.product_tol/max(self.adjoint_work.norm2, EPS) dRdU(self.at_design, self.at_state).T.solve( self.adjoint_work, self.reverse_adjoint, rel_tol=rel_tol) # now we can assemble the remaining pieces of the Hessian-vector product # apply reverse_adjoint to design part of the jacobian dRdX(self.at_design, self.at_state).T.product( self.reverse_adjoint, self.design_work) out_vec.plus(self.design_work) # apply the Lagrangian adjoint to the cross-derivative part of Hessian self.design_work.equals_objective_partial( self.at_design, self.pert_state, scale=self.obj_scale) dRdX(self.at_design, self.pert_state).T.product( self.at_adjoint, self.pert_design) self.design_work.plus(self.pert_design) dCdX(self.at_design, self.pert_state).T.product( self.at_dual, self.pert_design) self.pert_design.times(self.cnstr_scale) self.design_work.plus(self.pert_design) self.design_work.minus(self.reduced_grad) self.design_work.divide_by(epsilon_fd) out_vec.plus(self.design_work) # reset the approx and transpose flags at the end self._approx = False
[docs] def multiply_slack(self, in_vec, out_vec): out_vec.equals(in_vec) out_vec.times(self.slack_term)
[docs] def product(self, in_vec, out_vec): # do some aliasing if self.at_slack is not None: in_design = in_vec.design in_slack = in_vec.slack out_design = out_vec.design out_slack = out_vec.slack else: in_design = in_vec in_slack = None out_design = out_vec out_slack = None # multiply the design component self.multiply_W(in_design, out_design) # multiply the slack component if self.at_slack is not None: self.multiply_slack(in_slack, out_slack)
[docs] def precond(self, in_vec, out_vec): in_kkt = ReducedKKTVector(in_vec, self.dual_in) in_kkt.dual.equals(0.0) out_kkt = ReducedKKTVector(out_vec, self.dual_out) self.proj_cg.solve(in_kkt, out_kkt, rel_tol=1e-4)
[docs] def solve(self, rhs, solution, rel_tol=None): if self.radius is None: raise ValueError('Trust radius not set!') if self.proj_cg is None: raise ValueError('CG projection preconditioner not set!') # set krylov relative tolerance and radius self.krylov.radius = self.radius if rel_tol is not None: tmp_rel_tol = self.krylov.rel_tol self.krylov.rel_tol = rel_tol # solve the system solution.equals(0.0) self.pred, self.trust_active = \ self.krylov.solve(self.product, rhs, solution, self.precond) # reset the krylov relative tolerance if rel_tol is not None: self.krylov.rel_tol = tmp_rel_tol
# imports here to prevent circular errors from kona.options import get_opt from kona.linalg.vectors.common import DesignVector, StateVector from kona.linalg.vectors.composite import DualVectorEQ, DualVectorINEQ from kona.linalg.vectors.composite import ReducedKKTVector from kona.linalg.vectors.composite import CompositePrimalVector from kona.linalg.vectors.composite import CompositeDualVector from kona.linalg.matrices.common import dRdX, dRdU, dCdX, dCdU from kona.linalg.matrices.hessian import AugmentedKKTMatrix from kona.linalg.solvers.util import calc_epsilon, EPS from kona.linalg.solvers.krylov import STCG