Source code for kona.linalg.matrices.hessian.reduced_kkt

from kona.linalg.matrices.hessian.basic import BaseHessian

[docs]class ReducedKKTMatrix(BaseHessian):
    """
    Reduced approximation of the KKT matrix using a 2nd order adjoint
    formulation.

    For problems with only equality constraints, the KKT system is given as:

    .. math::
        \\begin{bmatrix}
        \\nabla_x^2 \\mathcal{L} && \\nabla_x c^T \\\\
        \\nabla_x c_{eq} && 0
        \\end{bmatrix}
        \\begin{bmatrix}
        \\Delta x \\\\
        \\Delta \\lambda
        \\end{bmatrix}
        =
        \\begin{bmatrix}
        -\\nabla_x \\mathcal{f} - \\lambda^T \\nabla_x c \\\\
        - c
        \\end{bmatrix}

    where :math:`\\mathcal{L}` is the Lagrangian defined as:

    .. math::
        \\mathcal{L}(x, u(x), \lambda) = F(x, u(x)) + \\lambda^T c(x, u(x))

    For problems with inequality constraints, slack variables :math:`s` are introduced 
    alongside a log-barrier term for non-negativity, such that the Lagrangian 
    :math:`\\mathcal{L}` becomes:

    .. math::
        \\mathcal{L}(x, u(x), \lambda) = F(x, u(x)) +
        \\lambda_{eq}^T c_{eq}(x, u(x)) +
        \\lambda_{ineq}^T \\left[c_{ineq}(x, u(x)) - s\\right] +
        \\frac{1}{2}\\mu\\sum_{i=1}^{n_{ineq}}ln(s_i)

    The inequality constrained KKT system is then defined as:

    .. math::
        \\begin{bmatrix}
        \\nabla_x^2 \\mathcal{L} && 0 && \\nabla_x c_{eq}^T && \\nabla_x c_{ineq}^T \\\\
        0 && \\Sigma && 0 && I \\\\
        \\nabla_x c_{eq} && 0 && 0 && 0 \\\\
        \\nabla_x c_{ineq} && I && 0 && 0
        \\end{bmatrix}
        \\begin{bmatrix}
        \\Delta x \\\\
        \\Delta s \\\\
        \\Delta \\lambda_{eq} \\\\
        \\Delta \\lambda_{ineq}
        \\end{bmatrix}
        =
        \\begin{bmatrix}
        -\\nabla_x \\mathcal{f} - \\lambda_{eq}^T \\nabla_x c_{eq} -\\lambda_{eq}^T \\nabla_x c_{eq} \\\\
        \\lambda^T \\mu * S^{-1}e \\\\
        - c_{eq} \\\\
        - c_{ineq} + s
        \\end{bmatrix}

    .. note::

        Currently, Kona does not have any optimization algorithms that support inequality 
        constraints. The slack implementation in this matrix is part of an ongoing development 
        effort to support inequality constraints at a future date.

    Attributes
    ----------
    product_tol : float
        Tolerance for 2nd order adjoint system solutions.
    scale, grad_scale, feas_scale : float
        Optimality metric normalization factors.
    krylov : KrylovSolver
        A krylov solver object used to solve the system defined by this matrix.
    dRdX, dRdU, dCdX, dCdU : KonaMatrix
        Various abstract jacobians used in calculating the mat-vec product.
    """
    def __init__(self, vector_factories, optns=None):
        super(ReducedKKTMatrix, self).__init__(vector_factories, optns)

        # read reduced options
        self.product_tol = get_opt(self.optns, 1e-6, 'product_tol')
        self.scale = get_opt(self.optns, 1.0, 'scale')
        self.grad_scale = get_opt(self.optns, 1.0, 'grad_scale')
        self.feas_scale = get_opt(self.optns, 1.0, 'feas_scale')

        # set empty solver handle
        self.krylov = None

        # reset the linearization flag
        self._allocated = False

        # request vector memory for future allocation
        self.primal_factory.request_num_vectors(3)
        self.state_factory.request_num_vectors(6)
        if self.eq_factory is not None:
            self.eq_factory.request_num_vectors(3)
        if self.ineq_factory is not None:
            self.ineq_factory.request_num_vectors(3)

        # initialize abtract jacobians
        self.dRdX = dRdX()
        self.dRdU = dRdU()
        self.dCdX = dCdX()
        self.dCdU = dCdU()

    def _linear_solve(self, rhs_vec, solution, rel_tol=1e-6):
        self.dRdU.linearize(self.at_design, self.at_state)
        self.dRdU.solve(rhs_vec, solution, rel_tol=rel_tol)

    def _adjoint_solve(self, rhs_vec, solution, rel_tol=1e-6):
        self.dRdU.linearize(self.at_design, self.at_state)
        self.dRdU.T.solve(rhs_vec, solution, rel_tol=rel_tol)

[docs]    def set_krylov_solver(self, krylov_solver):
        if isinstance(krylov_solver, KrylovSolver):
            self.krylov = krylov_solver
        else:
            raise TypeError('Solver is not a valid KrylovSolver')

[docs]    def linearize(self, at_kkt, at_state, at_adjoint,
                  obj_scale=1.0, cnstr_scale=1.0):
        """
        Linearize the KKT matrix at the given KKT, state, adjoint and barrier
        point. This method does not perform any factorizations or matrix
        operations.

        Parameters
        ----------
        at_kkt : ReducedKKTVector
            KKT vector at which the product is evaluated
        at_state : StateVector
            State point at which the product is evaluated.
        at_adjoint : StateVector
            1st order adjoint variables at which the product is evaluated.
        obj_scale : float, optional
            Factor by which the objective component of the product is scaled.
        cnstr_scale : float, optional
            Factor by which the constraint component of the product is scaled.
        """
        # if this is the first ever linearization...
        if not self._allocated:

            # generate state vectors
            self.adjoint_res = self.state_factory.generate()
            self.w_adj = self.state_factory.generate()
            self.lambda_adj = self.state_factory.generate()
            self.state_work = []
            for i in xrange(3):
                self.state_work.append(self.state_factory.generate())

            # generate primal vectors
            self.pert_design = self.primal_factory.generate()
            self.reduced_grad = self.primal_factory.generate()
            self.primal_work = self.primal_factory.generate()

            # generate dual vectors
            if isinstance(at_kkt.dual, CompositeDualVector):
                dual_eq = self.eq_factory.generate()
                dual_ineq = self.ineq_factory.generate()
                self.dual_work = CompositeDualVector(dual_eq, dual_ineq)
            else:
                self.dual_work = self.eq_factory.generate()
            self.slack_block = None
            if self.ineq_factory is not None:
                self.slack_block = self.ineq_factory.generate()

            self._allocated = True

        # store the linearization point
        self.at_dual = at_kkt.dual
        if isinstance(at_kkt.primal, CompositePrimalVector):
            self.at_design = at_kkt.primal.design
            self.at_slack = at_kkt.primal.slack
            if isinstance(self.at_dual, CompositeDualVector):
                self.at_dual_ineq = self.at_dual.ineq
            else:
                self.at_dual_ineq = self.at_dual
        else:
            self.at_design = at_kkt.primal
            self.at_slack = None
            self.at_dual_ineq = None
        self.design_norm = self.at_design.norm2
        self.at_state = at_state
        self.state_norm = self.at_state.norm2
        self.at_adjoint = at_adjoint
        self.at_dual = at_kkt.dual

        # store scales
        self.obj_scale = obj_scale
        self.cnstr_scale = cnstr_scale

        # pre compute the slack block
        if self.slack_block is not None:
            self.slack_block.equals(self.at_slack)
            self.slack_block.pow(-1.)
            self.slack_block.times(self.at_dual_ineq)

        # compute adjoint residual at the linearization
        self.dual_work.equals_constraints(self.at_design, self.at_state)
        self.adjoint_res.equals_objective_partial(
            self.at_design, self.at_state, scale=self.obj_scale)
        self.dRdU.linearize(self.at_design, self.at_state)
        self.dRdU.T.product(self.at_adjoint, self.state_work[0])
        self.adjoint_res.plus(self.state_work[0])
        self.dCdU.linearize(self.at_design, self.at_state)
        self.dCdU.T.product(self.at_dual, self.state_work[0])
        self.state_work[0].times(self.cnstr_scale)
        self.adjoint_res.plus(self.state_work[0])

        # compute reduced gradient at the linearization
        self.reduced_grad.equals_objective_partial(
            self.at_design, self.at_state, scale=self.obj_scale)
        self.dRdX.linearize(self.at_design, self.at_state)
        self.dRdX.T.product(self.at_adjoint, self.primal_work)
        self.reduced_grad.plus(self.primal_work)
        self.dCdX.linearize(self.at_design, self.at_state)
        self.dCdX.T.product(self.at_dual, self.primal_work)
        self.primal_work.times(self.cnstr_scale)
        self.reduced_grad.plus(self.primal_work)

[docs]    def product(self, in_vec, out_vec):
        """
        Matrix-vector product for the reduced KKT system.

        Parameters
        ----------
        in_vec : ReducedKKTVector
            Vector to be multiplied with the KKT matrix.
        out_vec : ReducedKKTVector
            Result of the operation.
        """
        # type check given vectors
        if not isinstance(in_vec, ReducedKKTVector):
            raise TypeError('Multiplying vector is not a ReducedKKTVector')
        if not isinstance(out_vec, ReducedKKTVector):
            raise TypeError('Result vector is not a ReducedKKTVector')

        # clear output vector
        out_vec.equals(0.0)

        # do some aliasing to make the code cleanier
        in_dual = in_vec.dual
        out_dual = out_vec.dual
        if isinstance(in_vec.primal, CompositePrimalVector):
            if self.at_slack is None:
                raise TypeError('No slack variables defined!')
            in_design = in_vec.primal.design
            in_slack = in_vec.primal.slack
            out_design = out_vec.primal.design
            out_slack = out_vec.primal.slack
            if isinstance(in_dual, CompositeDualVector):
                in_dual_ineq = in_dual.ineq
                out_dual_ineq = out_dual.ineq
            else:
                in_dual_ineq = in_dual
                out_dual_ineq = out_dual
        else:
            in_design = in_vec.primal
            in_slack = None
            out_design = out_vec.primal
            out_slack = None
            in_dual_ineq = None
            out_dual_ineq = None

        # calculate appropriate FD perturbation for design
        epsilon_fd = calc_epsilon(self.design_norm, in_design.norm2)

        # assemble RHS for first adjoint system
        self.dRdX.linearize(self.at_design, self.at_state)
        self.dRdX.product(in_design, self.state_work[0])
        self.state_work[0].times(-1.0)

        # perform the adjoint solution
        self.w_adj.equals(0.0)
        # rel_tol = self.product_tol/max(self.state_work[0].norm2, EPS)
        rel_tol = 1e-6
        self._linear_solve(self.state_work[0], self.w_adj, rel_tol=rel_tol)

        # find the adjoint perturbation by solving the linearized dual equation
        self.pert_design.equals_ax_p_by(
            1.0, self.at_design, epsilon_fd, in_design)
        self.state_work[2].equals_ax_p_by(
            1.0, self.at_state, epsilon_fd, self.w_adj)

        # first part of LHS: evaluate the adjoint equation residual at
        # perturbed design and state
        self.state_work[0].equals_objective_partial(
            self.pert_design, self.state_work[2], scale=self.obj_scale)
        pert_state = self.state_work[2] # aliasing for readability
        self.dRdU.linearize(self.pert_design, pert_state)
        self.dRdU.T.product(self.at_adjoint, self.state_work[1])
        self.state_work[0].plus(self.state_work[1])
        self.dCdU.linearize(self.pert_design, pert_state)
        self.dCdU.T.product(self.at_dual, self.state_work[1])
        self.state_work[1].times(self.cnstr_scale)
        self.state_work[0].plus(self.state_work[1])

        # at this point state_work[0] should contain the perturbed adjoint
        # residual, so take difference with unperturbed adjoint residual
        self.state_work[0].minus(self.adjoint_res)
        self.state_work[0].divide_by(epsilon_fd)

        # multiply by -1 to move to RHS
        self.state_work[0].times(-1.0)

        # second part of LHS: (dC/dU) * in_vec.dual
        self.dCdU.linearize(self.at_design, self.at_state)
        self.dCdU.T.product(in_dual, self.state_work[1])
        self.state_work[1].times(self.cnstr_scale)

        # assemble final RHS
        self.state_work[0].minus(self.state_work[1])

        # perform the adjoint solution
        self.lambda_adj.equals(0.0)
        # rel_tol = self.product_tol/max(self.state_work[0].norm2, EPS)
        rel_tol = 1e-6
        self._adjoint_solve(
            self.state_work[0], self.lambda_adj, rel_tol=rel_tol)

        # evaluate first order optimality conditions at perturbed design, state
        # and adjoint:
        # g = df/dX + lag_mult*dC/dX + (adjoint + eps_fd*lambda_adj)*dR/dX
        self.state_work[1].equals_ax_p_by(
            1.0, self.at_adjoint, epsilon_fd, self.lambda_adj)
        pert_adjoint = self.state_work[1] # aliasing for readability
        out_design.equals_objective_partial(
            self.pert_design, pert_state, scale=self.obj_scale)
        self.dRdX.linearize(self.pert_design, pert_state)
        self.dRdX.T.product(pert_adjoint, self.primal_work)
        out_design.plus(self.primal_work)
        self.dCdX.linearize(self.pert_design, pert_state)
        self.dCdX.T.product(self.at_dual, self.primal_work)
        self.primal_work.times(self.cnstr_scale)
        out_design.plus(self.primal_work)

        # take difference with unperturbed conditions
        out_design.times(self.grad_scale)
        out_design.minus(self.reduced_grad)
        out_design.divide_by(epsilon_fd)

        # the dual part needs no FD
        self.dCdX.linearize(self.at_design, self.at_state)
        self.dCdX.T.product(in_dual, self.primal_work)
        self.primal_work.times(self.cnstr_scale)
        out_design.plus(self.primal_work)

        # evaluate dual part of product:
        # C = dC/dX*in_vec + dC/dU*w_adj
        self.dCdX.linearize(self.at_design, self.at_state)
        self.dCdX.product(in_design, out_vec.dual)
        out_vec.dual.times(self.cnstr_scale)
        self.dCdU.linearize(self.at_design, self.at_state)
        self.dCdU.product(self.w_adj, self.dual_work)
        self.dual_work.times(self.cnstr_scale)
        out_dual.plus(self.dual_work)
        out_dual.times(self.feas_scale)

        # add the slack term to the dual component
        if in_slack is not None:
            # set slack output
            # out_slack = Sigma * in_slack + in_dual_ineq
            out_slack.equals(in_slack)
            out_slack.times(self.slack_block)
            out_slack.plus(in_dual_ineq)
            # add the slack contribution to dual component
            # out_dual_ineq += in_slack
            out_dual_ineq.plus(in_slack)

# imports here to prevent circular errors
from kona.options import get_opt
from kona.linalg.vectors.common import StateVector
from kona.linalg.vectors.composite import ReducedKKTVector
from kona.linalg.vectors.composite import CompositePrimalVector
from kona.linalg.vectors.composite import CompositeDualVector
from kona.linalg.matrices.common import dRdX, dRdU, dCdX, dCdU
from kona.linalg.solvers.krylov.basic import KrylovSolver
from kona.linalg.solvers.util import calc_epsilon, EPS