Source code for kona.algorithms.composite_step_rsnk

import numpy as np

from kona.options import get_opt
from kona.linalg.common import current_solution, factor_linear_system, objective_value
from kona.linalg.vectors.composite import ReducedKKTVector
from kona.linalg.matrices.common import dCdX, dCdU, dRdX, dRdU
from kona.linalg.matrices.hessian import AugmentedKKTMatrix, LagrangianHessian
from kona.linalg.solvers.util import EPS
from kona.algorithms.base_algorithm import OptimizationAlgorithm

[docs]class CompositeStepRSNK(OptimizationAlgorithm):
    """
    A reduced-space composite-step optimization algorithm for PDE-governed
    equality constrained problems, globalized using a trust-region approach.

    This implementation is based on the composite-step algorithm proposed by 
    `Heinkenschloss and Ridzal<http://epubs.siam.org/doi/abs/10.1137/130921738>`_. 
    However, we have omitted the inexactness corrections for simplicity and implemented
    a 2nd order adjoint approach for producing the necessary matrix-vector products.

    Attributes
    ----------
    factor_matrices : bool
        Boolean flag for matrix-based PDE solvers.
    radius, min_radius, max_radius : float
        Trust radius parameters.
    mu, mu_max, mu_pow : float
        Augmented Lagrangian constraint factor parameters.
    normal_KKT : :class:`~kona.linalg.matrices.hessian.AugmentedKKTMatrix`
        Matrix object for the normal step system.
    tangent_KKT : :class:`~kona.linalg.matrices.hessian.LagrangianHessian`
        Matrix object for the tangent step system.
    globalization : string
        Flag to determine solution globalization type.
    """
    def __init__(self, primal_factory, state_factory, eq_factory, ineq_factory, optns={}):
        # trigger base class initialization
        super(CompositeStepRSNK, self).__init__(
            primal_factory, state_factory, eq_factory, ineq_factory, optns)

        # number of vectors required in solve() method
        self.primal_factory.request_num_vectors(9)
        self.state_factory.request_num_vectors(4)
        self.dual_factory = self.eq_factory
        self.dual_factory.request_num_vectors(15)

        # get general options
        self.factor_matrices = get_opt(optns, False, 'matrix_explicit')

        # get trust region options
        self.radius = get_opt(optns, 0.5, 'trust', 'init_radius')
        self.min_radius = get_opt(optns, 0.5/(2**3), 'trust', 'min_radius')
        self.max_radius = get_opt(optns, 0.5*(2**3), 'trust', 'max_radius')

        # get penalty parameter options
        self.mu = get_opt(optns, 1.0, 'penalty', 'mu_init')
        self.mu_pow = get_opt(optns, 1e-8, 'penalty', 'mu_pow')
        self.mu_max = get_opt(optns, 1e4, 'penalty', 'mu_max')

        # get globalization type
        self.globalization = get_opt(optns, 'linesearch', 'globalization')

        if self.globalization not in ['trust', 'linesearch', None]:
            raise TypeError(
                'Invalid globalization type! ' +
                'Can only use \'trust\' or \'linesearch\'. ' +
                'If you want to skip globalization, set to None.')

        # initialize the KKT matrix definition
        normal_optns = get_opt(optns, {}, 'composite-step', 'normal-step')
        self.normal_KKT = AugmentedKKTMatrix(
            [self.primal_factory, self.state_factory, self.dual_factory],
            normal_optns)
        tangent_optns = get_opt(optns, {}, 'composite-step', 'tangent-step')
        self.tangent_KKT = LagrangianHessian(
            [self.primal_factory, self.state_factory, self.dual_factory],
            tangent_optns)
        self.tangent_KKT.set_projector(self.normal_KKT)

    def _write_header(self):
        self.hist_file.write(
            '# Kona composite-step convergence history file\n' +
            '# iters' + ' '*5 +
            '   cost' + ' '*5 +
            'optimality  ' + ' '*5 +
            'feasibility ' + ' '*5 +
            'objective   ' + ' '*5 +
            'penalty     ' + ' '*5 +
            'radius      ' + '\n'
        )

    def _write_history(self, opt, feas, obj):
        self.hist_file.write(
            '%7i'%self.iter + ' '*5 +
            '%7i'%self.primal_factory._memory.cost + ' '*5 +
            '%11e'%opt + ' '*5 +
            '%11e'%feas + ' '*5 +
            '%11e'%obj + ' '*5 +
            '%11e'%self.mu + ' '*5 +
            '%11e'%self.radius + '\n'
        )

    def _generate_KKT_vector(self):
        primal = self.primal_factory.generate()
        dual = self.dual_factory.generate()
        return ReducedKKTVector(primal, dual)

    def _generate_all_memory(self):
        # generate composite KKT vectors
        self.X = self._generate_KKT_vector()
        self.P = self._generate_KKT_vector()
        self.dLdX = self._generate_KKT_vector()
        self.kkt_work = self._generate_KKT_vector()
        self.normal_rhs = self._generate_KKT_vector()
        self.normal_step = self._generate_KKT_vector()
        self.tangent_rhs = self.primal_factory.generate()
        self.tangent_step = self.primal_factory.generate()

        # generate primal vectors
        self.design_work = self.primal_factory.generate()
        self.design_save = self.primal_factory.generate()

        # generate state vectors
        self.state = self.state_factory.generate()
        self.state_work = self.state_factory.generate()
        self.adjoint = self.state_factory.generate()
        self.adjoint_work = self.state_factory.generate()

        # generate dual vectors
        self.dual_work = self.dual_factory.generate()

[docs]    def solve(self):
        self._write_header()
        self.info_file.write(
            '\n' +
            '**************************************************\n' +
            '***       Using Composite-Step Algorithm       ***\n' +
            '**************************************************\n' +
            '\n')

        # generate all the vectors used in the optimization
        self._generate_all_memory()

        # do some aliasing
        X = self.X
        state = self.state
        adjoint = self.adjoint
        dLdX = self.dLdX

        P = self.P
        normal_rhs = self.normal_rhs
        normal_step = self.normal_step
        tangent_rhs = self.tangent_rhs
        tangent_step = self.tangent_step

        design_work = self.design_work
        dual_work = self.dual_work
        state_work = self.state_work

        # initialize basic data for outer iterations
        converged = False
        self.iter = 0

        # evaluate the initial design before starting outer iterations
        X.equals_init_guess()
        if not state.equals_primal_solution(X.primal):
            raise RuntimeError(
                'Invalid initial guess! Nonlinear solution breakdown.')

        if self.factor_matrices and self.iter < self.max_iter:
            factor_linear_system(X.primal, state)

        # perform an adjoint solution for the Lagrangian
        state_work.equals_objective_partial(X.primal, state)
        dCdU(X.primal, state).T.product(X.dual, adjoint)
        state_work.plus(adjoint)
        state_work.times(-1.)
        dRdU(X.primal, state).T.solve(state_work, adjoint)

        # send initial point info to the user
        solver_info = current_solution(self.iter, X.primal, state, adjoint, X.dual)
        if isinstance(solver_info, str):
            self.info_file.write('\n' + solver_info + '\n')

        # BEGIN NEWTON LOOP HERE
        ###############################
        min_radius_active = False
        krylov_tol = 0.00095
        for i in xrange(self.max_iter):
            # advance iteration counter
            self.iter += 1

            # evaluate optimality, feasibility and KKT norms
            dLdX.equals_KKT_conditions(X, state, adjoint)
            # print info on current point
            self.info_file.write(
                '==========================================================\n' +
                'Beginning Major Iteration %i\n\n'%self.iter)
            self.info_file.write(
                'primal vars        = %e\n'%X.primal.norm2)
            self.info_file.write(
                'multipliers        = %e\n\n'%X.dual.norm2)

            if self.iter == 1:
                # calculate initial norms
                grad_norm0 = dLdX.primal.norm2
                feas_norm0 = max(dLdX.dual.norm2, EPS)
                kkt_norm0 = np.sqrt(feas_norm0**2 + grad_norm0**2)

                # set current norms to initial
                kkt_norm = kkt_norm0
                grad_norm = grad_norm0
                feas_norm = feas_norm0

                # print out convergence norms
                self.info_file.write(
                    'grad_norm0         = %e\n'%grad_norm0 +
                    'feas_norm0         = %e\n'%feas_norm0)

                # calculate convergence tolerances
                grad_tol = self.primal_tol * max(grad_norm0, 1e-3)
                feas_tol = self.cnstr_tol * max(feas_norm0, 1e-3)

            else:
                # calculate current norms
                grad_norm = dLdX.primal.norm2
                feas_norm = max(dLdX.dual.norm2, EPS)
                kkt_norm = np.sqrt(feas_norm**2 + grad_norm**2)

                # update the augmented Lagrangian penalty
                self.info_file.write(
                    'grad_norm          = %e (%e <-- tolerance)\n'%(
                        grad_norm, grad_tol) +
                    'feas_norm          = %e (%e <-- tolerance)\n'%(
                        feas_norm, feas_tol))

            # write convergence history
            obj_val = objective_value(X.primal, state)
            self._write_history(grad_norm, feas_norm, obj_val)

            # check for convergence
            if (grad_norm < grad_tol) and (feas_norm < feas_tol):
                converged = True
                break

            # compute krylov tolerances in order to achieve superlinear
            # convergence but to avoid oversolving
            self.krylov_tol = \
                self.tangent_KKT.krylov.rel_tol * \
                min(1.0, np.sqrt(kkt_norm/kkt_norm0))
            self.krylov_tol = \
                max(krylov_tol,
                    min(grad_tol/grad_norm, feas_tol/feas_norm))

            # linearize the normal and tangent matrices at this point
            self.normal_KKT.linearize(X, state)
            self.tangent_KKT.linearize(X, state, adjoint)

            # assemble the RHS vector the Lagrange multiplier solve
            normal_rhs.primal.equals(dLdX.primal)
            normal_rhs.dual.equals(0.0)
            normal_rhs.times(-1.)

            # solve for the Lagrange multiplier estimates
            self.normal_KKT.solve(normal_rhs, P, self.krylov_tol)

            # assemble the RHS vector for the normal-step solve
            # rhs = [0, 0, -(c-e^s)]
            normal_rhs.equals(0.0)
            normal_rhs.dual.equals(dLdX.dual)
            normal_rhs.dual.times(-1.)

            # solve for the normal step
            self.normal_KKT.solve(normal_rhs, normal_step, krylov_tol)

            # apply a trust radius check to the normal step
            normal_step_norm = normal_step.primal.norm2
            if normal_step_norm > 0.8*self.radius:
                normal_step.primal.times(0.8*self.radius/normal_step_norm)

            # set trust radius settings for the tangent step STCG solver
            self.tangent_KKT.radius = np.sqrt(
                self.radius**2 - normal_step.primal.norm2**2)

            # set up the RHS vector for the tangent-step solve
            # -(W * normal_design + dL/dDesign)
            self.tangent_KKT.multiply_W(
                normal_step.primal, tangent_rhs)
            tangent_rhs.plus(dLdX.primal)
            tangent_rhs.times(-1.)

            # solve for the tangent step
            self.tangent_KKT.solve(
                tangent_rhs, tangent_step, krylov_tol)

            # assemble the complete step
            P.primal.equals_ax_p_by(1., normal_step.primal, 1, tangent_step)

            # calculate predicted decrease in the merit function
            self.calc_pred_reduction()

            # apply globalization
            if self.globalization == 'trust':
                old_flag = min_radius_active
                success, min_radius_active = self.trust_step()

                # watchdog on trust region failures
                if min_radius_active and old_flag:
                    self.info_file.write(
                        'Trust radius breakdown! Terminating...\n')
                    break

            elif self.globalization == 'linesearch':
                old_flag = min_radius_active
                success, min_radius_active = self.backtracking_step()

                # watchdog on trust region failures
                if min_radius_active and old_flag:
                    self.info_file.write(
                        'Trust radius breakdown! Terminating...\n')
                    break

            elif self.globalization is None:
                # add the full step
                X.primal.plus(P.primal)
                X.dual.plus(P.dual)

                # solve states at the new step
                state.equals_primal_solution(X.primal)

                # if this is a matrix-based problem, tell the solver to factor
                # some important matrices to be used in the next iteration
                if self.factor_matrices and self.iter < self.max_iter:
                    factor_linear_system(X.primal, state)

                # perform an adjoint solution for the Lagrangian
                state_work.equals_objective_partial(X.primal, state)
                dCdU(X.primal, state).T.product(X.dual, adjoint)
                state_work.plus(adjoint)
                state_work.times(-1.)
                dRdU(X.primal, state).T.solve(state_work, adjoint)

            # send current solution info to the user
            solver_info = current_solution(self.iter, X.primal, state, adjoint, X.dual)
            if isinstance(solver_info, str):
                self.info_file.write('\n' + solver_info + '\n')

        ############################
        # END OF NEWTON LOOP

        self.info_file.write('\n')
        if converged:
            self.info_file.write('Optimization successful!\n')
        else:
            self.info_file.write('Optimization FAILED!\n')

        self.info_file.write(
            'Total number of nonlinear iterations: %i\n\n'%self.iter)

[docs]    def eval_merit(self, design, state, dual, cnstr):
        return objective_value(design, state) \
            + dual.inner(cnstr) \
            + 0.5*self.mu*(cnstr.norm2**2)

[docs]    def calc_pred_reduction(self):
        # calculate predicted decrease in the augmented Lagrangian
        self.normal_KKT.A.product(self.P.primal, self.dual_work)
        self.dual_work.plus(self.dLdX.dual)
        self.tangent_rhs.minus(self.dLdX.primal)
        self.tangent_rhs.times(0.5)
        self.pred_reduction = self.tangent_KKT.pred \
            + self.normal_step.primal.inner(self.tangent_rhs) \
            + 0.5*self.mu*self.dLdX.dual.norm2**2 \
            - self.P.dual.inner(self.dual_work) \
            - 0.5*self.mu*self.dual_work.norm2**2

        # calculate the new penalty parameter if necessary
        denom = 0.25*(self.dLdX.dual.norm2**2 - self.dual_work.norm2**2)
        if self.pred_reduction < self.mu*denom:
            self.mu += -self.pred_reduction/denom + self.mu_pow
            self.info_file.write('\n')
            self.info_file.write('   Mu updated -> %e\n'%self.mu)
            # recalculate the prediction with new penalty
            self.pred_reduction = self.tangent_KKT.pred \
                + self.normal_step.primal.inner(self.tangent_rhs) \
                + 0.5*self.mu*self.dLdX.dual.norm2**2 \
                - self.P.dual.inner(self.dual_work) \
                - 0.5*self.mu*self.dual_work.norm2**2

[docs]    def backtracking_step(self):
        # do some aliasing
        X = self.X
        state = self.state
        P = self.P
        adjoint = self.adjoint
        dLdX = self.dLdX
        design_work = [self.design_work, self.design_save]
        state_work = self.state_work
        adjoint_work = self.adjoint_work
        dual_work = self.dual_work
        kkt_work = self.kkt_work

        # compute the merit value at the current step
        f_init = self.eval_merit(
            X.primal, state, X.dual, dLdX.dual)

        # compute the merit function derivative w.r.t. design
        design_work[0].equals(dLdX.primal)
        dCdX(X.primal, state).T.product(dLdX.dual, design_work[1])
        design_work[1].times(self.mu)
        design_work[0].plus(design_work[1])
        dCdU(X.primal, state).T.product(dLdX.dual, state_work)
        state_work.times(-1.)
        dRdU(X.primal, state).T.solve(state_work, adjoint_work)
        dRdX(X.primal, state).T.product(adjoint_work, design_work[1])
        design_work[1].times(self.mu)
        design_work[0].plus(design_work[1])

        # compute the directional derivative for the merit function
        p_dot_grad = design_work[0].inner(P.primal)

        self.info_file.write('\n')
        self.info_file.write(
            '   primal_step    = %e\n'%P.primal.norm2 +
            '   lambda_step    = %e\n'%P.dual.norm2 +
            '\n' +
            '   p_dot_grad     = %e\n'%p_dot_grad)

        # if p_dot_grad >= 0:
        #     raise ValueError('Search direction is not a descent direction!')

        # start line search iterations
        max_iter = 5
        iters = 0
        min_radius_active = False
        converged = False
        decr_cond = 1e-4
        rdtn_factor = 0.5
        min_alpha = 1e-3
        alpha = 1.0
        self.info_file.write('\n')
        while iters <= max_iter:
            iters += 1

            self.info_file.write(
                'Back-tracking : iter %i\n'%iters +
                '   alpha      = %f\n'%alpha)

            # calculate the next step
            kkt_work.primal.equals_ax_p_by(1., X.primal, alpha, P.primal)
            kkt_work.dual.equals_ax_p_by(1., X.dual, 1., P.dual)
            kkt_work.primal.enforce_bounds()

            # solve for the states
            if state_work.equals_primal_solution(kkt_work.primal):
                # evaluate constraints
                dual_work.equals_constraints(kkt_work.primal, state_work)

                # calculate the merit function
                f_next = self.eval_merit(
                    kkt_work.primal, state_work,
                    kkt_work.dual, dual_work)

                # calculate sufficient decrease
                f_suff = f_init + decr_cond*alpha*p_dot_grad

            else:
                # state solution failed!
                f_suff = np.nan
                f_next = np.nan

            # evaluate step
            self.info_file.write(
                '   f_suff     = %e\n'%f_suff +
                '   f_next     = %e\n'%f_next)
            if f_next <= f_suff:
                self.info_file.write(
                    'Line search succeeded!\n')
                converged = True
                break
            else:
                self.info_file.write(
                    '   Bad step! Back-tracking...\n')
                alpha = max(rdtn_factor*alpha, min_alpha)
                if alpha == min_alpha:
                    self.info_file.write(
                        'Minimum step reached! Terminating...\n')
                    break

        # deal with step acceptance
        shrink = False
        if converged:
            # line search converged, so we accept step
            X.equals(kkt_work)
            state.equals(state_work)

            # if this is a matrix-based problem, tell the solver to
            # factor some important matrices to be used in the next
            # iteration
            if self.factor_matrices and self.iter < self.max_iter:
                factor_linear_system(X.primal, state)

            # perform an adjoint solution for the Lagrangian
            state_work.equals_objective_partial(X.primal, state)
            dCdU(X.primal, state).T.product(X.dual, adjoint)
            state_work.plus(adjoint)
            state_work.times(-1.)
            dRdU(X.primal, state).T.solve(state_work, adjoint)

            # flag for trust radius shrinking
            if alpha < 1:
                shrink = True
        else:
            # line search failed, need to shrink trust radius
            shrink = True

        if shrink:
            # shrink trust radius
            if self.radius > self.min_radius:
                self.radius = \
                    max(alpha*P.primal.norm2, self.min_radius)
                self.info_file.write(
                    '   Radius shrunk -> %f\n'%self.radius)
            else:
                self.info_file.write(
                    '   Reached minimum radius!\n')
                min_radius_active = True
        else:
            # increase trust radius if it was active
            if self.tangent_KKT.trust_active:
                self.info_file.write('   Trust radius active...\n')
                if self.radius < self.max_radius:
                    self.radius = \
                        min(2.*self.radius, self.max_radius)
                    self.info_file.write(
                        '   Radius increased -> %f\n'%self.radius)
                else:
                    self.info_file.write(
                        '   Max radius reached!\n')
                min_radius_active = False

        self.info_file.write('\n')
        return converged, min_radius_active

[docs]    def trust_step(self):
        # do some aliasing
        X = self.X
        P = self.P
        state = self.state
        adjoint = self.adjoint
        dLdX = self.dLdX
        dual_work = self.dual_work
        state_work = self.state_work
        kkt_work = self.kkt_work
        tangent_rhs = self.tangent_rhs
        tangent_step = self.tangent_step
        normal_step = self.normal_step

        # compute the merit value at the current step
        merit_init = self.eval_merit(X.primal, state, X.dual, dLdX.dual)

        # start trust region loop
        max_iter = 5
        iters = 0
        min_radius_active = False
        converged = False
        self.info_file.write('\n')
        while iters <= max_iter:
            iters += 1
            # save current point
            kkt_work.equals(X)

            # get the new design point
            X.primal.plus(P.primal)
            X.primal.enforce_bounds()
            X.dual.plus(P.dual)

            # solve states at the new step
            if state_work.equals_primal_solution(X.primal):
                # evaluate the constraint terms at the new step
                dual_work.equals_constraints(X.primal, state_work)

                # compute the merit value at the new step
                merit_next = self.eval_merit(X.primal, state_work, X.dual, dual_work)

                # evaluate the quality of the FLECS model
                rho = (merit_init - merit_next)/self.pred_reduction
            else:
                merit_next = np.nan
                rho = np.nan

            # reset step back
            X.equals(kkt_work)

            self.info_file.write(
                'Trust Region Step : iter %i\n'%iters +
                '   primal_step    = %e\n'%P.primal.norm2 +
                '   lambda_step    = %e\n'%P.dual.norm2 +
                '\n' +
                '   merit_init     = %e\n'%merit_init +
                '   merit_next     = %e\n'%merit_next +
                '   pred           = %e\n'%self.pred_reduction +
                '   rho            = %e\n'%rho)

            # modify radius based on model quality
            if rho <= 0.01 or np.isnan(rho):
                # model is bad! -- shrink radius and re-solve
                self.radius = max(0.5*P.primal.norm2, self.min_radius)
                if self.radius == self.min_radius:
                    self.info_file.write(
                        '      Reached minimum radius! ' +
                        'Exiting globalization...\n')
                    min_radius_active = True
                    break
                else:
                    self.info_file.write(
                        '   Re-solving with smaller radius -> ' +
                        '%f\n'%self.radius)

                    # apply a trust radius check to the normal step
                    normal_step_norm = normal_step.primal.norm2
                    if normal_step_norm > 0.8*self.radius:
                        normal_step.primal.times(
                            0.8*self.radius/normal_step_norm)

                    # calculate the tangent step radius
                    self.tangent_KKT.radius = np.sqrt(
                        self.radius**2 - normal_step.primal.norm2**2)

                    # set up the RHS vector for the tangent-step solve
                    self.tangent_KKT.multiply_W(normal_step.primal, tangent_rhs.primal)
                    tangent_rhs.plus(dLdX.primal)
                    tangent_rhs.times(-1.)

                    # solve for the tangent step
                    self.tangent_KKT.solve(
                        tangent_rhs, tangent_step, self.krylov_tol)

                    # assemble the complete step
                    P.primal.equals_ax_p_by(1., normal_step.primal, 1, tangent_step)

                    # calculate predicted decrease in the augmented Lagrangian
                    self.calc_pred_reduction()
            else:
                # model is okay -- accept primal step
                self.info_file.write('\nStep accepted!\n')
                X.primal.plus(P.primal)
                X.primal.enforce_bounds()
                X.dual.plus(P.dual)

                # solve states at the new step
                state.equals_primal_solution(X.primal)

                # if this is a matrix-based problem, tell the solver to factor
                # some important matrices to be used in the next iteration
                if self.factor_matrices and self.iter < self.max_iter:
                    factor_linear_system(X.primal, state)

                # perform an adjoint solution for the Lagrangian
                state_work.equals_objective_partial(X.primal, state)
                dCdU(X.primal, state).T.product(X.dual, adjoint)
                state_work.plus(adjoint)
                state_work.times(-1.)
                dRdU(X.primal, state).T.solve(state_work, adjoint)

                # check the trust radius
                if self.tangent_KKT.trust_active:
                    # if active, decide if we want to increase it
                    self.info_file.write('Trust radius active...\n')
                    if rho >= 0.5:
                        # model is good enough -- increase radius
                        if self.radius < self.max_radius:
                            self.radius = min(2.*self.radius, self.max_radius)
                            self.info_file.write(
                                '   Radius increased -> %f\n'%self.radius)
                        else:
                            self.info_file.write(
                                '   Max radius reached!\n')
                        min_radius_active = False

                # trust radius globalization worked, break loop
                converged = True
                self.info_file.write('\n')
                break

        return converged, min_radius_active