Source code for kona.algorithms.constrained_rsnk

from kona.algorithms.base_algorithm import OptimizationAlgorithm


[docs]class ConstrainedRSNK(OptimizationAlgorithm): """ A reduced-space Newton-Krylov optimization algorithm for PDE-governed equality constrained problems, globalized with a trust-region approach. This algorithm uses a 2nd order adjoint formulation of the KKT matrix-vector product, in conjunction with a novel Krylov-method called `FLECS<http://dx.doi.org/10.1137/140994496>`_ for non-convex saddle point problems. More information on this reduced-space Newton-Krylov appoach can be found `in this paper <http://dx.doi.org/10.1007/s00158-017-1734-0>`_. Attributes ---------- grad_norm0, feas_norm0, kkt_norm0 : float Initial optimality norms. iter : int Optimization iteration counter. factor_matrices : bool Boolean flag for matrix-based PDE solvers. radius, min_radius, max_radius : float Trust radius parameters. mu, mu_init, mu_max, mu_pow, eta : float Augmented Lagrangian constraint factor parameters. scale, grad_scale, feas_scale : float Optimality metric normalization factors. KKT_matrix : :class:`~kona.linalg.matrices.hessian.ReducedKKTVector` Matrix object defining the KKT matrix-vector product. precond : :class:`~kona.linalg.matrices.hessian.basic.BaseHessian`-like Matrix object defining the preconditioner to the KKT system. krylov : :class:`~kona.linalg.solvers.krylov.FLECS` A krylov solver object used to solve the system defined by this matrix. globalization : string Flag to determine solution globalization type. """ def __init__(self, primal_factory, state_factory, eq_factory, ineq_factory, optns=None): # trigger base class initialization super(ConstrainedRSNK, self).__init__( primal_factory, state_factory, eq_factory, ineq_factory, optns ) # number of vectors required in solve() method self.primal_factory.request_num_vectors(6 + 1) self.state_factory.request_num_vectors(3) self.eq_factory.request_num_vectors(12 + 2) # misc variables self.grad_norm0 = EPS self.feas_norm0 = EPS self.kkt_norm0 = EPS self.iter = 0 # general options ############################################################ self.factor_matrices = get_opt(self.optns, False, 'matrix_explicit') # trust radius settings ############################################################ self.radius = get_opt(self.optns, 0.5, 'trust', 'init_radius') self.min_radius = get_opt(self.optns, 0.5/(2**3), 'trust', 'min_radius') self.max_radius = get_opt(self.optns, 0.5*(2**3), 'trust', 'max_radius') # augmented Lagrangian settings ############################################################ self.mu = get_opt(self.optns, 1.0, 'penalty', 'mu_init') self.mu_init = self.mu self.mu_pow = get_opt(self.optns, 0.5, 'penalty', 'mu_pow') self.mu_max = get_opt(self.optns, 1e5, 'penalty', 'mu_max') self.eta = 1./(self.mu**0.1) # reduced KKT settings ############################################################ self.nu = get_opt(self.optns, 0.95, 'rsnk', 'nu') reduced_optns = get_opt(self.optns, {}, 'rsnk') reduced_optns['out_file'] = self.info_file self.KKT_matrix = ReducedKKTMatrix( [self.primal_factory, self.state_factory, self.eq_factory], reduced_optns) self.mat_vec = self.KKT_matrix.product # KKT system preconditiner settings ############################################################ self.precond = get_opt(self.optns, None, 'rsnk', 'precond') self.idf_schur = None if self.precond is None: # use identity matrix product as preconditioner self.eye = IdentityMatrix() self.precond = self.eye.product elif self.precond is 'idf_schur': self.idf_schur = ReducedSchurPreconditioner( [primal_factory, state_factory, eq_factory, ineq_factory]) self.precond = self.idf_schur.product else: raise BadKonaOption(self.optns, 'rsnk', 'precond') # krylov solver settings ############################################################ krylov_optns = { 'krylov_file':get_opt( self.optns, 'kona_krylov.dat', 'rsnk', 'krylov_file'), 'subspace_size':get_opt(self.optns, 10, 'rsnk', 'subspace_size'), 'check_res':get_opt(self.optns, True, 'rsnk', 'check_res'), 'rel_tol':get_opt(self.optns, 1e-2, 'rsnk', 'rel_tol'), } self.krylov = FLECS( [self.primal_factory, self.eq_factory], krylov_optns) # get globalization options ############################################################ self.globalization = get_opt(self.optns, 'trust', 'globalization') if self.globalization not in ['trust', 'filter', None]: raise TypeError( 'Invalid globalization! ' + 'Can only use \'trust\' or \'filter\'. ' + 'If you want to skip globalization, set to None.') else: if self.globalization == 'filter': self.filter = SimpleFilter() def _write_header(self): self.hist_file.write( '# Kona constrained RSNK convergence history file\n' + '# iters' + ' '*5 + ' cost' + ' '*5 + 'optimality ' + ' '*5 + 'feasibility ' + ' '*5 + 'objective ' + ' '*5 + 'mu param ' + ' '*5 + 'radius ' + '\n' ) def _write_history(self, opt, feas, obj): self.hist_file.write( '%7i'%self.iter + ' '*5 + '%7i'%self.primal_factory._memory.cost + ' '*5 + '%11e'%opt + ' '*5 + '%11e'%feas + ' '*5 + '%11e'%obj + ' '*5 + '%11e'%self.mu + ' '*5 + '%11e'%self.radius + '\n' ) def _generate_KKT_vector(self): primal = self.primal_factory.generate() dual = self.eq_factory.generate() return ReducedKKTVector(primal, dual)
[docs] def solve(self): self._write_header() self.info_file.write( '\n' + '**************************************************\n' + '*** Using FLECS-based Algorithm ***\n' + '**************************************************\n' + '\n') # generate composite KKT vectors X = self._generate_KKT_vector() P = self._generate_KKT_vector() dLdX = self._generate_KKT_vector() kkt_rhs = self._generate_KKT_vector() kkt_save = self._generate_KKT_vector() kkt_work = self._generate_KKT_vector() # generate state vectors state = self.state_factory.generate() state_work = self.state_factory.generate() adjoint = self.state_factory.generate() # generate dual vectors dual_work = self.eq_factory.generate() # initialize basic data for outer iterations converged = False # evaluate the initial design before starting outer iterations X.equals_init_guess() state.equals_primal_solution(X.primal) if self.factor_matrices and self.iter < self.max_iter: factor_linear_system(X.primal, state) # perform an adjoint solution for the Lagrangian adjoint.equals_lagrangian_adjoint(X, state, state_work) # send initial point info to the user solver_info = current_solution(self.iter, X.primal, state, adjoint, X.dual) if isinstance(solver_info, str): self.info_file.write('\n' + solver_info + '\n') # BEGIN NEWTON LOOP HERE ############################### min_radius_active = False for i in xrange(self.max_iter): # advance iteration counter self.iter += 1 # evaluate optimality, feasibility and KKT norms dLdX.equals_KKT_conditions(X, state, adjoint) # print info on current point self.info_file.write( '==========================================================\n' + 'Beginning Major Iteration %i\n\n'%self.iter) self.info_file.write( 'primal vars = %e\n'%X.primal.norm2) self.info_file.write( 'multipliers = %e\n\n'%X.dual.norm2) if self.iter == 1: # calculate initial norms self.grad_norm0 = dLdX.primal.norm2 self.feas_norm0 = max(dLdX.dual.norm2, EPS) self.kkt_norm0 = np.sqrt( self.feas_norm0**2 + self.grad_norm0**2) # set current norms to initial kkt_norm = self.kkt_norm0 grad_norm = self.grad_norm0 feas_norm = self.feas_norm0 # print out convergence norms self.info_file.write( 'grad_norm0 = %e\n'%self.grad_norm0 + 'feas_norm0 = %e\n'%self.feas_norm0) # calculate convergence tolerances grad_tol = self.primal_tol feas_tol = self.cnstr_tol else: # calculate current norms grad_norm = dLdX.primal.norm2 feas_norm = max(dLdX.dual.norm2, EPS) kkt_norm = np.sqrt(feas_norm**2 + grad_norm**2) # update the augmented Lagrangian penalty self.info_file.write( 'grad_norm = %e (%e <-- tolerance)\n'%( grad_norm, grad_tol) + 'feas_norm = %e (%e <-- tolerance)\n'%( feas_norm, feas_tol)) # update penalty term ref_norm = min(grad_norm, feas_norm) self.mu = max( self.mu, self.mu_init*((self.feas_norm0/ref_norm)**self.mu_pow)) self.mu = min(self.mu, self.mu_max) # write convergence history obj_val = objective_value(X.primal, state) self._write_history(grad_norm, feas_norm, obj_val) # check for convergence if (grad_norm < grad_tol) and (feas_norm < feas_tol): converged = True break # compute krylov tolerances in order to achieve superlinear # convergence but to avoid oversolving krylov_tol = self.krylov.rel_tol*min( 1.0, np.sqrt(kkt_norm/self.kkt_norm0)) krylov_tol = max(krylov_tol, min(grad_tol/grad_norm, feas_tol/feas_norm)) self.info_file.write('\nkrylov tol = %e\n'%krylov_tol) # set other solver and product options self.krylov.rel_tol = krylov_tol self.krylov.radius = self.radius self.krylov.mu = self.mu # linearize the KKT matrix self.KKT_matrix.linearize(X, state, adjoint) if self.idf_schur is not None: self.idf_schur.linearize(X.primal, state) # move the vector to the RHS kkt_rhs.equals(dLdX) kkt_rhs.times(-1.) # reset the primal-dual step vector P.equals(0.0) # trigger the krylov solution self.krylov.solve(self.mat_vec, kkt_rhs, P, self.precond) self.radius = self.krylov.radius # apply globalization if self.globalization == 'trust': old_flag = min_radius_active success, min_radius_active = self.trust_step( X, state, adjoint, P, kkt_rhs, state_work, dual_work, kkt_work, kkt_save) # watchdog on trust region failures if min_radius_active and old_flag: self.info_file.write( 'Trust radius breakdown! Terminating...\n') break elif self.globalization == 'filter': old_flag = min_radius_active success, min_radius_active = self.filter_step( X, state, P, kkt_rhs, kkt_work, state_work, dual_work) # watchdog on trust region failures if min_radius_active and old_flag: self.info_file.write( 'Trust radius breakdown! Terminating...\n') break # if filter was successful, compute adjoint for next iteration if success: if self.factor_matrices and self.iter < self.max_iter: factor_linear_system(X.primal, state) adjoint.equals_lagrangian_adjoint(X, state, state_work) else: # accept step X.primal.plus(P.primal) X.dual.plus(P.dual) # calculate states state.equals_primal_solution(X.primal) # if this is a matrix-based problem, tell the solver to factor # some important matrices to be used in the next iteration if self.factor_matrices and self.iter < self.max_iter: factor_linear_system(X.primal, state) # perform an adjoint solution for the Lagrangian adjoint.equals_lagrangian_adjoint(X, state, state_work) # send current solution info to the user solver_info = current_solution( self.iter, X.primal, state, adjoint, X.dual) if isinstance(solver_info, str): self.info_file.write('\n' + solver_info + '\n') ############################ # END OF NEWTON LOOP if converged: self.info_file.write('Optimization successful!\n') else: self.info_file.write('Optimization FAILED!\n') self.info_file.write( 'Total number of nonlinear iterations: %i\n\n'%self.iter)
[docs] def filter_step(self, X, state, P, kkt_rhs, kkt_work, state_work, dual_work): filter_success = False min_radius_active = False max_filter_iter = 3 for i in xrange(max_filter_iter): self.info_file.write('\nFilter Step : iter %i\n'%(i + 1)) X.plus(P) state_work.equals(state) # save state for reverting later solve_failed = False if state.equals_primal_solution(X.primal): obj = objective_value(X.primal, state) dual_work.equals_constraints(X.primal, state) cnstr_norm = dual_work.norm2 if self.filter.dominates(obj, cnstr_norm): # point is acceptable so just check radius and break out self.info_file.write( ' New point accepted!\n') if i == 0 and self.krylov.trust_active and self.radius < self.max_radius: self.radius = min(2*self.radius, self.max_radius) self.krylov.radius = self.radius self.info_file.write( ' Radius increased -> %f\n'%self.radius) filter_success = True break else: # point is not acceptable, revert step X.minus(P) state.equals(state_work) else: # state solve failed, revert step self.info_file.write( ' State-solve failed!\n') solve_failed = True X.minus(P) state.equals(state_work) # at the first iteration, try a second-order correction if i == 0 and not solve_failed: kkt_work.equals(P) self.info_file.write( ' Attempting a second order correction...') self.krylov.apply_correction(dual_work, P) X.plus(P) if state.equals_primal_solution(X.primal): obj = objective_value(X.primal, state) dual_work.equals_constraints(X.primal, state) cnstr_norm = dual_work.norm2 if self.filter.dominates(obj, cnstr_norm): # point is acceptable so just break out self.info_file.write( 'SUCCESS!\n') self.info_file.write( ' New point accepted!\n') filter_success = True break else: # point is still not acceptable, revert step X.minus(P) state.equals(state_work) self.info_file.write( 'FAILED!\n') else: # state solve failed, revert step self.info_file.write( ' State-solve failed!\n') X.minus(P) state.equals(state_work) # if we got here, filter has dominated the point self.info_file.write(' New point rejected!\n') # shrink radius and re-solve if self.radius == self.min_radius: self.info_file.write(' Reached minimum radius! Exiting filter...\n') min_radius_active = True break else: self.radius = max(0.25*self.radius, self.min_radius) self.info_file.write(' Radius reduced -> %f!\n'%self.radius) self.krylov.radius = self.radius self.info_file.write(' Re-solving step...\n') self.krylov.re_solve(kkt_rhs, P) self.info_file.write('\n') return filter_success, min_radius_active
[docs] def trust_step(self, X, state, adjoint, P, kkt_rhs, state_work, dual_work, kkt_work, kkt_save): # start trust region loop max_iter = 6 iters = 0 min_radius_active = False converged = False self.info_file.write('\n') while iters <= max_iter: iters += 1 # evaluate the constraint term at the current step dual_work.equals_constraints(X.primal, state) # compute the merit value at the current step merit_init = objective_value(X.primal, state) \ + X.dual.inner(dual_work) \ + 0.5*self.mu*(dual_work.norm2**2) # add the FLECS step kkt_work.equals_ax_p_by(1., X, 1., P) # solve states at the new step if state_work.equals_primal_solution(kkt_work.primal): # evaluate the constraint terms at the new step dual_work.equals_constraints( kkt_work.primal, state_work) # compute the merit value at the next step merit_next = objective_value(kkt_work.primal, state) \ + X.dual.inner(dual_work) \ + 0.5*self.mu*(dual_work.norm2**2) # evaluate the quality of the FLECS model rho = (merit_init - merit_next)/self.krylov.pred_aug else: merit_next = np.nan rho = np.nan self.info_file.write( 'Trust Region Step : iter %i\n'%iters + ' primal_step = %e\n'%P.primal.norm2 + ' lambda_step = %e\n'%P.dual.norm2 + '\n' + ' merit_init = %e\n'%merit_init + ' merit_next = %e\n'%merit_next + ' pred_aug = %e\n'%self.krylov.pred_aug + ' rho = %e\n'%rho) # modify radius based on model quality if rho <= 0. or np.isnan(rho): # model is bad! -- first we try a 2nd order correction if iters == 1: # save the old step in case correction fails kkt_save.equals(P) # attempt a 2nd order correction self.info_file.write( ' Attempting a second order correction...\n') self.krylov.apply_correction(dual_work, P) elif iters == 2: # if we got here, the second order correction failed # reject step self.info_file.write( ' Correction failed! Resetting step...\n') P.equals(kkt_save) else: self.radius = max(0.5*self.radius, self.min_radius) if self.radius == self.min_radius: self.info_file.write( ' Reached minimum radius! ' + 'Exiting globalization...\n') min_radius_active = True break else: self.info_file.write( ' Re-solving with smaller radius -> ' + '%f\n'%self.radius) self.krylov.radius = self.radius self.krylov.re_solve(kkt_rhs, P) # self.radius = self.krylov.radius else: if iters == 2: # 2nd order correction worked -- yay! self.info_file.write(' Correction worked!\n') # model is okay -- accept primal step self.info_file.write('\nStep accepted!\n') # accept the new step entirely X.plus(P) state.equals_primal_solution(X.primal) # if this is a matrix-based problem, tell the solver to factor # some important matrices to be used in the next iteration if self.factor_matrices and self.iter < self.max_iter: factor_linear_system(X.primal, state) # perform an adjoint solution for the Lagrangian adjoint.equals_lagrangian_adjoint(X, state, state_work) # check the trust radius if self.krylov.trust_active: # if active, decide if we want to increase it self.info_file.write('Trust radius active...\n') if rho >= 0.5: # model is good enough -- increase radius self.radius = min( max(2.*P.primal.norm2, self.min_radius), self.max_radius) # self.radius = min(2.*self.radius, self.max_radius) self.info_file.write( ' Radius increased -> %f\n'%self.radius) min_radius_active = False # trust radius globalization worked, break loop converged = True self.info_file.write('\n') break return converged, min_radius_active
# imports here to prevent circular errors import numpy as np from copy import deepcopy from kona.options import BadKonaOption, get_opt from kona.linalg.common import current_solution, objective_value from kona.linalg.common import factor_linear_system from kona.linalg.vectors.composite import ReducedKKTVector from kona.linalg.matrices.common import IdentityMatrix from kona.linalg.matrices.hessian import ReducedKKTMatrix from kona.linalg.matrices.preconds import ReducedSchurPreconditioner from kona.linalg.solvers.krylov import FLECS from kona.linalg.solvers.util import EPS from kona.algorithms.util.filter import SimpleFilter