from kona.linalg.solvers.krylov.basic import KrylovSolver
[docs]class FLECS(KrylovSolver):
"""
FLexible Equality-Constrained Subproblem Krylov iterative solver.
.. note::
For more information, see
`SIAM paper on FLECS <http://epubs.siam.org/doi/10.1137/140994496>`.
Attributes
----------
primal_factory : VectorFactory
Factory for DesignVector objects.
dual_factory : VectorFactory
Factory for DualVector objects.
mu : float
Quadratic subproblem constraint penalty factor.
grad_scale : float
Scaling coefficient for the primal space.
feas_scale : float
Scaling coefficient for the dual space.
lin_depend : boolean
Flag for Hessian linear dependence.
neg_curv : boolean
Flag for negative curvature in the search direction.
trust_active : boolean
Flag for trust-region detection.
Parameters
----------
vector_factories : tuple of VectorFactory
A pair of vector factories, one for Primal and one for Dual type.
optns : dict, optional
Optiona dictionary
"""
def __init__(self, vector_factories, optns=None):
super(FLECS, self).__init__(vector_factories, optns)
self.rel_tol = get_opt(self.optns, 1e-2, 'rel_tol')
self.abs_tol = get_opt(self.optns, 1e-12, 'abs_tol')
# set a default trust radius
# NOTE: this will be set by the optimization algorithm later
self.radius = 1.0
self.trust_active = False
# set a default quadratic subproblem constraint penalty
# NOTE: this will be set by the optimization algorithm later
self.mu = 0.1
# get scalings
self.grad_scale = get_opt(self.optns, 1.0, 'grad_scale')
self.feas_scale = get_opt(self.optns, 1.0, 'feas_scale')
# extract vector factories from the factory array
self.primal_factory = None
self.eq_factory = None
self.ineq_factory = None
for factory in vector_factories:
if factory._vec_type is DesignVector:
self.primal_factory = factory
elif factory._vec_type is DualVectorEQ:
self.eq_factory = factory
elif factory._vec_type is DualVectorINEQ:
self.ineq_factory = factory
# put in memory request
self.primal_factory.request_num_vectors(2*self.max_iter + 2)
if self.eq_factory is not None:
self.eq_factory.request_num_vectors(2*self.max_iter + 2)
if self.ineq_factory is not None:
self.ineq_factory.request_num_vectors(2*(2*self.max_iter + 2))
# initialize vector holder arrays
self.V = []
self.Z = []
def _generate_vector(self):
design = self.primal_factory.generate()
if self.eq_factory is not None and self.ineq_factory is not None:
slack = self.ineq_factory.generate()
primal = CompositePrimalVector(design, slack)
dual_eq = self.eq_factory.generate()
dual_ineq = self.ineq_factory.generate()
dual = CompositeDualVector(dual_eq, dual_ineq)
elif self.eq_factory is not None:
primal = design
dual = self.eq_factory.generate()
elif self.ineq_factory is not None:
slack = self.ineq_factory.generate()
primal = CompositePrimalVector(design, slack)
dual = self.ineq_factory.generate()
return ReducedKKTVector(primal, dual)
def _validate_options(self):
super(FLECS, self)._validate_options()
assert self.primal_factory is not None
if self.eq_factory is None:
assert self.ineq_factory is not None
def _reset(self):
# clear out all the vectors stored in V
# the data goes back to the stack and is used again later
for vector in self.V:
if self.eq_factory is not None and self.ineq_factory is not None:
del vector.primal.design
del vector.primal.slack
del vector.primal
del vector.dual.eq
del vector.dual.ineq
del vector.dual
elif self.eq_factory is not None:
del vector.primal
del vector.dual
elif self.ineq_factory is not None:
del vector.primal.design
del vector.primal.slack
del vector.dual
del vector
self.V = []
# clear out all vectors stored in Z
# the data goes back to the stack and is used again later
for vector in self.Z:
if self.eq_factory is not None and self.ineq_factory is not None:
del vector.primal.design
del vector.primal.slack
del vector.primal
del vector.dual.eq
del vector.dual.ineq
del vector.dual
elif self.eq_factory is not None:
del vector.primal
del vector.dual
elif self.ineq_factory is not None:
del vector.primal.design
del vector.primal.slack
del vector.dual
del vector
self.Z = []
# force garbage collection
gc.collect()
def _write_header(self, norm0, grad0, feas0):
self.out_file.write(
'#-------------------------------------------------\n' +
'# FLECS convergence history\n' +
'# residual tolerance target = %e\n'%self.rel_tol +
'# initial residual norm = %e\n'%norm0 +
'# initial gradient norm = %e\n'%grad0 +
'# initial constraint norm = %e\n'%feas0 +
'# iters' + ' '*5 +
'rel. res. ' + ' '*5 +
'rel. grad. ' + ' '*5 +
'rel. feas. ' + ' '*5 +
'aug. feas. ' + ' '*5 +
'pred ' + ' '*5 +
'pred. aug. ' + ' '*5 +
'mu ' + '\n'
)
def _write_history(self, res, grad, feas, feas_aug):
self.out_file.write(
' %5i'%self.iters + ' '*5 +
'%10e'%res + ' '*5 +
'%10e'%grad + ' '*5 +
'%10e'%feas + ' '*5 +
'%10e'%feas_aug + ' '*5 +
'%10e'%self.pred + ' '*5 +
'%10e'%self.pred_aug + ' '*5 +
'%10e'%self.mu + '\n'
)
[docs] def apply_correction(self, cnstr, step):
# perform some aliasing to improve readability
ZtZ_prim_r = self.ZtZ_prim[0:self.iters, 0:self.iters]
VtV_dual_r = self.VtV_dual[0:self.iters, 0:self.iters+1]
H_r = self.H[0:self.iters+1, 0:self.iters]
# construct and solve the subspace problem
VtVH = VtV_dual_r.dot(H_r)
A = ZtZ_prim_r + VtVH + VtVH.T
rhs = numpy.zeros(self.iters)
for k in xrange(self.iters):
rhs[k] = self.V[k].dual.inner(cnstr)
self.y = numpy.linalg.solve(A, rhs)
# construct the design update
# leave the dual solution untouched
for k in xrange(self.iters):
step.primal.equals_ax_p_by(
1.0, step.primal, self.y[k], self.Z[k].primal)
# trust radius check
# NOTE: THIS IS TEMPORARY
if step.primal.norm2 > self.radius:
self.trust_active = True
step.primal.times(self.radius/step.primal.norm2)
# compute a new predicted reduction associated with this step
# self.pred_aug = -self.y.dot(0.5*numpy.array(A.dot(self.y)) + rhs)
[docs] def solve_subspace_problems(self):
# extract some work arrays
y_r = self.y[0:self.iters]
g_r = self.g[0:self.iters+1]
H_r = self.H[0:self.iters+1, 0:self.iters]
VtZ_r = self.VtZ[0:self.iters+1, 0:self.iters]
VtZ_prim_r = self.VtZ_prim[0:self.iters+1, 0:self.iters]
VtZ_dual_r = self.VtZ_dual[0:self.iters+1, 0:self.iters]
VtV_dual_r = self.VtV_dual[0:self.iters+1, 0:self.iters+1]
ZtZ_prim_r = self.ZtZ_prim[0:self.iters, 0:self.iters]
# solve the reduced (primal-dual) problem (i.e.: FGMRES solution)
y_r, _ , _, _ = numpy.linalg.lstsq(H_r, g_r)
# make sure the data is persistent
self.y[0:self.iters] = y_r[:]
# compute residuals
res_red = H_r.dot(y_r) - g_r
self.beta = numpy.linalg.norm(res_red)
self.gamma = numpy.inner(res_red, VtV_dual_r.dot(res_red))
self.omega = -self.gamma
self.gamma = sqrt(max(self.gamma, 0.0))
self.omega = sqrt(max(numpy.inner(res_red, res_red) + self.omega, 0.0))
# find the Hessian of the objective and the Hessian of the augmented
# Lagrangian in the reduced space
Hess_red = 0.5*(VtZ_r.T.dot(H_r) + H_r.T.dot(VtZ_r)) \
- VtZ_dual_r.T.dot(H_r) \
- H_r.T.dot(VtZ_dual_r)
VtVH = VtV_dual_r.dot(H_r)
Hess_aug = self.mu * H_r.T.dot(VtVH)
Hess_aug += Hess_red
# compute the RHS for the augmented Lagrangian problem
rhs_aug = numpy.zeros(self.iters)
for k in xrange(self.iters):
rhs_aug[k] = -self.g[0]*(self.VtZ_prim[0, k] + self.mu*VtVH[0, k])
radius_aug = self.radius
try:
# compute the transformation to apply trust-radius directly
rhs_tmp = numpy.copy(rhs_aug)
# NOTE: Numpy Cholesky always returns a lower triangular matrix
# but UTU in C++ version contains both upper and lower information
L = numpy.linalg.cholesky(ZtZ_prim_r)
rhs_aug = solve_tri(L, rhs_tmp, lower=True)
for j in xrange(self.iters):
rhs_tmp[:] = Hess_aug[:,j]
vec_tmp = solve_tri(L, rhs_tmp, lower=True)
Hess_aug[:, j] = vec_tmp[:]
for j in xrange(self.iters):
rhs_tmp[:] = Hess_aug[j,:]
vec_tmp = solve_tri(L, rhs_tmp, lower=True)
Hess_aug[j,:] = vec_tmp[:]
vec_tmp, lamb, self.pred_aug = solve_trust_reduced(
Hess_aug, rhs_aug, radius_aug)
self.y_aug = solve_tri(L.T, vec_tmp, lower=False)
except numpy.linalg.LinAlgError:
# if Cholesky factorization fails, compute a conservative radius
eig_vals, _ = eigen_decomp(ZtZ_prim_r)
radius_aug = self.radius/sqrt(eig_vals[self.iters-1])
self.y_aug, lamb, self.pred_aug = solve_trust_reduced(
Hess_aug, rhs_aug, radius_aug)
# check if the trust-radius constraint is active
self.trust_active = False
if lamb > 10.*EPS:
self.trust_active = True
# compute residual norms for the augmented Lagrangian solution
res_red = H_r.dot(self.y_aug) - g_r
self.beta_aug = numpy.linalg.norm(res_red)
self.gamma_aug = numpy.inner(res_red, VtV_dual_r.dot(res_red))
self.gamma_aug = sqrt(max(self.gamma_aug, 0.0))
# set the dual reduced-space solution
self.y_mult.resize(self.iters)
self.y_mult[:] = y_r[:]
# compute the predicted reductions in the objective
self.pred = \
-0.5*numpy.inner(y_r, Hess_red.dot(y_r)) \
+ self.g[0]*numpy.inner(VtZ_prim_r[0, 0:self.iters], y_r)
# determine if negative curvature may be present
self.neg_curv = False
if (self.pred_aug - self.pred) > 0.05*abs(self.pred):
self.neg_curv = True
[docs] def solve(self, mat_vec, b, x, precond):
# validate solver options
self._validate_options()
# reset vector memory for a new fresh solution
self._reset()
# initialize some work data
self.g = numpy.zeros(self.max_iter + 1)
self.y = numpy.zeros(self.max_iter)
self.H = numpy.zeros((self.max_iter + 1, self.max_iter))
self.VtZ = numpy.zeros((self.max_iter + 1, self.max_iter))
self.ZtZ_prim = numpy.zeros((self.max_iter, self.max_iter))
self.VtZ_prim = numpy.zeros((self.max_iter + 1, self.max_iter))
self.VtZ_dual = numpy.zeros((self.max_iter + 1, self.max_iter))
self.VtV_dual = numpy.zeros((self.max_iter + 1, self.max_iter + 1))
self.y_aug = numpy.array([])
self.y_mult = numpy.array([])
self.iters = 0
# generate residual vector
res = self._generate_vector()
res.equals(b)
# calculate norm of rhs vector
grad0 = b.primal.norm2
feas0 = max(b.dual.norm2, EPS)
norm0 = b.norm2
# calculate initial (negative) residual and compute its norm
self.V.append(self._generate_vector())
self.V[0].equals(b)
self.V[0].primal.times(self.grad_scale)
self.V[0].dual.times(self.feas_scale)
# normalize the residual
self.beta = self.V[0].norm2
self.V[0].divide_by(self.beta)
self.VtV_dual[0, 0] = self.V[0].dual.inner(self.V[0].dual)
self.gamma = self.beta*sqrt(max(self.VtV_dual[0, 0], 0.0))
self.omega = sqrt(max(self.beta**2 - self.gamma**2, 0.0))
# initialize RHS of the reduced system
self.g[0] = self.beta
# output header information
self._write_header(norm0, grad0, feas0)
self.beta_aug = self.beta
self.gamma_aug = self.gamma
res_norm = norm0
self.pred = 0.0
self.pred_aug = 0.0
self._write_history(
res_norm/norm0,
self.omega/(self.grad_scale*grad0),
self.gamma/(self.feas_scale*feas0),
self.gamma_aug/(self.feas_scale*feas0))
# loop over all search directions
#################################
self.lin_depend = False
self.neg_curv = False
self.trust_active = False
for i in xrange(self.max_iter):
# advance iteration counter
self.iters += 1
# precondition self.V[i] and store results in self.Z[i]
self.Z.append(self._generate_vector())
precond(self.V[i], self.Z[i])
# add to Krylov subspace
self.V.append(self._generate_vector())
self.Z[i].primal.times(self.grad_scale)
self.Z[i].dual.times(self.feas_scale)
mat_vec(self.Z[i], self.V[i+1])
self.Z[i].primal.divide_by(self.grad_scale)
self.Z[i].dual.divide_by(self.feas_scale)
self.V[i+1].primal.times(self.grad_scale)
self.V[i+1].dual.times(self.feas_scale)
# modified Gram-Schmidt orthonormalization
try:
mod_GS_normalize(i, self.H, self.V)
except numpy.linalg.LinAlgError:
self.lin_depend = True
# compute new row and column of the VtZ matrix
for k in xrange(i+1):
self.VtZ_prim[k, i] = self.V[k].primal.inner(self.Z[i].primal)
self.VtZ_prim[i+1, k] = self.V[i+1].primal.inner(
self.Z[k].primal)
self.VtZ_dual[k, i] = self.V[k].dual.inner(self.Z[i].dual)
self.VtZ_dual[i+1, k] = self.V[i+1].dual.inner(self.Z[k].dual)
self.VtZ[k, i] = self.VtZ_prim[k, i] + self.VtZ_dual[k, i]
self.VtZ[i+1, k] = self.VtZ_prim[i+1, k] + self.VtZ_dual[i+1, k]
self.ZtZ_prim[k, i] = self.Z[k].primal.inner(self.Z[i].primal)
self.ZtZ_prim[i, k] = self.Z[i].primal.inner(self.Z[k].primal)
self.VtV_dual[k, i+1] = self.V[k].dual.inner(self.V[i+1].dual)
self.VtV_dual[i+1, k] = self.V[i+1].dual.inner(self.V[k].dual)
self.VtV_dual[i+1, i+1] = self.V[i+1].dual.inner(self.V[i+1].dual)
# solve the reduced problems and compute the residual
self.solve_subspace_problems()
# calculate the new residual norm
res_norm = (self.gamma/self.feas_scale)**2 + \
(self.beta**2 - self.gamma**2)/(self.grad_scale**2)
res_norm = sqrt(max(res_norm, 0.0))
# write convergence history
self._write_history(
res_norm/norm0,
self.omega/(self.grad_scale*grad0),
self.gamma/(self.feas_scale*feas0),
self.gamma_aug/(self.feas_scale*feas0))
# check for convergence
if ((self.gamma < self.rel_tol*self.feas_scale*feas0) and
(self.omega < self.rel_tol*self.grad_scale*grad0)) or \
((self.gamma < self.abs_tol) and (self.omega < self.abs_tol)):
break
# check for breakdown
if self.lin_depend:
break
#########################################
# finished looping over search directions
if self.neg_curv:
self.out_file.write('# negative curvature suspected\n')
if self.trust_active:
self.out_file.write('# trust-radius constraint active\n')
# compute solution: augmented-Lagrangian for primal, FGMRES for dual
x.equals(0.0)
for k in xrange(self.iters):
x.primal.equals_ax_p_by(
1.0, x.primal, self.y_aug[k], self.Z[k].primal)
x.dual.equals_ax_p_by(
1.0, x.dual, self.y_mult[k], self.Z[k].dual)
# scale the solution
x.primal.times(self.grad_scale)
x.dual.times(self.feas_scale)
# check residual
if self.check_res:
# calculate true residual for the solution
self.V[0].equals(0.0)
for k in xrange(self.iters):
self.V[0].equals_ax_p_by(
1.0, self.V[0], self.y_mult[k], self.Z[k])
mat_vec(self.V[0], res)
res.equals_ax_p_by(1.0, b, -1.0, res)
true_res = res.norm2
true_feas = res.dual.norm2
# print residual information
out_data = true_res/norm0
self.out_file.write(
'# FLECS final (true) rel. res. : ' +
'|res|/|res0| = %e\n'%out_data
)
# print constraint information
out_data = true_feas/feas0
self.out_file.write(
'# FLECS final (true) rel. feas. : ' +
'|feas|/|feas0| = %e\n'%out_data
)
# print warning for residual disagreement
if (abs(true_res - res_norm) > 0.01*self.rel_tol*norm0):
out_data = (true_res - res_norm)/norm0
self.out_file.write(
'# WARNING in FLECS: true residual norm and ' +
'calculated residual norm do not agree.\n' +
'# (res - beta)/res0 = %e\n'%out_data
)
# print warning for constraint disagreement
computed_feas = self.gamma/self.feas_scale
if (abs(true_feas - computed_feas) > 0.01*self.rel_tol*feas0):
out_data = (true_feas - computed_feas)/feas0
self.out_file.write(
'# WARNING in FLECS: true constraint norm and ' +
'calculated constraint norm do not agree.\n' +
'# (feas_true - feas_comp)/feas0 = %e\n'%out_data
)
[docs] def re_solve(self, b, x):
# calculate norms for the RHS vector
grad0 = b.primal.norm2
feas0 = max(b.dual.norm2, EPS)
norm0 = sqrt(grad0**2 + feas0**2)
# reset some prior data and re-solve the subspace problem
self.y_aug = numpy.array([])
self.y_mult = numpy.array([])
self.neg_curv = False
self.trust_active = False
self.radius /= self.grad_scale
self.solve_subspace_problems()
# compute residual norm
res_norm = (self.gamma/self.feas_scale)**2 + \
(self.beta**2 + self.gamma**2)/(self.grad_scale**2)
res_norm = sqrt(max(res_norm, 0.0))
# write into solution history
self.out_file.write(
'#-------------------------------------------------\n' +
'# FLECS resolving at new radius\n')
self._write_history(
res_norm/norm0,
self.omega/(self.grad_scale*grad0),
self.gamma/(self.feas_scale*feas0),
self.gamma_aug/(self.feas_scale*feas0))
if self.neg_curv:
self.out_file.write('# negative curvature suspected\n')
if self.trust_active:
self.out_file.write('# trust-radius constraint active\n')
# always use composite-step approach in re-solve
x.equals(0.0)
for k in xrange(self.iters):
x.primal.equals_ax_p_by(
1.0, x.primal, self.y_aug[k], self.Z[k].primal)
x.dual.equals_ax_p_by(
1.0, x.dual, self.y_mult[k], self.Z[k].dual)
x.primal.times(self.grad_scale)
x.dual.times(self.feas_scale)
# package imports at the bottom to prevent errors
import gc
import numpy
from numpy import sqrt
from kona.options import get_opt
from kona.linalg.vectors.common import *
from kona.linalg.vectors.composite import ReducedKKTVector
from kona.linalg.vectors.composite import CompositePrimalVector
from kona.linalg.solvers.util import \
solve_tri, solve_trust_reduced, eigen_decomp, mod_GS_normalize, EPS