Source code for kona.linalg.solvers.util

import numpy as np

try:
    from scipy.linalg import solve_triangular
    scipy_exists = True
except ImportError:
    scipy_exists = False

EPS = np.finfo(np.float64).eps
EPS32 = np.finfo(np.float32).eps

def abs_sign(x, y):
    """
    Returns the value :math:`|x|\\mathsf{sign}(y)`; used in GMRES, for
    example.

    Parameters
    ----------
    x : float
    y : float

    Returns
    -------
    float : :math:`|x|\\mathsf{sign}(y)`
    """
    return abs(x)*np.sign(y)

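# Example (illustrative): abs_sign(-3.0, -2.0) returns -3.0, since
# |x| = 3.0 and sign(y) = -1.0; generate_givens below uses this to give
# the sine/cosine the sign of the dominant component.
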
def calc_epsilon(eval_at_norm, mult_by_norm):
    """
    Determines the perturbation parameter for forward-difference based
    matrix-vector products.

    Parameters
    ----------
    eval_at_norm : float
        the norm of the vector at which the Jacobian-like matrix is evaluated
    mult_by_norm : float
        the norm of the vector that is being multiplied

    Returns
    -------
    float : perturbation parameter
    """
    if mult_by_norm < EPS*eval_at_norm or mult_by_norm < EPS:
        # multiplying vector is zero in a relative or absolute sense
        return 1.0
    else:
        if eval_at_norm < EPS*mult_by_norm:
            # multiplying vector dominates, so treat eval_at vector like zero
            return np.sqrt(EPS)/mult_by_norm
        else:
            return np.sqrt(EPS)*eval_at_norm/mult_by_norm

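# Example (illustrative sketch; `residual` is a hypothetical nonlinear
# residual function and u, v are numpy arrays): calc_epsilon supplies the
# step size for a forward-difference Jacobian-vector product,
#
#     eps = calc_epsilon(np.linalg.norm(u), np.linalg.norm(v))
#     jac_vec = (residual(u + eps*v) - residual(u))/eps
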
def eigen_decomp(A):
    """
    Returns the (sorted) eigenvalues and eigenvectors of the symmetric part
    of a square matrix.

    The matrix A is stored in dense format and is not assumed to be exactly
    symmetric. The eigenvalues are found by calling np.linalg.eig, which is
    given 0.5*(A^T + A) as the input matrix, not A itself.

    Parameters
    ----------
    A : 2-D numpy.ndarray
        matrix stored in dense format; not necessarily symmetric

    Returns
    -------
    eig_vals : 1-D numpy.ndarray
        the eigenvalues in ascending order
    eig_vecs : 2-D numpy.ndarray
        the eigenvectors, sorted to match the eigenvalue ordering
    """
    eig_vals, eig_vec = np.linalg.eig(0.5*(A + A.T))
    idx = eig_vals.real.argsort()  # eig doesn't sort, so we have to
    # reorder the eigenvector columns with the same permutation that sorts
    # the eigenvalues, so that column k corresponds to eig_vals[k]
    return eig_vals[idx].real, eig_vec[:, idx].real

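# Example: eigenpairs of the symmetric part of a small nonsymmetric matrix,
#
#     A = np.array([[2.0, 1.0],
#                   [0.0, -1.0]])
#     vals, vecs = eigen_decomp(A)
#     # vals is ascending (vals[0] <= vals[1]), and vecs[:, k] is the
#     # eigenvector of 0.5*(A + A.T) associated with vals[k]
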
def apply_givens(s, c, h1, h2):
    """
    Applies a Givens rotation to a 2-vector.

    Parameters
    ----------
    s : float
        sine of the Givens rotation angle
    c : float
        cosine of the Givens rotation angle
    h1 : float
        first element of the 2x1 vector being transformed
    h2 : float
        second element of the 2x1 vector being transformed

    Returns
    -------
    h1, h2 : float
        the transformed elements
    """
    temp = c*h1 + s*h2
    h2 = c*h2 - s*h1
    h1 = temp
    return h1, h2

def generate_givens(dx, dy):
    """
    Generates the Givens rotation matrix for a given 2-vector.

    Based on givens() of SPARSKIT, which is based on p.202 of "Matrix
    Computations" by Golub and van Loan.

    Parameters
    ----------
    dx : float
        element of the 2x1 vector being transformed
    dy : float
        element of the 2x1 vector being set to zero

    Returns
    -------
    dx : float
        the magnitude of the rotated 2x1 vector
    dy : float
        zero, by construction
    s : float
        sine of the Givens rotation angle
    c : float
        cosine of the Givens rotation angle
    """
    if dx == 0.0 and dy == 0.0:
        c = 1.0
        s = 0.0
    elif abs(dy) > abs(dx):
        tmp = dx/dy
        dx = np.sqrt(1.0 + tmp*tmp)
        s = abs_sign(1.0/dx, dy)
        c = tmp*s
    elif abs(dy) <= abs(dx):
        tmp = dy/dx
        dy = np.sqrt(1.0 + tmp*tmp)
        c = abs_sign(1.0/dy, dx)
        s = tmp*c
    else:
        # dx and/or dy must be invalid (e.g. NaN)
        dx = 0.0
        dy = 0.0
        c = 1.0
        s = 0.0

    dx = abs(dx*dy)
    dy = 0.0
    return dx, dy, s, c

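# Example: generating a rotation that zeroes the second component, then
# applying it with apply_givens,
#
#     dx, dy, s, c = generate_givens(3.0, 4.0)   # dx = 5.0, dy = 0.0
#     h1, h2 = apply_givens(s, c, 3.0, 4.0)      # h1 = 5.0, h2 = 0.0
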
def lanczos_tridiag(mat_vec, Q, Q_init=False):
    """
    Uses the traditional Lanczos algorithm to compute a tridiagonalization.

    Since this is based on Arnoldi's method, we only require a matrix-vector
    product for the matrix of interest, and not the full explicit matrix
    itself.

    Parameters
    ----------
    mat_vec : function
        Matrix-vector product for a symmetric matrix.
    Q : List[KonaVector]
        Pre-allocated subspace array containing KonaVectors matching the
        vector-type of the product.
    Q_init : boolean
        If ``True``, start the subspace with the Q[0] already stored. If
        ``False``, generate a normalized vector of ones in Q[0] to start.

    Returns
    -------
    T : array_like
        Tridiagonalization of the matrix.
    """
    # do a bunch of checks and raise errors
    if len(Q) < 1:
        raise ValueError('Subspace container Q too small!')

    # size up the problem
    subspace_size = len(Q)
    T = np.zeros((subspace_size-1, subspace_size))

    if not Q_init:
        Q[0].equals(1.0)
        Q[0].divide_by(Q[0].norm2)

    for i in range(subspace_size-1):
        # perform the matrix-vector product
        mat_vec(Q[i], Q[i+1])
        # orthogonalize the new vector
        H = np.zeros((i+2, i+1))
        mod_GS_normalize(i, H, Q)
        # extract alpha from the orthogonalization
        T[i, i] = H[i, i]
        # extract beta from the orthogonalization
        T[i, i+1] = H[i+1, i]
        if i < subspace_size - 2:
            T[i+1, i] = H[i+1, i]

    return T

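# Example (illustrative sketch; assumes a KonaVector-like class `Vec` that
# provides the equals(), divide_by(), inner(), norm2 and equals_ax_p_by()
# interface used in this module):
#
#     Q = [Vec(n) for _ in range(m + 1)]   # space for m Lanczos iterations
#     def mat_vec(in_vec, out_vec):
#         # out_vec <- A*in_vec for the symmetric operator A of interest
#         ...
#     T = lanczos_tridiag(mat_vec, Q)      # T has shape (m, m + 1)
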
def lanczos_bidiag(fwd_mat_vec, Q, q_work, rev_mat_vec, P, p_work,
                   Q_init=False):
    """
    Uses the bi-orthogonal Lanczos algorithm to bidiagonalize a matrix.

    Since this is based on Arnoldi's method, we only require forward and
    reverse matrix-vector products for the matrix of interest, and not the
    full explicit matrix itself.

    Parameters
    ----------
    fwd_mat_vec : function
        Forward matrix-vector product for the matrix of interest.
    Q : List[KonaVector]
        Pre-allocated subspace array containing KonaVectors matching the
        vector-type of the forward product.
    q_work : KonaVector
        Work vector matching the KonaVector-type of the forward product.
    rev_mat_vec : function
        Reverse (transpose) matrix-vector product for the matrix of interest.
    P : List[KonaVector]
        Pre-allocated subspace array containing KonaVectors matching the
        vector-type of the reverse (transpose) product.
    p_work : KonaVector
        Work vector matching the KonaVector-type of the reverse product.
    Q_init : boolean
        If ``True``, start the Q-subspace with the Q[0] already stored. If
        ``False``, generate a normalized vector of ones in Q[0] to start.

    Returns
    -------
    B : array_like
        Truncated bidiagonalization of the matrix.
    """
    # do a bunch of checks and raise errors
    if len(P) < 1:
        raise ValueError('Subspace container P too small!')
    if len(Q) < 2:
        raise ValueError('Subspace container Q too small!')
    if len(P) != len(Q) - 1:
        raise ValueError('Subspace containers have different sizes!')

    # size up the problem
    subspace_size = len(P)
    B = np.zeros((subspace_size, subspace_size+1))

    if not Q_init:
        Q[0].equals(1.0)
        Q[0].divide_by(Q[0].norm2)

    for j in range(subspace_size):
        fwd_mat_vec(Q[j], P[j])
        if j > 0:
            p_work.equals(P[j-1])
            p_work.times(-B[j-1, j])
        else:
            p_work.equals(0.0)
        P[j].plus(p_work)
        B[j, j] = P[j].norm2  # alpha
        P[j].divide_by(B[j, j])
        rev_mat_vec(P[j], Q[j+1])
        H = np.zeros((j+2, j+1))
        mod_GS_normalize(j, H, Q)
        B[j, j+1] = H[-1, -1]  # beta

    return B

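# Example (illustrative sizing sketch, same KonaVector-like assumptions as
# above): for m bidiagonalization steps, P holds m vectors and Q holds
# m + 1, so that
#
#     B = lanczos_bidiag(fwd_mat_vec, Q, q_work, rev_mat_vec, P, p_work)
#
# returns B with shape (m, m + 1), the alphas on B[j, j] and the betas on
# B[j, j+1].
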
def solve_tri(A, b, lower=False):
    """
    Solve an upper-triangular system :math:`Ux = b` (lower=False) or a
    lower-triangular system :math:`Lx = b` (lower=True).

    Parameters
    ----------
    A : 2-D numpy.ndarray
        a triangular matrix
    b : 1-D numpy.ndarray
        the right-hand side of the system
    lower : boolean
        if True, A stores a lower-triangular matrix; otherwise A stores an
        upper-triangular matrix

    Returns
    -------
    x : 1-D numpy.ndarray
        the solution
    """
    if scipy_exists:
        # scipy's solver exploits the triangular structure
        x = solve_triangular(A, b, lower=lower)
    else:
        # fall back on a dense solve when scipy is unavailable
        x = np.linalg.solve(A, b)
    return x

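# Example: a 2x2 lower-triangular solve,
#
#     L = np.array([[2.0, 0.0],
#                   [1.0, 3.0]])
#     b = np.array([2.0, 7.0])
#     x = solve_tri(L, b, lower=True)   # x = [1.0, 2.0]
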
def solve_trust_reduced(H, g, radius):
    """
    Solves the reduced-space trust-region subproblem (the secular equation).

    This assumes the reduced-space objective is in the form
    :math:`g^Tx + \\frac{1}{2}x^T H x`. Furthermore, the case :math:`g = 0`
    is not handled presently.

    Parameters
    ----------
    H : 2-D numpy.ndarray
        reduced-space Hessian
    g : 1-D numpy.ndarray
        gradient in the reduced space
    radius : float
        trust-region radius

    Returns
    -------
    y : 1-D numpy.ndarray
        solution to the reduced-space trust-region problem
    lam : float
        Lagrange multiplier value
    pred : float
        predicted decrease in the objective
    """
    if radius < 0.0:
        raise ValueError(
            'trust-region radius must be nonnegative: radius = %f' % radius)
    if H.shape[0] != H.shape[1] or H.shape[1] != g.shape[0]:
        raise ValueError('reduced Hessian or gradient shape inconsistency')

    eig_vals, eig = eigen_decomp(H)
    eigmin = eig_vals[0]
    lam = 0.0
    n = H.shape[0]
    if eigmin > 1e-12:
        # Hessian is positive definite on span(Z), so solve for y and check
        # if ||y|| is inside the trust-region radius
        y, fnc, dfnc = secular_function(H, g, lam, radius)
        if fnc < 0.0:  # i.e. norm_2(y) < radius
            # compute predicted decrease in objective and return
            pred = 0.0
            for i in range(n):
                for j in range(n):
                    pred -= 0.5*y[i]*H[i, j]*y[j]
                pred -= g[i]*y[i]
            return y, lam, pred

    # if we get here, either the Hessian is not positive definite or
    # ||y|| > radius

    # find an upper bound for the bracket, lam < lam_h
    max_brk = 20
    dlam = 0.1*max(-eigmin, EPS)
    lam_h = max(-eigmin, 0.0) + dlam
    y, fnc_h, dfnc = secular_function(H, g, lam_h, radius)
    for k in range(max_brk):
        if fnc_h > 0.0:
            break
        dlam *= 0.1
        lam_h = max(-eigmin, 0.0) + dlam
        y, fnc_h, dfnc = secular_function(H, g, lam_h, radius)

    # find a lower bound for the bracket, lam_l < lam
    dlam = np.sqrt(EPS)
    lam_l = max(-eigmin, 0.0) + dlam
    y, fnc_l, dfnc = secular_function(H, g, lam_l, radius)
    for k in range(max_brk):
        if fnc_l < 0.0:
            break
        dlam *= 100.0
        lam_l = max(-eigmin, 0.0) + dlam
        y, fnc_l, dfnc = secular_function(H, g, lam_l, radius)

    # apply (safe-guarded) Newton's method to find lam
    dlam_old = abs(lam_h - lam_l)
    dlam = dlam_old
    max_Newt = 50
    tol = np.sqrt(EPS)
    lam_tol = np.sqrt(EPS)*dlam
    lam = 0.5*(lam_l + lam_h)
    y, fnc, dfnc = secular_function(H, g, lam, radius)
    res0 = abs(fnc)
    for l in range(max_Newt):
        # check if y lies on the trust region; if so, exit the loop
        if (abs(fnc) < tol*res0) or (abs(dlam) < lam_tol):
            break
        # choose a safe-guarded step
        if ((lam - lam_h)*dfnc - fnc)*((lam - lam_l)*dfnc - fnc) > 0.0 or \
                abs(2.0*fnc) > abs(dlam_old*dfnc):
            # use bisection if Newton is out of range or not decreasing fast
            dlam_old = dlam
            dlam = 0.5*(lam_h - lam_l)
            lam = lam_l + dlam
            if lam_l == lam:
                break
        else:
            # the Newton step is acceptable
            dlam_old = dlam
            dlam = fnc/dfnc
            temp = lam
            lam -= dlam
            if temp == lam:
                break
        # evaluate the function at the new lambda
        y, fnc, dfnc = secular_function(H, g, lam, radius)
        if fnc < 0.0:
            lam_l = lam
        else:
            lam_h = lam
    else:
        # loop completed without a break, so Newton's method failed
        raise Exception(
            "Newton's method failed to converge to a valid lambda")

    # compute predicted decrease in objective
    pred = -y.dot(0.5*np.array(H.dot(y)) + g)
    return y, lam, pred

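# Example: with a positive-definite Hessian whose unconstrained minimizer
# lies inside the trust region, the multiplier stays at zero,
#
#     H = np.array([[2.0, 0.0],
#                   [0.0, 4.0]])
#     g = np.array([-2.0, 0.0])
#     y, lam, pred = solve_trust_reduced(H, g, radius=10.0)
#     # y is approximately [1.0, 0.0], lam = 0.0, pred is approximately 1.0
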
def secular_function(H, g, lam, radius):
    """
    Computes the secular-equation residual and its derivative for
    solve_trust_reduced.

    Parameters
    ----------
    H : 2-D numpy.ndarray
        reduced-space Hessian
    g : 1-D numpy.ndarray
        gradient in the reduced space
    lam : float
        Lagrange multiplier value
    radius : float
        trust-region radius

    Returns
    -------
    y : 1-D numpy.ndarray
        the step
    fnc : float
        the value of the secular equation
    dfnc : float
        the derivative of the secular equation with respect to lam
    """
    if radius < 0.0:
        raise ValueError(
            'trust-region radius must be nonnegative: radius = %f' % radius)
    if H.shape[0] != H.shape[1] or H.shape[1] != g.shape[0]:
        raise ValueError('reduced Hessian or gradient shape inconsistency')

    # perform a Cholesky factorization, with regularization if necessary
    diag = max(1.0, lam)*0.01*EPS
    semidefinite = True
    reg_iter = 0
    max_iter = 20
    while semidefinite:
        reg_iter += 1
        try:
            H_hat = H + np.eye(H.shape[0])*(lam + diag)
            L = np.linalg.cholesky(H_hat)
            semidefinite = False
        except np.linalg.LinAlgError:
            diag *= 100.0
            if reg_iter >= max_iter:
                break
    if semidefinite:
        raise Exception('Regularization of Cholesky factorization failed')

    # solve (H + lam*I) y = -g via forward and backward substitution
    work = solve_tri(L, g, lower=True)
    y = solve_tri(L.T, work, lower=False)
    y *= -1.

    # compute the secular function
    norm_y = np.linalg.norm(y)
    fnc = 1.0/radius - 1.0/norm_y

    # compute its derivative
    work = solve_tri(L, y, lower=True)
    norm_work = np.linalg.norm(work)
    dfnc = -((norm_work/norm_y)**2)/norm_y

    return y, fnc, dfnc

def mod_GS_normalize(i, Hsbg, w):
    """
    Orthogonalizes the vector w[i+1] against the vectors w[0] ... w[i] using
    the modified Gram-Schmidt algorithm, re-orthogonalizing when needed, and
    then normalizes the result. The coefficients are stored in column i of
    Hsbg, with the final norm in Hsbg[i+1, i].

    Parameters
    ----------
    i : int
        index of the last vector in w to orthogonalize against
    Hsbg : 2-D numpy.ndarray
        Hessenberg-like array that receives the coefficients
    w : List[KonaVector]
        the subspace vectors; w[i+1] is modified in place
    """
    reorth = 0.98
    # get the norm of the vector being orthogonalized, and find the
    # threshold for re-orthogonalization
    nrm = w[i+1].inner(w[i+1])
    thr = nrm*reorth
    if abs(nrm) <= EPS:
        # norm of w[i+1] is effectively zero; it is linearly dependent
        # raise a LinAlgError to catch later
        raise np.linalg.LinAlgError
    elif nrm < -EPS:
        # the norm of w[i+1] < 0.0
        raise ValueError(
            'mod_gram_schmidt failed : w[i+1].inner(w[i+1]) < 0.0')
    elif np.isnan(nrm):
        raise ValueError('mod_gram_schmidt failed : w[i+1] = NaN')

    if i < 0:
        # just normalize and exit
        w[i+1].divide_by(np.sqrt(nrm))
        return

    # begin main Gram-Schmidt loop
    for k in range(i+1):
        prod = w[i+1].inner(w[k])
        Hsbg[k, i] = prod
        w[i+1].equals_ax_p_by(1.0, w[i+1], -prod, w[k])
        # check if reorthogonalization is necessary
        if prod**2 > thr:
            prod = w[i+1].inner(w[k])
            Hsbg[k, i] += prod
            w[i+1].equals_ax_p_by(1.0, w[i+1], -prod, w[k])
        # update norm and check size
        nrm -= Hsbg[k, i]**2
        if nrm < 0.0:
            nrm = 0.0
        thr = nrm*reorth

    # test the resulting vector
    nrm = w[i+1].norm2
    Hsbg[i+1, i] = nrm
    if nrm <= 0.0:
        # norm of w[i+1] is effectively zero; it is linearly dependent
        # raise a LinAlgError to catch later
        raise np.linalg.LinAlgError
    else:
        # scale the resulting vector and exit
        w[i+1].divide_by(nrm)
        return

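# Example: lanczos_tridiag above shows the intended calling pattern; once
# mat_vec has written the new Krylov vector into Q[i+1],
#
#     H = np.zeros((i+2, i+1))
#     mod_GS_normalize(i, H, Q)
#
# orthogonalizes and normalizes Q[i+1] against Q[0] ... Q[i], filling
# column i of H.
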
def mod_gram_schmidt(i, B, C, w, normalize=False):
    """
    Orthogonalizes the vector w against the vectors in C using the modified
    Gram-Schmidt algorithm, re-orthogonalizing when needed, and stores the
    coefficients in the ith column of B. Unlike mod_GS_normalize, this
    version does not normalize w unless normalize=True.

    Parameters
    ----------
    i : int
        column of B that receives the coefficients
    B : 2-D numpy.ndarray
        coefficient storage, with at least len(C) rows
    C : List[KonaVector]
        the vectors to orthogonalize against
    w : KonaVector
        the vector being orthogonalized; modified in place
    normalize : boolean
        if True, w is normalized after the orthogonalization
    """
    if len(C) <= 0:
        # nothing to do, exit
        return

    # get the norm of the vector being orthogonalized, and find the
    # threshold for re-orthogonalization
    reorth = 0.98
    nrm = w.inner(w)
    thr = nrm*reorth
    if abs(nrm) <= EPS:
        # norm of w is effectively zero; it is linearly dependent
        # raise a LinAlgError to catch later
        raise np.linalg.LinAlgError
    elif nrm < -EPS:
        # the norm of w < 0.0
        raise ValueError('mod_gram_schmidt failed : w.inner(w) < 0.0')
    elif np.isnan(nrm):
        raise ValueError('mod_gram_schmidt failed : w = NaN')

    # begin main Gram-Schmidt loop
    for k in range(len(C)):
        prod = w.inner(C[k])
        B[k, i] = prod
        w.equals_ax_p_by(1.0, w, -prod, C[k])
        # check if reorthogonalization is necessary
        if prod**2 > thr:
            prod = w.inner(C[k])
            B[k, i] += prod
            w.equals_ax_p_by(1.0, w, -prod, C[k])
        # update norm and check size
        nrm -= B[k, i]**2
        if nrm < 0.0:
            nrm = 0.0
        thr = nrm*reorth

    # test the resulting vector
    nrm = w.norm2
    if nrm <= 0.0:
        # norm of w is effectively zero; it is linearly dependent
        # raise a LinAlgError to catch later
        raise np.linalg.LinAlgError
    else:
        if normalize:
            w.divide_by(nrm)
        return

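# Example (illustrative sketch; `num_cols` is whatever column count the
# caller needs): orthogonalize a KonaVector w against a stored basis C
# without normalizing it,
#
#     B = np.zeros((len(C), num_cols))
#     mod_gram_schmidt(i, B, C, w)    # w is modified in place
#     w.divide_by(w.norm2)            # or pass normalize=True instead
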
def write_header(out_file, solver_name, res_tol, res_init):
    """
    Writes the Krylov solver data file header text.

    Parameters
    ----------
    out_file : file
        File handle for the write destination.
    solver_name : string
        Name of the Krylov solver type.
    res_tol : float
        Residual tolerance for convergence.
    res_init : float
        Initial residual norm.
    """
    out_file.write(
        '# %s residual history\n' % solver_name +
        '# residual tolerance target = %e\n' % res_tol +
        '# initial residual norm = %e\n' % res_init +
        '# iters' + ' '*12 + 'rel. res.\n')

def write_history(out_file, num_iter, res, res_init):
    """
    Writes the Krylov solver data file iteration history.

    Parameters
    ----------
    out_file : file
        File handle for the write destination.
    num_iter : int
        Current iteration count.
    res : float
        Current residual norm.
    res_init : float
        Initial residual norm.
    """
    out_file.write(
        ' %6i' % num_iter + ' '*12 + '%e\n' % (res/res_init))

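# Example: logging a short relative-residual history (the solver name
# string and file name are arbitrary),
#
#     with open('krylov_hist.dat', 'w') as out_file:
#         write_header(out_file, 'FGMRES', 1e-6, 1.0)
#         write_history(out_file, 0, 1.0, 1.0)
#         write_history(out_file, 1, 0.1, 1.0)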