Source code for kona.algorithms.util.linesearch

import sys

import numpy as np

from kona.options import get_opt
from kona.algorithms.util.merit import MeritFunction

################################################################################
#                        Step Interpolation Functions                          #
################################################################################

def quadratic_step(alpha_low, alpha_hi, f_low, f_hi, df_low):
    """
    Finds a new step between ``alpha_low`` and ``alpha_hi`` using quadratic
    interpolation.

    Parameters
    ----------
    alpha_low : float
        Lower bound for step calculations.
    alpha_hi : float
        Upper bound for step calculations.
    f_low : float
        Function value at lower bound.
    f_hi : float
        Function value at upper bound.
    df_low : float
        Function derivative at lower bound.

    Returns
    -------
    float : Interpolated step size.
    """
    dalpha = alpha_hi - alpha_low
    step = alpha_low - 0.5*df_low*(dalpha**2)/(f_hi - f_low - df_low*dalpha)

    min_alpha = min(alpha_hi, alpha_low)
    max_alpha = max(alpha_hi, alpha_low)
    if (step < min_alpha) or (step > max_alpha):
        raise Exception('quadratic_step(): step = %f\n'%step +
                        'alpha_low = %f\n'%alpha_low +
                        'alpha_hi = %f\n'%alpha_hi +
                        'f_low = %f\n'%f_low +
                        'f_hi = %f\n'%f_hi +
                        'df_low = %f\n'%df_low +
                        '>> Check StrongWolfe._zoom() for bugs! <<')

    if (step - min_alpha) < 1.e-2*(max_alpha - min_alpha):
        step = 0.5*(alpha_low + alpha_hi)

    return step
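
# A quick sanity check of the interpolation (hypothetical usage, not from the
# Kona test suite): for phi(alpha) = (alpha - 1)**2, bracketed by alpha_low = 0
# and alpha_hi = 2 with phi(0) = phi(2) = 1 and phi'(0) = -2, the quadratic
# model is exact, so the interpolated step is the true minimizer:
#
#   >>> quadratic_step(0.0, 2.0, 1.0, 1.0, -2.0)
#   1.0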

################################################################################
#                           Base Line-Search Object                            #
################################################################################

class LineSearch(object):
    """
    Base class for all line search algorithms.

    Provides rudimentary error-checking functionality, and an interface that
    should be adhered to when writing new line search functions.

    Attributes
    ----------
    decr_cond : float
        Sufficient decrease condition.
    max_iter : int
        Maximum iterations for the line search.
    out_file : file
        File stream for writing data.

    Parameters
    ----------
    optns : dict
    out_file : file
    """

    def __init__(self, optns={}, out_file=sys.stdout):
        self.max_iter = get_opt(optns, 10, 'max_iter')
        self.decr_cond = get_opt(optns, 1e-4, 'decr_cond')
        self.out_file = out_file

    def _validate_options(self):
        if not (self.max_iter >= 1):
            raise ValueError('max_iter must be a positive integer')
        if not (0 < self.decr_cond < 1):
            raise ValueError('decr_cond must be 0 < decr_cond < 1')

    def find_step_length(self, merit):
        """
        Find an appropriate step size for the given merit function that leads
        to the minimum in the search direction.

        Parameters
        ----------
        merit : MeritFunction-like
            Merit function object derived from the base MeritFunction class.

        Returns
        -------
        float : Step size.
        int : Number of iterations taken for the search.
        """
        raise NotImplementedError    # pragma: no cover
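
# A hypothetical subclass sketch (not part of Kona) illustrating the interface
# contract: call _validate_options() first, then return a step size together
# with the number of iterations used to find it.
#
#   >>> class FixedStep(LineSearch):
#   ...     def find_step_length(self, merit):
#   ...         self._validate_options()
#   ...         return 1.0, 0    # always accept the unit step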

################################################################################
#                          Back Tracking Line-Search                           #
################################################################################

class BackTracking(LineSearch):
    """
    Back-tracking line search.

    Attributes
    ----------
    alpha_init : float
        Initial step size.
    alpha_min : float
        Minimum step size.
    rdtn_factor : float
        Reduction factor for the step size at each iteration.
    p_dot_dfdx : float
        Value of :math:`\\langle p, \\nabla f \\rangle` at current step.
    """

    def __init__(self, optns={}, out_file=sys.stdout):
        super(BackTracking, self).__init__(optns, out_file)
        self.alpha_init = get_opt(optns, 1.0, 'alpha_init')
        self.alpha_max = self.alpha_init
        self.alpha_min = get_opt(optns, 1e-3, 'alpha_min')
        self.rdtn_factor = get_opt(optns, 0.5, 'rdtn_factor')
        self.p_dot_dfdx = 0.0

    def _validate_options(self):
        if not (0 < self.alpha_init <= 1):
            raise ValueError('alpha_init must be 0 < alpha_init <= 1')
        if not (0 <= self.rdtn_factor <= 1):
            raise ValueError('reduction factor must be 0 <= rdtn_factor <= 1')
        super(BackTracking, self)._validate_options()

    def find_step_length(self, merit):
        self._validate_options()

        if not isinstance(merit, MeritFunction):
            raise ValueError('unknown merit_function type')

        self.p_dot_dfdx = merit.p_dot_grad
        if self.p_dot_dfdx >= 0.:
            raise ValueError('search direction is not a descent direction')

        alpha = self.alpha_init
        self.f_init = merit.eval_func(0.0)

        self.out_file.write('\n')
        n_iter = 1
        while (alpha > self.alpha_min) and (n_iter < self.max_iter):
            self.out_file.write(' Backtracking Linesearch : iter %i\n'%n_iter)
            f_sufficient = self.f_init + self.decr_cond*alpha*self.p_dot_dfdx
            self.f = merit.eval_func(alpha)
            self.out_file.write(' alpha = %f\n'%alpha)
            self.out_file.write(' merit val = %e\n'%self.f)
            self.out_file.write(' sufficient = %e\n'%f_sufficient)
            if self.f <= f_sufficient:
                self.out_file.write('\nStep found!\n')
                return alpha, n_iter
            else:
                alpha *= self.rdtn_factor
                n_iter += 1
            self.out_file.write('\n')

        # if we got here, the line search failed
        self.out_file.write('\nLinesearch failed!\n')
        return self.alpha_min, self.max_iter
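
# A minimal usage sketch (hypothetical, not from the Kona test suite). The toy
# merit below models phi(alpha) = (alpha - 1)**2 along the search direction,
# and bypasses MeritFunction.__init__ because BackTracking only touches the
# p_dot_grad attribute and the eval_func() method:
#
#   >>> import os
#   >>> class QuadraticMerit(MeritFunction):
#   ...     def __init__(self):
#   ...         self.p_dot_grad = -2.0    # phi'(0) < 0: a descent direction
#   ...     def eval_func(self, alpha):
#   ...         return (alpha - 1.0)**2
#   >>> search = BackTracking(out_file=open(os.devnull, 'w'))
#   >>> search.find_step_length(QuadraticMerit())
#   (1.0, 1)
#
# The full step alpha = 1.0 lands exactly on the minimizer, so the sufficient
# decrease (Armijo) test passes on the first iteration.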

################################################################################
#                           Strong Wolfe Line-Search                           #
################################################################################

class StrongWolfe(LineSearch):
    """
    Strong Wolfe line search.

    Attributes
    ----------
    alpha_init : float
        Initial step size.
    alpha_max : float
        Maximum step size.
    curv_cond : float
        Curvature condition to be satisfied.
    """

    def __init__(self, optns={}, out_file=sys.stdout):
        super(StrongWolfe, self).__init__(optns, out_file)
        self.alpha_init = get_opt(optns, 1.0, 'alpha_init')
        self.alpha_max = get_opt(optns, 2.0, 'alpha_max')
        self.curv_cond = get_opt(optns, 0.9, 'curv_cond')

    def _validate_options(self):
        if not (self.alpha_init > 0):
            raise ValueError('alpha_init must be greater than zero (0)')
        if (self.alpha_max <= 0.) or (self.alpha_max <= self.alpha_init):
            raise ValueError('alpha_max must be positive and > alpha_init')
        if not (self.decr_cond < self.curv_cond < 1):
            raise ValueError('curv_cond must be decr_cond < curv_cond < 1')
        super(StrongWolfe, self)._validate_options()

    def _zoom(self, alpha_low, alpha_hi, phi_low, phi_hi, dphi_low, dphi_hi):
        merit = self.merit_function

        # if the lower and upper bounds are equal, this means we just hit
        # alpha_max, so we will return alpha_max
        if (alpha_low == alpha_hi) and (alpha_low == self.alpha_max):
            return self.alpha_max, 0

        self.out_file.write('\n')
        # START OF BIG FOR-LOOP
        for i in range(self.max_iter):
            self.out_file.write(' Strong-Wolfe Zoom : iter %i\n'%(i+1))
            # use interpolation to get the new step
            alpha_new = quadratic_step(
                alpha_low, alpha_hi, phi_low, phi_hi, dphi_low)
            # evaluate the merit function at the interpolated step
            phi_new = merit.eval_func(alpha_new)
            # check if this step violates sufficient decrease
            phi_sufficient = self.phi_init + \
                self.decr_cond*alpha_new*self.dphi_init
            self.out_file.write(' phi low = %e\n'%phi_low)
            self.out_file.write(' phi hi = %e\n'%phi_hi)
            self.out_file.write(' phi new = %e\n'%phi_new)
            self.out_file.write(' phi sufficient = %e\n'%phi_sufficient)
            if (phi_new > phi_sufficient) and (phi_new >= phi_low):
                # sufficient decrease failed, so the new step is the upper bound
                alpha_hi = alpha_new
                phi_hi = phi_new
                self.deriv_hi = False    # we don't know the derivative at alpha_hi
            else:
                # now we evaluate the merit gradient and check the curvature condition
                dphi_new = merit.eval_grad(alpha_new)
                if abs(dphi_new) <= -self.curv_cond*self.dphi_init:
                    # we also satisfied the curvature condition, so we can stop
                    self.out_file.write('\nStep found!\n')
                    return alpha_new, i
                elif dphi_new*(alpha_hi - alpha_low) >= 0:
                    # curvature condition was not satisfied, but
                    # alpha_new and alpha_low bracket a minimum
                    alpha_hi = alpha_low
                    phi_hi = phi_low
                    self.deriv_hi = True
                # we satisfied sufficient decrease, so the new step is the lower bound
                alpha_low = alpha_new
                phi_low = phi_new
                dphi_low = dphi_new
            self.out_file.write('\n')
        # END OF BIG FOR-LOOP

        phi_sufficient = self.phi_init + self.decr_cond*alpha_new*self.dphi_init
        if phi_new < phi_sufficient:
            self.out_file.write(
                '\n>> WARNING : Step found but curvature condition not met! <<\n')
            return alpha_new, i

        # if we got here then we didn't find a step
        raise Exception('StrongWolfe._zoom(): Failed to find a step length!')

    def find_step_length(self, merit):
        self._validate_options()

        if not isinstance(merit, MeritFunction):
            raise ValueError('unknown merit_function type')
        self.merit_function = merit

        self.phi_init = merit.eval_func(0.0)
        self.dphi_init = merit.eval_grad(0.0)

        alpha_old = 0.0
        phi_old = self.phi_init
        dphi_old = self.dphi_init
        if dphi_old > 0.0:
            raise ValueError('search direction is not a descent direction')

        quad_coeff = 0.0
        self.deriv_hi = False

        self.out_file.write('\n')
        # START OF BIG FOR-LOOP
        for i in range(self.max_iter):
            self.out_file.write(' Strong-Wolfe Line Search : iter %i\n'%(i+1))
            self.out_file.write(' ----------------------------------\n')
            # get the new step
            if i == 0:
                # set alpha to alpha_init if we're at the first step
                alpha_new = self.alpha_init
            else:
                # if it's not the first step, check quad_coeff
                if quad_coeff > 0:
                    # if quad_coeff is positive, step alpha forward
                    alpha_new = alpha_old - 0.5*dphi_old/quad_coeff
                    if (alpha_new < alpha_old) or (alpha_new > self.alpha_max):
                        alpha_new = min(2*alpha_old, self.alpha_max)
                else:
                    # otherwise double the step, capped at alpha_max
                    alpha_new = min(2*alpha_old, self.alpha_max)

            # get the new function value
            phi_new = merit.eval_func(alpha_new)

            # if the new step violates sufficient decrease, call zoom
            phi_sufficient = self.phi_init + \
                self.decr_cond*alpha_new*self.dphi_init
            self.out_file.write(' phi old = %e\n'%phi_old)
            self.out_file.write(' phi new = %e\n'%phi_new)
            self.out_file.write(' phi sufficient = %e\n'%phi_sufficient)
            if (phi_new > phi_sufficient) or ((i > 0) and (phi_new >= phi_old)):
                dphi_new = 0.0
                self.deriv_hi = False
                out = self._zoom(
                    alpha_old, alpha_new, phi_old, phi_new, dphi_old, dphi_new)
                return out[0], i+out[1]

            # otherwise, get the new gradient
            dphi_new = merit.eval_grad(alpha_new)

            # check the curvature condition
            if abs(dphi_new) <= -self.curv_cond*self.dphi_init:
                # if the curvature condition is satisfied, return alpha_new
                if self.curv_cond > 1.e-6:
                    self.out_file.write('\nStep found!\n')
                    return alpha_new, i
                # a very small curvature is suspicious, check for a local minimum
                # by perturbing in one direction first
                perturb = merit.eval_func(alpha_new - self.alpha_max*1.e-6)
                if perturb < phi_new:
                    # the perturbation yielded a smaller function, so update
                    phi_new = perturb
                    alpha_new -= self.alpha_max*1.e-6
                else:
                    # otherwise perturb in the other direction
                    perturb = merit.eval_func(alpha_new + self.alpha_max*1.e-6)
                    if perturb < phi_new:
                        # the perturbation yielded a smaller function, so update
                        phi_new = perturb
                        alpha_new += self.alpha_max*1.e-6
                    else:
                        # if neither perturbation worked, we have a true minimum
                        self.out_file.write('\nStep found!\n')
                        return alpha_new, i

            # check if the new gradient is positive
            if dphi_new >= 0:
                # if we get this far, the curvature condition is not satisfied
                self.deriv_hi = True
                out = self._zoom(
                    alpha_old, alpha_new, phi_old, phi_new, dphi_old, dphi_new)
                return out[0], i+out[1]

            # update the old variables
            quad_coeff = alpha_new - alpha_old
            quad_coeff = ((phi_new - phi_old) - dphi_new*quad_coeff)/(quad_coeff**2)
            alpha_old = alpha_new
            phi_old = phi_new
            dphi_old = dphi_new
            self.out_file.write('\n')
        # END OF BIG FOR-LOOP

        # if we got here then we didn't find a step
        raise Exception('StrongWolfe.find_step_length(): ' +
                        'failed to find a step length')
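
# A matching sketch for the strong Wolfe search (hypothetical, reusing the toy
# phi(alpha) = (alpha - 1)**2 merit from the BackTracking example, extended
# with the eval_grad() method that StrongWolfe also calls):
#
#   >>> import os
#   >>> class QuadraticMerit(MeritFunction):
#   ...     def __init__(self):
#   ...         pass    # bypass MeritFunction.__init__ for this toy
#   ...     def eval_func(self, alpha):
#   ...         return (alpha - 1.0)**2
#   ...     def eval_grad(self, alpha):
#   ...         return 2.0*(alpha - 1.0)
#   >>> search = StrongWolfe(out_file=open(os.devnull, 'w'))
#   >>> search.find_step_length(QuadraticMerit())
#   (1.0, 0)
#
# At alpha = 1.0 the directional derivative vanishes, so the sufficient
# decrease and strong curvature conditions both hold on the first iteration.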