/************************************************************************* Copyright Rice University, 2011. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, provided that the above copyright notice(s) and this permission notice appear in all copies of the Software and that both the above copyright notice(s) and this permission notice appear in supporting documentation. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. Except as contained in this notice, the name of a copyright holder shall not be used in advertising or otherwise to promote the sale, use or other dealings in this Software without prior written authorization of the copyright holder. 
**************************************************************************/ #ifndef __RVLALG_LBFGSBT_H #define __RVLALG_LBFGSBT_H #include "table.hh" #include "lbfgsalg.hh" #include "lnsrchBT.hh" #include "uminstep.hh" namespace RVLUmin { using namespace RVLAlg; using RVL::Vector; using RVL::Functional; using RVL::FunctionalEvaluation; using RVL::Table; using RVL::RVLException; /** Limited memory Broyden-Fletcher-Goldfarb-Shanno (LBFGS) quasi-Newton optimization with geometric backtracking line search globalization For details of the LBFGS algorithm, see the paper "Updating Quasi-Newton Matrices with Limited Storage" by Jorge Nocedal, Math. of Computation, Vol. 35, no. 151, pp. 773--782. Approximates solution to unconstrained optimization problem \f[ \min_{x} f(x) \f] via generalized secant update \f[ x_+ = x_c - \alpha B \nabla f(x_c). \f] \f$B\f$ is a limited-memory secant approximation to the inverse Hessian of \f$f\f$, parametrized by maximum rank. \f$\alpha\f$ is a step length. Note that this problem setting makes sense only for functions \f$f\f$ taking real values. Accordingly, the implementation includes a compile-time check that the Scalar template parameter designates a real field (i.e. double or float). This implementation globalizes convergence to a local min via a geometric backtracking line search method, implemented in RVLUmin::BacktrackingLineSearchAlg. Line search methods are described in Nocedal and Wright, Numerical Optimization, Springer 1999. For additional documentation, see RVLUmin::BacktrackingLineSearchAlg docs. Structure and function: Data members of three types combine to implement LBFGS: Usage: initialize solution vector (constructor argument x) externally; construct LBFGS object; call LBFGSBT::run(). On return, x has been updated to estimated solution.
Parameters for LBFGS iteration: NOTES: Parameters for backtracking line search (for detailed description and notes, see docs for RVLUmin::BacktrackingLineSearchAlg): Typical use case: see functional test source. */ template class LBFGSBT: public Algorithm { private: // parameters for LBFGSDir Scalar ihs; // inverse Hessian scale int mud; // max updates // parameters for BT line search int maxsamp; // max function evaluations bool disp; // display flag Scalar sl1; // length of first step Scalar minsteptol; // minimum permitted step length (fraction of prev step) Scalar eta1; // First GA scale: min acceptable decrease Scalar eta2; // Second GA scale: good decrease Scalar gamma1; // First BT factor: shrink step if decrease not acceptable Scalar gamma2; // Second BT factor: expand step if decrease good Scalar maxfrac; // fraction of max step to attempt // parameters for loop alg int maxits; // max number of LBFGS steps Scalar agradtol; // absolute gradient stopping threshhold Scalar rgradtol; // relative gradient stopping threshhold (to initial) // also uses disp ostream & str; FunctionalEvaluation fx; LBFGSDir dir; BacktrackingLineSearchAlg ls; GradientThresholdIterationTable ctbl; LBFGSBT(); LBFGSBT(const LBFGSBT &); public: /** constructor, first form: parameters: @param f - function to be minimized (RVL::Functional) @param x - solution RVL::Vector - initial guess on call, estimated solution on return @param _ihs - inverse Hessian scale - overall scale factor, so initial Hessian is this Scalar multiple of identity operator @param _maxits - max number of LBFGS iterations @param _mud - max stored BFGS updates - stored inverse Hessian approximation has this rank (at most) @param _maxsamp - max number of steps permitted in each line search @param _disp - verbosity flag - false = no output, true = function value, gradient norm at each iteration, report of line search @param _sl1 - first line search step @param _eta1 - lower G-A parameter @param _eta2 - upper G-A 
parameter @param _gamma1 - line search backtrack factor @param _gamma2 - line search extrapolation factor ("internal doubling") @param _maxfrac - fraction of max step to boundary permitted @param _agradtol - stopping tolerance for gradient norm, absolute @param _rgradtol - stopping tolerance for gradient norm, relative to initial gradient norm @param _str - verbose output unit */ LBFGSBT(Functional const & f, Vector & x, Scalar _ihs, int _mud, int _maxsamp, bool _disp, Scalar _sl1, Scalar _eta1, Scalar _eta2, Scalar _gamma1, Scalar _gamma2, Scalar _maxfrac, Scalar _minsteptol, int _maxits, Scalar _agradtol, Scalar _rgradtol, ostream & _str = cout) : ihs(_ihs), mud(_mud), maxsamp(_maxsamp), disp(_disp), sl1(_sl1), minsteptol(_minsteptol), eta1(_eta1), eta2(_eta2), gamma1(_gamma1), gamma2(_gamma2), maxfrac(_maxfrac), maxits(_maxits), agradtol(_agradtol), rgradtol(_rgradtol), str(_str), fx(f,x), dir(fx.getDomain(),ihs,mud,str), ls(maxsamp,disp,sl1,minsteptol,eta1,eta2,gamma1,gamma2,maxfrac,str), ctbl(fx,maxits,agradtol,rgradtol,str) { testRealOnly(); } /** constructor, second form - scalar arguments passed in Table. 
*/ LBFGSBT(Functional const & f, Vector & x, Table const & t, ostream & _str = cout) : ihs(getValueFromTable(t,"BFGS_InvHessianScale")), mud(getValueFromTable(t,"BFGS_MaxUpdates")), maxsamp(getValueFromTable(t,"LS_MaxSample")), disp(getValueFromTable(t,"DispFlag")), sl1(getValueFromTable(t,"LS_FirstStep")), minsteptol(getValueFromTable(t,"LS_MinStepTol")), eta1(getValueFromTable(t,"MinDecrease")), eta2(getValueFromTable(t,"GoodDecrease")), gamma1(getValueFromTable(t,"StepDecrFactor")), gamma2(getValueFromTable(t,"StepIncrFactor")), maxfrac(getValueFromTable(t,"LS_FractionOfMaxStep")), maxits(getValueFromTable(t,"MaxItn")), agradtol(getValueFromTable(t,"AbsGradTol")), rgradtol(getValueFromTable(t,"RelGradTol")), str(_str), fx(f,x), dir(fx.getDomain(),ihs,mud,str), ls(maxsamp,disp,sl1,minsteptol,eta1,eta2,gamma1,gamma2,maxfrac,str), ctbl(fx,maxits,agradtol,rgradtol,str) { testRealOnly(); } int getCount() { return ctbl.getCount(); } void run() { try { // put together alg, terminator, and loopalg. // note that BFGS update rule is also a terminator UMinStepLS step(fx,dir,ls,str); OrTerminator stop(step,ctbl); LoopAlg Umin(step, stop); Umin.run(); } catch (RVLException & e) { e<<"\ncalled from LBFGSBT constructor\n"; throw e; } } ~LBFGSBT() {} /** supplied to provide access to any intermediate data that a subclass of Functional may make available. Can extract const reference to current Functional, as constructed by FunctionalEvaluation, via the getFcnl() method. A cast will be required to extract any further subclass attributes. */ FunctionalEvaluation const & getFunctionalEvaluation() const { return fx; } }; } #endif