/*************************************************************************

Copyright Rice University, 2004.
All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, and/or sell copies of the
Software, and to permit persons to whom the Software is furnished to do so,
provided that the above copyright notice(s) and this permission notice appear
in all copies of the Software and that both the above copyright notice(s) and
this permission notice appear in supporting documentation.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE
LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR
ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

Except as contained in this notice, the name of a copyright holder shall not
be used in advertising or otherwise to promote the sale, use or other
dealings in this Software without prior written authorization of the
copyright holder.

**************************************************************************/

#ifndef __RVLALG_UMIN_LBFGSALG_H
#define __RVLALG_UMIN_LBFGSALG_H

#include <iostream>
#include <vector>

#include "uminstep.hh"
#include "linop.hh"

namespace RVLUmin {

  using namespace RVLAlg;

  using RVL::Vector;
  using RVL::LinearOp;
  using RVL::Functional;
  using RVL::FunctionalEvaluation;
  using RVL::Table;
  using RVL::Space;
  using RVL::CartesianPowerSpace;
  using RVL::Components;
  using RVL::ScaleOpFwd;
  using RVL::RVLException;
  using RVL::ProtectedDivision;
  using std::ostream;
  using std::cout;

  /** LBFGSOp implements the limited-memory BFGS approximation to the
      inverse Hessian of a twice-differentiable function. The
      approximation uses local changes to the gradient to gradually
      build up an estimate of the Hessian for use in nonlinear
      optimization problems. For details of the algorithm, see
      J. Nocedal, "Updating Quasi-Newton Matrices with Limited
      Storage", Mathematics of Computation, Vol. 35, No. 151 (1980),
      pp. 773--782.

      Note that the operator approximates the *inverse* Hessian, so
      the apply method computes the quasi-Newton step directly.

      The BFGS approximation consists of successive rank-one
      perturbations of an initial approximation; these rank-one
      perturbations can be represented as outer products. This class
      allows the user to supply a symmetric positive definite operator
      defining an alternate inner product, which in turn changes the
      definition of the outer product. In the context of an
      optimization problem, this is equivalent to running the
      algorithm in the alternate inner product.
  */
  template<class Scalar>
  class LBFGSOp: public LinearOp<Scalar> {

  private:

    const Space<Scalar> & sp;
    const CartesianPowerSpace<Scalar> psp;
    ScaleOpFwd<Scalar> H0;
    int CurNum,       // next slot of the circular buffer to overwrite
      MaxNum,         // MaxNum == psp.getSize() == number of stored vectors
      CurAllocated;   // if CurAllocated < MaxNum, some slots are still empty
    Vector<Scalar> Yvec;
    Components<Scalar> Y;
    Vector<Scalar> Svec;
    Components<Scalar> S;
    std::vector<Scalar> rho;

    // default constructor---disabled.
    LBFGSOp();

  protected:

    LinearOp<Scalar> * clone() const {
      return new LBFGSOp<Scalar>(*this);
    }

    // apply computes the image of the operator on x, giving y.
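    //
    // For reference, apply implements the standard two-loop recursion
    // from the Nocedal paper cited above: given stored pairs (s_i, y_i)
    // and rho_i = 1/<s_i, y_i>, the product y = H x is computed as
    //
    //   q = x
    //   for i = newest, ..., oldest:  alpha_i = rho_i <s_i, q>
    //                                 q -= alpha_i y_i
    //   q = H0 q
    //   for i = oldest, ..., newest:  beta = rho_i <y_i, q>
    //                                 q += (alpha_i - beta) s_i
    //
    // Each of the two loops appears twice below because the update
    // pairs live in a circular buffer: slots CurNum-1,...,0 hold the
    // newest updates, and slots CurAllocated-1,...,CurNum the older
    // ones (once the buffer has wrapped).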
    void apply(const Vector<Scalar> & x,
               Vector<Scalar> & y) const {
      try {
        Scalar xscale(H0.getScale());
        // handle the special case of no updates
        if (CurAllocated==0) {
          y.scale(xscale,x);
          return;
        }

        // general case---the initial approximation has been updated
        std::vector<Scalar> alpha(CurAllocated);
        int i;
        y.copy(x);

        // first loop: backwards from the newest update
        for (i=CurNum-1;i>=0;--i) {
          alpha[i] = rho[i]*(S[i].inner(y));
          y.linComb(-alpha[i],Y[i]);
        }
        // then the older updates in the wrapped part of the buffer
        if (CurAllocated > CurNum)
          for (i=CurAllocated-1;i>=CurNum;--i) {
            alpha[i] = rho[i]*(S[i].inner(y));
            y.linComb(-alpha[i],Y[i]);
          }

        // apply the initial inverse Hessian approximation
        y.scale(xscale);

        // second loop: forwards from the oldest update
        if (CurAllocated > CurNum)
          for (i=CurNum;i<CurAllocated;++i) {
            Scalar beta = rho[i]*(Y[i].inner(y));
            y.linComb(alpha[i]-beta,S[i]);
          }
        for (i=0;i<CurNum;++i) {
          Scalar beta = rho[i]*(Y[i].inner(y));
          y.linComb(alpha[i]-beta,S[i]);
        }
      }
      catch (RVLException & e) {
        e<<"\ncalled from LBFGSOp::apply\n";
        throw e;
      }
    }

    // applyAdj computes the image of the adjoint on y, giving x.
    void applyAdj(const Vector<Scalar> & y,
                  Vector<Scalar> & x) const {
      try {
        // operator is by definition symmetric
        this->applyOp(y,x);
      }
      catch (RVLException & e) {
        e<<"\ncalled from LBFGSOp::applyAdj\n";
        throw e;
      }
    }

  public:

    /** Usual constructor. Needs a scale ihs for the initial inverse
        Hessian approximation H0 = ihs*I, and the maximum number of
        updates to store. */
    LBFGSOp(const Space<Scalar> & _sp,
            Scalar ihs,
            int maxnum)
      : sp(_sp), psp(maxnum, sp), H0(sp,ihs),
        CurNum(0), MaxNum(maxnum), CurAllocated(0),
        Yvec(psp), Y(Yvec), Svec(psp), S(Svec), rho(MaxNum) {}

    /** Copy constructor */
    LBFGSOp(const LBFGSOp<Scalar> & op)
      : sp(op.sp), psp(op.MaxNum, sp), H0(op.H0),
        CurNum(op.CurNum), MaxNum(op.MaxNum), CurAllocated(op.CurAllocated),
        Yvec(psp), Y(Yvec), Svec(psp), S(Svec), rho(op.rho) {
      Yvec.copy(op.Yvec);
      Svec.copy(op.Svec);
    }

    // Destructor.
    ~LBFGSOp() {}

    const Space<Scalar> & getDomain() const { return sp; }
    const Space<Scalar> & getRange() const { return sp; }

    // Access to the scale, which is dynamically changed by the operator.
    Scalar getScale() {
      try {
        return H0.getScale();
      }
      catch (RVLException & e) {
        e<<"\ncalled from LBFGSOp::getScale\n";
        throw e;
      }
    }

    /** update requires current and next $x$ and current and next
        gradient. */
    void update(const Vector<Scalar> & x,
                const Vector<Scalar> & xnext,
                const Vector<Scalar> & g,
                const Vector<Scalar> & gnext) {
      try {
        if (MaxNum==0) return;

        if (CurAllocated < MaxNum) {
          CurAllocated++;
        }

        // secant pair: S = xnext - x, Y = gnext - g
        S[CurNum].copy(xnext);
        S[CurNum].linComb(-1.0,x);
        Y[CurNum].copy(gnext);
        Y[CurNum].linComb(-1.0,g);

        if (ProtectedDivision<Scalar>
            (1.0,S[CurNum].inner(Y[CurNum]),rho[CurNum])) {
          RVLException e;
          e<<"LBFGSOp::update\n";
          e<<"zerodivide in first protected div\n";
          e<<"either secant vector vanishes, or change in gradient vector vanishes, or\n";
          e<<"secant is perpendicular to change in gradient\n";
          e<<"----- row/col index = "<<CurNum<<"\n";
          throw e;
        }
        Scalar tmp;
        if (ProtectedDivision<Scalar>
            (1.0,rho[CurNum]*(Y[CurNum].normsq()),tmp)) {
          // zerodivide in second protected div: the change in gradient
          // vanishes, so the scale estimate is meaningless. Rather than
          // throwing, revert to the initial inverse Hessian
          // approximation and discard this update.
          reset();
          return;
        }
        H0.setScale(tmp);
        CurNum++;
        if (CurNum == MaxNum) CurNum = 0;
      }
      catch (RVLException & e) {
        e<<"\ncalled from LBFGSOp::update\n";
        throw e;
      }
    }

    // reset sets the operator to the initial inverse Hessian approximation.
    void reset() { CurNum = 0; CurAllocated = 0; }

    /// write method to print out useful information about the object.
    ostream & write(ostream & str) const {
      str<<"LBFGS operator: current rank = "<<CurAllocated
         <<", max rank = "<<MaxNum<<"\n";
      return str;
    }
  };

  /** LBFGSDir implements the UMinDir interface using the LBFGSOp
      approximate inverse Hessian: the search direction is the
      quasi-Newton step -H*grad, and the approximation is refreshed
      after each line search via updateDir. */
  template<class Scalar>
  class LBFGSDir: public UMinDir<Scalar> {

  private:

    LBFGSDir();
    LBFGSOp<Scalar> H;
    bool ans;
    ostream & str;

  public:

    // Fill in the vector with the search direction.
    // Nothing should go wrong here, provided H has been updated.
    void calcDir(Vector<Scalar> & dir,
                 FunctionalEvaluation<Scalar> & fx) {
      try {
        // why isn't H defined to be -H?
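        // (A note on the question above: H approximates the inverse
        // Hessian, which is kept symmetric positive definite---a
        // property -H could not have. Presumably for that reason the
        // sign of the quasi-Newton step -H*grad is applied separately,
        // by dir.negate() below.)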
        const Vector<Scalar> & grad = fx.getGradient();
        H.applyOp(grad,dir);
        dir.negate();
      }
      catch (RVLException & e) {
        e<<"\ncalled from LBFGSDir::calcDir\n";
        throw e;
      }
    }

    void updateDir(LineSearchAlg<Scalar> const & ls) {
      try {
        H.update(ls.getBasePoint(),
                 ls.getTrialPoint(),
                 ls.getBaseGradient(),
                 ls.getFunctionalEvaluation().getGradient());
      }
      catch (RVLException & e) {
        e<<"\ncalled from LBFGSDir::updateDir\n";
        throw e;
      }
    }

    void resetDir() { H.reset(); }

    LBFGSDir(Space<Scalar> const & dom,
             Scalar InvHessianScale = 1.0,
             int MaxUpdates = 5,
             ostream & _str = cout)
      : UMinDir<Scalar>(),
        H(dom, InvHessianScale, MaxUpdates),
        ans(false), str(_str) {}

    // Copy constructor; the reference member str must be initialized
    // here along with ans.
    LBFGSDir(LBFGSDir<Scalar> const & x)
      : UMinDir<Scalar>(x), H(x.H), ans(x.ans), str(x.str) {}

    ~LBFGSDir() {}

    bool query() { return ans; }

    ostream & write(ostream & str) const {
      str<<"LBFGSDir\n";
      // H.write(str);
      return str;
    }
  };

}

#endif
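/* Usage sketch (illustration only---not part of this header). The
   functional f and its domain space dom are hypothetical stand-ins;
   error handling is omitted.

     RVL::Vector<double> x(dom);                 // current iterate
     RVLUmin::LBFGSDir<double> dir(dom,
                                   1.0,          // initial inv. Hessian scale
                                   5);           // remember at most 5 updates
     RVL::FunctionalEvaluation<double> fx(f, x); // f and grad f at x
     RVL::Vector<double> p(dom);
     dir.calcDir(p, fx);                         // p = -H * grad f(x)

   After each accepted line-search step, updateDir(ls) passes the secant
   pair (x_next - x, grad_next - grad) through to LBFGSOp::update(),
   which refreshes the inverse Hessian estimate. */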