RvlUmin: lbfgsalg.hh Source File

00001 /*************************************************************************
00002 
00003 Copyright Rice University, 2004.
00004 All rights reserved.
00005 
00006 Permission is hereby granted, free of charge, to any person obtaining a
00007 copy of this software and associated documentation files (the "Software"),
00008 to deal in the Software without restriction, including without limitation
00009 the rights to use, copy, modify, merge, publish, distribute, and/or sell
00010 copies of the Software, and to permit persons to whom the Software is
00011 furnished to do so, provided that the above copyright notice(s) and this
00012 permission notice appear in all copies of the Software and that both the
00013 above copyright notice(s) and this permission notice appear in supporting
00014 documentation.
00015 
00016 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00017 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00018 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY
00019 RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
00020 NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
00021 DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
00022 PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
00023 ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
00024 THIS SOFTWARE.
00025 
00026 Except as contained in this notice, the name of a copyright holder shall
00027 not be used in advertising or otherwise to promote the sale, use or other
00028 dealings in this Software without prior written authorization of the
00029 copyright holder.
00030 
00031 **************************************************************************/
00032 
00033 #ifndef __RVLALG_UMIN_LBFGSALG_H
00034 #define __RVLALG_UMIN_LBFGSALG_H
00035 
00036 #include "uminstep.hh"
00037 #include "linop.hh"
00038 
00039 
00040 namespace RVLUmin{
00041 
00042   using namespace RVLAlg;
00043   using RVL::Vector;
00044   using RVL::LinearOp;
00045   using RVL::Functional;
00046   using RVL::FunctionalEvaluation;
00047   using RVL::Table;
00048   
00071   template<class Scalar>
00072   class LBFGSOp : public LinearOp<Scalar> {  
00073   
00074   private:
00075 
00076     const Space<Scalar> & sp;
00077     const CartesianPowerSpace<Scalar> psp;
00078     ScaleOpFwd<Scalar> H0;
00079     int CurNum,  // indicates the last vector allocated
00080       MaxNum,    // Maxnum == psp.getSize() == # of vectors
00081       CurAllocated; // if CurAllocated < MaxNum, then empty slots
00082     Vector<Scalar> Yvec;
00083     Components<Scalar> Y;
00084     Vector<Scalar> Svec;
00085     Components<Scalar> S;
00086     std::vector<Scalar> rho;
00087 
00088     // default and copy constructors---disabled.
00089     LBFGSOp();
00090 
00091   protected:
00092   
00093     LinearOp<Scalar> * clone() const {
00094       return new LBFGSOp<Scalar>(*this);
00095     }
00096 
00097     // apply computes the image of the operator on x, giving y.
00098     void apply(const Vector<Scalar> & x,
00099            Vector<Scalar> & y) const {
00100       try {
00101       
00102     Scalar xscale(H0.getScale());
00103 
00104     // handle the special case of no updates
00105     if (CurAllocated==0) {
00106       y.scale(xscale,x);
00107       return;
00108     }
00109 
00110     // general case---the initial approximation has been updated
00111     std::vector<Scalar> alpha(CurAllocated);
00112     int i;
00113     y.copy(x);
00114     for (i=CurNum-1;i>=0;--i) {
00115       alpha[i] = rho[i]*(S[i].inner(y));
00116       y.linComb(-alpha[i],Y[i]);
00117     }
00118     if( CurAllocated > CurNum )
00119       for (i=CurAllocated-1;i>=CurNum;--i) {
00120         alpha[i] = rho[i]*(S[i].inner(y));
00121         y.linComb(-alpha[i],Y[i]);
00122       }
00123 
00124     y.scale(xscale);
00125 
00126     if( CurAllocated > CurNum )
00127       for (i=CurNum;i<CurAllocated;++i) {
00128         Scalar beta = rho[i]*(Y[i].inner(y));
00129         y.linComb(alpha[i]-beta,S[i]);
00130       }
00131 
00132     for (i=0;i<CurNum;++i) {
00133       Scalar beta = rho[i]*(Y[i].inner(y));
00134       y.linComb(alpha[i]-beta,S[i]);
00135     }
00136       }
00137       catch (RVLException & e) {
00138     e<<"\ncalled from LBFGSOp::apply\n";
00139     throw e;
00140       }
00141     }
00142 
00143     //  applyAdj computes the image of the adjoint on y, giving x.
00144     void applyAdj(const Vector<Scalar> & y,
00145             Vector<Scalar> & x) const {
00146       try {
00147     // operator is by definition symmetric
00148     this->applyOp(y,x);
00149       }
00150       catch (RVLException & e) {
00151     e<<"\ncalled from LBFGSOp::applyAdj\n";
00152     throw e;
00153       }
00154     }
00155 
00156   public:
00157 
00161     LBFGSOp(const Space<Scalar> & _sp,
00162         Scalar ihs,
00163         int maxnum)
00164       : sp(_sp), psp(maxnum, sp), H0(sp,ihs),CurNum(0),MaxNum(maxnum),
00165     CurAllocated(0),Yvec(psp), Y(Yvec),Svec(psp),S(Svec),rho(MaxNum) {
00166     }
00167 
00169     LBFGSOp(const LBFGSOp<Scalar> & op) 
00170       : sp(op.sp), psp(op.MaxNum, sp), H0(op.H0),CurNum(op.CurNum),MaxNum(op.MaxNum),
00171     CurAllocated(op.CurAllocated),Yvec(psp), Y(Yvec),Svec(psp),S(Svec),rho(op.rho) 
00172     {
00173       Yvec.copy(op.Yvec);
00174       Svec.copy(op.Svec);
00175     }
00176   
00177     // Destructor.
00178     ~LBFGSOp() {}
00179 
00180     const Space<Scalar> & getDomain() const { return sp; }
00181     const Space<Scalar> & getRange() const { return sp; }
00182 
00183     // Access to the scale, which is dynamically changed by the operator.
00184     Scalar getScale() {
00185       try {
00186     return H0.getScale();
00187       } 
00188       catch (RVLException & e) {
00189     e<<"\ncalled from LBFGSOp::getScale\n";
00190     throw e;
00191       }
00192     }
00193   
00195     void update(const Vector<Scalar> & x,
00196         const Vector<Scalar> & xnext,
00197         const Vector<Scalar> & g,
00198         const Vector<Scalar> & gnext) {
00199       try {
00200     if (MaxNum==0) return;
00201       
00202     if( CurAllocated < MaxNum ) {
00203       CurAllocated++;
00204     }
00205 
00206     S[CurNum].copy(xnext);
00207     S[CurNum].linComb(-1.0,x);
00208     Y[CurNum].copy(gnext);
00209     Y[CurNum].linComb(-1.0,g);
00210     
00211     if (ProtectedDivision<Scalar>
00212         (1.0,S[CurNum].inner(Y[CurNum]),rho[CurNum])) {
00213       RVLException e;
00214       e<<"LBFGSOp::update\n";
00215       e<<"zerodivide in first protected div\n";
00216       e<<"either secant vector vanishes, or change in gradient vector vanishes, or \n";
00217       e<<"secant is perpindicular to change in gradient\n";
00218       e<<"----- row/col index = "<<CurNum<<"\n";
00219       e<<"----- secant vector:\n";
00220       S[CurNum].write(e);
00221       e<<"----- delta grad: \n";
00222       Y[CurNum].write(e);
00223       //      throw e;
00224       e<<"revert to initial inv Hessian approximation\n";
00225       reset();
00226     }
00227 
00228     Scalar tmp=0;
00229     if (ProtectedDivision<Scalar>
00230         (1.0,rho[CurNum]*(Y[CurNum].normsq()),tmp)) {
00231       RVLException e;
00232       e<<"LBFGSOp::update\n";
00233       e<<"zerodivide in second protected div\n";
00234       e<<"change in gradient vanishes\n";
00235       //      throw e;
00236       e<<"revert to initial inv Hessian approximation\n";
00237       reset();
00238     }
00239 
00240     //  (ProtectedDivision<Scalar>
00241     //   (1.0,rho[CurNum]*(Y[CurNum].normsq()),tmp));
00242 
00243     H0.setScale(tmp);
00244     CurNum++;
00245     if(CurNum == MaxNum) CurNum = 0;
00246 
00247       }
00248       catch (RVLException & e) {
00249     e<<"\ncalled from LBFGSOp::update\n";
00250     throw e;
00251       }
00252     }
00253 
00254     // reset sets the operator to the initial inverse Hessian approximation.
00255     void reset() { CurNum = 0; CurAllocated = 0; }
00256 
00258     ostream & write(ostream & str) const {
00259       str<<"LBFGS operator: current rank = "<<CurNum<<" scale = "<<H0.getScale()<<"\n";
00260       return str;
00261     }
00262   };
00263 
00273   template<class Scalar>
00274   class LBFGSDir : public UMinDir<Scalar> {
00275 
00276   private:
00277 
00278     LBFGSDir();
00279 
00280     LBFGSOp<Scalar> H;
00281     bool ans;
00282     ostream & str;
00283 
00284   public:
00285 
00286     // Fill in the vector with the search direction
00287     // nothing should go wrong here, if H has been updated.
00288     void calcDir(Vector<Scalar> & dir,
00289          FunctionalEvaluation<Scalar> & fx) {
00290       try {
00291     // why isn't H defined to be -H?
00292     const Vector<Scalar> & grad = fx.getGradient();
00293     H.applyOp(grad,dir);
00294     dir.negate();
00295       } catch(RVLException & e) {
00296     e << "called from LBFGSStep::calcDir()\n";
00297     throw e;
00298       }
00299     }
00300 
00301     void updateDir(LineSearchAlg<Scalar> const & ls) {
00302       try {
00303     H.update(ls.getBasePoint(),
00304          ls.getTrialPoint(),
00305          ls.getBaseGradient(),
00306          ls.getFunctionalEvaluation().getGradient());
00307       }
00308       catch (RVLException & e) {
00309     e<<"\ncalled from LBFGSDir::updateDir\n";
00310     throw e;
00311       }
00312     }
00313 
00314     void resetDir() { H.reset(); }
00315     
00316     LBFGSDir(Space<Scalar> const & dom,
00317          Scalar InvHessianScale = 1.0,
00318          int MaxUpdates = 5,
00319          ostream & _str=cout)
00320       :  UMinDir<Scalar>(), 
00321      H(dom, 
00322        InvHessianScale, 
00323        MaxUpdates),  ans(false), str(_str) {}
00324   
00325     LBFGSDir(LBFGSDir<Scalar> const & x) 
00326       : UMinDir<Scalar>(x), H(x.H) {}
00327     ~LBFGSDir() {} 
00328 
00329     bool query() { return ans; }
00330 
00331     ostream & write(ostream & str) const {
00332       str<<"LBFGSDir\n";
00333       //  H.write(str);
00334       return str;
00335     }
00336   };
00337 
00338 }
00339 
00340 #endif