00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #ifndef __RVLALG_UMIN_LBFGSALG_H
00034 #define __RVLALG_UMIN_LBFGSALG_H
00035
00036 #include "uminstep.hh"
00037 #include "linop.hh"
00038
00039
00040 namespace RVLUmin{
00041
00042 using namespace RVLAlg;
00043 using RVL::Vector;
00044 using RVL::LinearOp;
00045 using RVL::Functional;
00046 using RVL::FunctionalEvaluation;
00047 using RVL::Table;
00048
00071 template<class Scalar>
00072 class LBFGSOp : public LinearOp<Scalar> {
00073
00074 private:
00075
00076 const Space<Scalar> & sp;
00077 const CartesianPowerSpace<Scalar> psp;
00078 ScaleOpFwd<Scalar> H0;
00079 int CurNum,
00080 MaxNum,
00081 CurAllocated;
00082 Vector<Scalar> Yvec;
00083 Components<Scalar> Y;
00084 Vector<Scalar> Svec;
00085 Components<Scalar> S;
00086 std::vector<Scalar> rho;
00087
00088
00089 LBFGSOp();
00090
00091 protected:
00092
00093 LinearOp<Scalar> * clone() const {
00094 return new LBFGSOp<Scalar>(*this);
00095 }
00096
00097
00098 void apply(const Vector<Scalar> & x,
00099 Vector<Scalar> & y) const {
00100 try {
00101
00102 Scalar xscale(H0.getScale());
00103
00104
00105 if (CurAllocated==0) {
00106 y.scale(xscale,x);
00107 return;
00108 }
00109
00110
00111 std::vector<Scalar> alpha(CurAllocated);
00112 int i;
00113 y.copy(x);
00114 for (i=CurNum-1;i>=0;--i) {
00115 alpha[i] = rho[i]*(S[i].inner(y));
00116 y.linComb(-alpha[i],Y[i]);
00117 }
00118 if( CurAllocated > CurNum )
00119 for (i=CurAllocated-1;i>=CurNum;--i) {
00120 alpha[i] = rho[i]*(S[i].inner(y));
00121 y.linComb(-alpha[i],Y[i]);
00122 }
00123
00124 y.scale(xscale);
00125
00126 if( CurAllocated > CurNum )
00127 for (i=CurNum;i<CurAllocated;++i) {
00128 Scalar beta = rho[i]*(Y[i].inner(y));
00129 y.linComb(alpha[i]-beta,S[i]);
00130 }
00131
00132 for (i=0;i<CurNum;++i) {
00133 Scalar beta = rho[i]*(Y[i].inner(y));
00134 y.linComb(alpha[i]-beta,S[i]);
00135 }
00136 }
00137 catch (RVLException & e) {
00138 e<<"\ncalled from LBFGSOp::apply\n";
00139 throw e;
00140 }
00141 }
00142
00143
00144 void applyAdj(const Vector<Scalar> & y,
00145 Vector<Scalar> & x) const {
00146 try {
00147
00148 this->applyOp(y,x);
00149 }
00150 catch (RVLException & e) {
00151 e<<"\ncalled from LBFGSOp::applyAdj\n";
00152 throw e;
00153 }
00154 }
00155
00156 public:
00157
00161 LBFGSOp(const Space<Scalar> & _sp,
00162 Scalar ihs,
00163 int maxnum)
00164 : sp(_sp), psp(maxnum, sp), H0(sp,ihs),CurNum(0),MaxNum(maxnum),
00165 CurAllocated(0),Yvec(psp), Y(Yvec),Svec(psp),S(Svec),rho(MaxNum) {
00166 }
00167
00169 LBFGSOp(const LBFGSOp<Scalar> & op)
00170 : sp(op.sp), psp(op.MaxNum, sp), H0(op.H0),CurNum(op.CurNum),MaxNum(op.MaxNum),
00171 CurAllocated(op.CurAllocated),Yvec(psp), Y(Yvec),Svec(psp),S(Svec),rho(op.rho)
00172 {
00173 Yvec.copy(op.Yvec);
00174 Svec.copy(op.Svec);
00175 }
00176
00177
00178 ~LBFGSOp() {}
00179
00180 const Space<Scalar> & getDomain() const { return sp; }
00181 const Space<Scalar> & getRange() const { return sp; }
00182
00183
00184 Scalar getScale() {
00185 try {
00186 return H0.getScale();
00187 }
00188 catch (RVLException & e) {
00189 e<<"\ncalled from LBFGSOp::getScale\n";
00190 throw e;
00191 }
00192 }
00193
00195 void update(const Vector<Scalar> & x,
00196 const Vector<Scalar> & xnext,
00197 const Vector<Scalar> & g,
00198 const Vector<Scalar> & gnext) {
00199 try {
00200 if (MaxNum==0) return;
00201
00202 if( CurAllocated < MaxNum ) {
00203 CurAllocated++;
00204 }
00205
00206 S[CurNum].copy(xnext);
00207 S[CurNum].linComb(-1.0,x);
00208 Y[CurNum].copy(gnext);
00209 Y[CurNum].linComb(-1.0,g);
00210
00211 if (ProtectedDivision<Scalar>
00212 (1.0,S[CurNum].inner(Y[CurNum]),rho[CurNum])) {
00213 RVLException e;
00214 e<<"LBFGSOp::update\n";
00215 e<<"zerodivide in first protected div\n";
00216 e<<"either secant vector vanishes, or change in gradient vector vanishes, or \n";
00217 e<<"secant is perpindicular to change in gradient\n";
00218 e<<"----- row/col index = "<<CurNum<<"\n";
00219 e<<"----- secant vector:\n";
00220 S[CurNum].write(e);
00221 e<<"----- delta grad: \n";
00222 Y[CurNum].write(e);
00223
00224 e<<"revert to initial inv Hessian approximation\n";
00225 reset();
00226 }
00227
00228 Scalar tmp=0;
00229 if (ProtectedDivision<Scalar>
00230 (1.0,rho[CurNum]*(Y[CurNum].normsq()),tmp)) {
00231 RVLException e;
00232 e<<"LBFGSOp::update\n";
00233 e<<"zerodivide in second protected div\n";
00234 e<<"change in gradient vanishes\n";
00235
00236 e<<"revert to initial inv Hessian approximation\n";
00237 reset();
00238 }
00239
00240
00241
00242
00243 H0.setScale(tmp);
00244 CurNum++;
00245 if(CurNum == MaxNum) CurNum = 0;
00246
00247 }
00248 catch (RVLException & e) {
00249 e<<"\ncalled from LBFGSOp::update\n";
00250 throw e;
00251 }
00252 }
00253
00254
00255 void reset() { CurNum = 0; CurAllocated = 0; }
00256
00258 ostream & write(ostream & str) const {
00259 str<<"LBFGS operator: current rank = "<<CurNum<<" scale = "<<H0.getScale()<<"\n";
00260 return str;
00261 }
00262 };
00263
00273 template<class Scalar>
00274 class LBFGSDir : public UMinDir<Scalar> {
00275
00276 private:
00277
00278 LBFGSDir();
00279
00280 LBFGSOp<Scalar> H;
00281 bool ans;
00282 ostream & str;
00283
00284 public:
00285
00286
00287
00288 void calcDir(Vector<Scalar> & dir,
00289 FunctionalEvaluation<Scalar> & fx) {
00290 try {
00291
00292 const Vector<Scalar> & grad = fx.getGradient();
00293 H.applyOp(grad,dir);
00294 dir.negate();
00295 } catch(RVLException & e) {
00296 e << "called from LBFGSStep::calcDir()\n";
00297 throw e;
00298 }
00299 }
00300
00301 void updateDir(LineSearchAlg<Scalar> const & ls) {
00302 try {
00303 H.update(ls.getBasePoint(),
00304 ls.getTrialPoint(),
00305 ls.getBaseGradient(),
00306 ls.getFunctionalEvaluation().getGradient());
00307 }
00308 catch (RVLException & e) {
00309 e<<"\ncalled from LBFGSDir::updateDir\n";
00310 throw e;
00311 }
00312 }
00313
00314 void resetDir() { H.reset(); }
00315
00316 LBFGSDir(Space<Scalar> const & dom,
00317 Scalar InvHessianScale = 1.0,
00318 int MaxUpdates = 5,
00319 ostream & _str=cout)
00320 : UMinDir<Scalar>(),
00321 H(dom,
00322 InvHessianScale,
00323 MaxUpdates), ans(false), str(_str) {}
00324
00325 LBFGSDir(LBFGSDir<Scalar> const & x)
00326 : UMinDir<Scalar>(x), H(x.H) {}
00327 ~LBFGSDir() {}
00328
00329 bool query() { return ans; }
00330
00331 ostream & write(ostream & str) const {
00332 str<<"LBFGSDir\n";
00333
00334 return str;
00335 }
00336 };
00337
00338 }
00339
00340 #endif