#ifndef RVL_UMIN_TRALG_ #define RVL_UMIN_TRALG_ #include "table.hh" #include "op.hh" #include "alg.hh" #include "ioterm.hh" #include "boolterm.hh" namespace RVLUmin { using namespace RVL; using namespace RVLAlg; /** Generic trust region (truncated GN) step. Trust region decision parameters are constants, so pass by value. Operator evaluation, Function value, absolute and relative gradient norms, predicted and actual reduction, and trust radius are all altered by this object, so it stores mutable references for these items. Parameters / data members: Outline of run() method: */ template class TRGNStep: public Algorithm { typedef typename ScalarFieldTraits::AbsType atype; public: /** here op = b-F(x), i.e. the residual - use ResidualOp wrapper NOTE: this is the "optimistic" no-copy form - i.e. assumes that the step is likely to succeed, so the restor-xsave-compute branch is unlikely, hence not costly. A "pessimistic" form would be a mirror-image. The optimal form would involve a deep copy operation on OpEvals, which is certainly possible future mod. the quadratic model is 0.5*\|b-F(x)\|^2 + p^Tg(x) + 0.5p^TH(x)p where H(x)=DF(x)^TDF(x), and H(x)p = g(x) = DF(x)^T(b-F(x)) (approx) The predicted reduction is 0.5*\|b-F(x)\|^2 - 0.5p^Tg(x) */ void run() { //cerr<<"TRGNStep::run\n"; // run then delete solver - first check whether trust region was // transgressed solver->run(); bool active = false; { Terminator & tsolver = dynamic_cast(*solver); active = tsolver.query(); } delete solver; solver=NULL; // test for nontrivial step - failure is fatal if (p.norm() eta2*predred) && active) { int dcount = 0; Vector plast(opeval.getDomain()); while (active && dcount<5) { str<<"active trust region constraint and good step - attempt to increase TR\n"; atype deltalast=pol.Delta; atype jvallast = jval; // save last update vector plast.copy(p); // stretch it p.scale(gamma2); // stretch TR pol.Delta *= gamma2; str<<"last TR = "< dp(opeval.getRange()); opeval.getDeriv().applyOp(p,dp); // create quadratic residual dp.linComb(-ScalarFieldTraits::One(),opeval.getValue()); // estimate reduction - note jvalsave is resid at xsave predred = jvalsave - 0.5*dp.normsq(); str<<"new predred="<::One(),p); // create real residual jval = 0.5*opeval.getValue().normsq(); str<<"last jval = "< jvallast) { str<<"G-A failed, revert to previous estimate, unset active flag\n"; pol.Delta = deltalast; jval=jvallast; p.copy(plast); x.copy(xsave); x.linComb(-ScalarFieldTraits::One(),p); active=false; } else { str<<"G-A passed"; } dcount++; if (dcount==5) { str<<", max number of TR expansions reached\n"; } else { str<<", try it again\n"; } } } // rejigger everything for the new point solver = pol.build(p,opeval.getDeriv(),opeval.getValue(),rnorm,nrnorm,str); agnrm=nrnorm; rgnrm=nrnorm*gnrmrecip; //cerr<<"at end of TRGNStep::run - Delta = "< 0 @param _eta2 upper G-A parameter > eta1 @param _gamma1 trust region reduction factor < 1 @param _gamma2 trust region expansion factor > 1, gamma1*gamma2 < 1 @param _minstep min permitted step as a fraction of trust radius @param _nostep true if step norm was too small rel trust radius @param _predred predicted reduction - updated reference @param _actred actual reduction - updated reference @param _jval objective function value - updated reference @param _agnrm gradient norm - updated reference @param _rgnrm gradient norm scaled by reciprocal of original (on constr) - updated reference @param _str verbose output stream Requirements on parameters:
  • Delta >= 0
  • 0 < gamma1 < 1 < gamma2
  • gamma1 * gamma2 < 1
  • 0 < eta1 < eta2
  • opeval = operator defining nonlinear least squares problem, evaluated at current iterate. used to supply RHS (getValue) and LinearOp (getDeriv) of G-N problem on call, and to evaluate residual and normal residual at G-N solution on return.
  • pol - TRLS solver policy, as described above
*/ TRGNStep(Policy const & _pol, OperatorEvaluation & _opeval, atype _eta1, atype _eta2, atype _gamma1, atype _gamma2, atype _minstep, bool & _nostep, atype & _predred, atype & _actred, atype & _jval, atype & _agnrm, atype & _rgnrm, ostream & _str) : pol(_pol), opeval(_opeval), eta1(_eta1),eta2(_eta2),gamma1(_gamma1),gamma2(_gamma2),minstep(_minstep), nostep(_nostep),predred(_predred), actred(_actred), jval(_jval), agnrm(_agnrm), rgnrm(_rgnrm), p(_opeval.getDomain()), xsave(_opeval.getDomain()), solver(NULL), str(_str) { //cerr<<"TRGNStep:: construction\n"; // sanity test params if ( (gamma1 <= ScalarFieldTraits::Zero()) || (gamma1 > ScalarFieldTraits::One()) || (gamma2 < ScalarFieldTraits::One()) || (gamma1*gamma2 > ScalarFieldTraits::One()-100.0*numeric_limits::epsilon()) || (eta1 < ScalarFieldTraits::Zero()) || (eta1 > eta2) ) { RVLException e; e<<"Error: TRTRGNStep constructor\n"; e<<"insane TR param inputs\n"; e<<"gamma1= "<::Zero(); predred = ScalarFieldTraits::Zero(); //cerr<<"TRGNStep -> Solver construction via policy\n"; // initial solver assignment solver = pol.build(p,opeval.getDeriv(),opeval.getValue(),rnorm,nrnorm,str); //cerr<<"TRGNStep -> return from Solver construction\n"; // solver policy: least squares solution of Ax=b. // pass b=opeval.getValue, A=opeval.getDeriv references - cause reinitialization if needed // on creation, initialize x=0, r=b, rnorm=b.norm, nrnorm=A^Tb.norm jval = 0.5*rnorm*rnorm; agnrm = nrnorm; // record initial reciprocal grad norm for future ref if (ProtectedDivision(ScalarFieldTraits::One(),nrnorm,gnrmrecip)) { // reset grad norms to zero agnrm = ScalarFieldTraits::Zero(); rgnrm = ScalarFieldTraits::Zero(); gnrmrecip = ScalarFieldTraits::Zero(); str<<"NOTE: TRGNStep constructor\n"; str<<" initial value of normal residual norm = "<::One(); } // don't touch initial Delta //cerr<<"Exit TRGNStep constructor\n"; } ~TRGNStep() { if (solver) delete solver; } private: Policy const & pol; OperatorEvaluation & opeval; atype & predred; atype & actred; atype & jval; atype & agnrm; atype & rgnrm; bool & nostep; atype gnrmrecip; atype eta1; atype eta2; atype gamma1; atype gamma2; atype minstep; atype rnorm; atype nrnorm; Vector p; Vector xsave; // workspace for solver mutable Algorithm * solver; ostream & str; }; /** Trust Region iteration. Approximates local solution of \f[\min_x \|f(x)\|^2\f] by a variant of the Steihaug-Toint trust region globalization of the Gauss-Newton algorithm, with flexible specification of the least-squares substep. See for instance T. Steihaug, "The conjugate gradient method and trust regions in large scale optimization", SIAM Journal on Numerical Analysis, v. 20 (1983), pp. 626-637; A. Conn, N. Gould, and P. Toint, Trust Region Methods, SIAM, 2000; and J. Nocedal and S. Wright, Numerical Optimization, Spring, 1999, esp. Ch 4. Note that this algorithm will also approximate solutions of \f[\min_x \|g(x)-b\|^2,\f] provided that \f$f(x)=g(x)-b\f$ is passed to the constructor. The iteration updates the current solution estimate \f$x_c\f$ to a new estimate \f$x_+\f$ by solving approximately the trust-region Gauss-Newton subproblem for the increment \f$\delta x\f$: \f[ \delta x = \mbox{argmin }\|Df(x_c)\delta x + f(x_c)\|^2 \mbox{ subject to } \|\delta x \| \le \Delta \f] after which the update is computed as \f$x_+=x_c + \delta x\f$. The trust radius represents a size estimate of the region near \f$x_c\f$ in which the Gauss-Newton quadratic is a good approximation to the least-squares objective. The genius of the algorithm lies in the rules for updating \f$\Delta\f$: it is decreased by a factor < 1 when the actual decrease in the objective obtained by \f$ x_c \mapsto x_+\f$ is substantially less than the decrease predicted by the Gauss-Newton model, increased by a factor > 1 when the actual decrease is close to the predicted decrease and the trust region constraint is binding, and otherwise left alone. The step algorithm RVLUmin::TRGNStep manages \f$\Delta\f$ - see its documentation for detailed information. TRGNAlg depends on a choice of least squares solver with trust region truncation. This choice is passed by policy, that is, both by inheritance (mixin) and as a template parameter. The policy type must define a public member function with following signature:

[subtype of Algorithm and Terminator] * build(...) const;

with arguments

  • Vector &, // on return, output of TR LS solve
  • OperatorEvaluation &, // eval object - offers RHS and LinearOp of GN problem
  • AbsScalar &, // on return, residual norm
  • AbsScalar &, // on return, normal residual norm
  • AbsScalar, // trust radius
  • ostream & // verbose output unit
Policies should be default-instantiated, with no arguments, and should exhibit some sensible default behaviour (likely useless). For useful behaviour, some policies may require additional information - should be supplied in driver code by calling appropriate additional methods of particular policy classes. The least squares solver built by the policy must be supplied with the attributes of both RVLAlg::Algorithm and RVLAlg::Terminator. The Algorithm::run() method changes the states of the arguments as described above. The Terminator::query() method returns true if trust region truncation was applied, else false (i.e. least squares solution approximation was interior to the trust region). For example, the RVLUmin::CGNEPolicy class implements a policy as just described. Use it to specialize TRGNAlg to create a Gauss-Newton-Krylov or Steihaug-Toint algorithm, as described in the references mentioned earlier. The conjugate gradient iteration parameters must be supplied after instantiation for nontrivial execution, so the RVLUmin::CGNEPolicy includes a suitable method (assign(...)) whichh must be called on the TRGNAlg object after construction. See the TRGN functional test source for an explicit example of this use mode. The step algorithm RVLUmin::TRGNStep manages both the solution of the least squares problem and the trust radius (data member Delta) - see documentation on RVLUmin::TRGNStep for details. Parameters supplied to constructor. Absolute value type (eg. double for complex denoted as abstype. See constructor docs for complete list:
  • x - RVL::Vector object, mutable reference. On construction, initial estimate of solution. On return, final estimate of solution returned by algorithm
  • op - RVL::Operator object, const reference - operator defining the functional equation to be solved in the least-squares sense. As noted above, the functional equation takes the form \f$f(x)=0\f$, so any nontrivial "right-hand side" must be folded into the definition of \f$f\f$.
  • _maxcount - int, maximum permitted TR-GN steps. Typical value = 10
  • _jtol - abstype, stopping threshold for objective (i.e. one-half mean square of \f$f\f$). In some cases, it may be reasonable to stop when the objective gets small enough. To disable, set = zero. Typical value = 0.0
  • _agtol - abstype, absolute stopping threshold for gradient norm, usable when scale information about the gradient is available, otherwise can be set to zero. Typical value = 0.0
  • _rgtol - abstype, relative stopping threshold for gradient norm. Does not require scale of gradient or objective to be known, but may result in failure to converge if initial solution estimate is already accurate. Typical value = 0.01
  • _initDelta - abstype, initial trust radius, requires the same kind of usually unavailable knowledge about the solution scale to set intelligently, as does the initial step in a line search method. Choose some convenient number and let the trust region algorithm take care of adjustments. Typical value = 1.0.
  • _str - ostream, verbose output
  • other parameters proper to trust region algorithm implemented in RVLUmin::TRGNStep - see its docs for details
Usage: instantiate; call post-construction initialization of least-squares solver policy parent class if necessary to assign additional attributes; call RVLUmin::TRGNAlg::run(). Typical use case: see TRGN functional test source */ template class TRGNAlg: public Algorithm, public Policy { typedef typename ScalarFieldTraits::AbsType atype; public: void run() { //cerr<<"TRGNAlg::run\n"; try { bool nostep = false; //cerr<<"TRGNAlg->TRGNStep constructor\n"; TRGNStep step(*this,opeval,eta1,eta2,gamma1,gamma2,minstep, nostep,predred,actred,jval,agnrm,rgnrm,str); BoolTerminator bstop(nostep); VectorCountingThresholdIterationTable vstop(maxcount,names,nums,tols,str); vstop.init(); atype jval0=jval; atype agnrm0=agnrm; atype delta0=step.getDelta(); OrTerminator stop(bstop,vstop); //cerr<<"TRGNAlg->LoopAlg\n"; LoopAlg doit(step,stop); doit.run(); actcount=vstop.getCount(); str<<"\n******************* TRGN Summary *******************\n"; if (nostep && maxcount > 0) { str<<"TRGN: no update from GN step at TR step "< 0) { str<<"iteration count = "<
  • Delta >= 0
  • 0 < gamma1 < 1 < gamma2
  • gamma1 * gamma2 < 1
  • 0 < eta1 < eta2
  • opeval = operator defining nonlinear least squares problem, evaluated at current iterate. used to supply RHS (getValue) and LinearOp (getDeriv) of G-N problem on call, and to evaluate residual and normal residual at G-N solution on return.
  • pol - TRLS solver policy, as described above
  • */ TRGNAlg(Operator const & op, Vector & x, int _maxcount, atype _jtol, atype _agtol, atype _rgtol, atype _eta1, atype _eta2, atype _gamma1, atype _gamma2, atype _minstep, ostream & _str) : Policy(), opeval(op,x), jtol(_jtol),agtol(_agtol),rgtol(_rgtol), eta1(_eta1),eta2(_eta2),gamma1(_gamma1),gamma2(_gamma2), maxcount(_maxcount),minstep(_minstep), names(6),nums(6),tols(6),actcount(0), str(_str) { this->inittable(); } /** second form of constructor - passes most argument via a Table - see docs for first form of constructor for items which must be included in the Table and constraints upon them. */ TRGNAlg(Operator const & op, Vector & x, Table & t, ostream & _str) : opeval(op,x), jtol(getValueFromTable(t,"ResidualTol")), agtol(getValueFromTable(t,"AbsGradTol")), rgtol(getValueFromTable(t,"RelGradTol")), eta1(getValueFromTable(t,"MinDecrease")), eta2(getValueFromTable(t,"GoodDecrease")), gamma1(getValueFromTable(t,"StepDecrFactor")), gamma2(getValueFromTable(t,"StepIncrFactor")), maxcount(getValueFromTable(t,"MaxItn")), minstep(getValueFromTable(t,"MinStepTol")), names(6),nums(6),tols(6),actcount(0), str(_str) { this->inittable(); } private: OperatorEvaluation opeval; mutable atype predred; mutable atype actred; mutable atype jval; atype jtol; mutable atype agnrm; mutable atype rgnrm; atype agtol; atype rgtol; atype eta1; atype eta2; atype gamma1; atype gamma2; int maxcount; atype minstep; vector names; vector nums; vector tols; mutable int actcount; ostream & str; void inittable() { names[0]="LS Objective"; nums[0]=&jval; tols[0]=jtol; names[1]="Gradient Norm"; nums[1]=&agnrm; tols[1]=agtol; names[2]="Rel Grad Norm"; nums[2]=&rgnrm; tols[2]=rgtol; names[3]="Actual Redn"; nums[3]=&actred; tols[3]=-numeric_limits::max(); names[4]="Predicted Redn"; nums[4]=&predred; tols[4]=ScalarFieldTraits::Zero(); names[5]="Trust Radius"; nums[5]=&(this->Delta); tols[5]=ScalarFieldTraits::Zero(); } }; } #endif