#ifndef _UMIN_TR_MAT #define _UMIN_TR_MAT #include "rnop.hh" #include "alg.hh" extern "C" { typedef struct { float r, i; } f2c_complex; typedef struct { double r, i; } f2c_dblcomplex; int sgelsy_(long *m, long *n, long *nrhs, float *a, long * lda, float * b, long * ldb, long * jpvt, float * rcond, long * rank, float * work, long * lwork, long * info); int dgelsy_(long *m, long *n, long *nrhs, double *a, long * lda, double * b, long * ldb, long * jpvt, double * rcond, long * rank, double * work, long * lwork, long * info); int cgelsy_(long *m, long *n, long *nrhs, f2c_complex *a, long * lda, f2c_complex * b, long * ldb, long * jpvt, float * rcond, long * rank, f2c_complex * work, long * lwork, float * rwork, long * info); int zgelsy_(long *m, long *n, long *nrhs, f2c_dblcomplex *a, long * lda, f2c_dblcomplex * b, long * ldb, long * jpvt, double * rcond, long * rank, f2c_dblcomplex * work, long * lwork, double * rwork, long * info); } /* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* The unblocked strategy requires that: */ /* LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ), */ /* where MN = min( M, N ). */ /* The block algorithm requires that: */ /* LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ), */ /* where NB is an upper bound on the blocksize returned */ /* by ILAENV for the routines SGEQP3, STZRZF, STZRQF, SORMQR, */ /* and SORMRZ. */ // assumptions: (1) expect problems to be well-conditioned, so set rcond=1 // (3) set up for one rhs // (2) assume that block strategy is used and N \le M, set NB=N thus // LWORK = MAX(MN+3*N+N^2, 2*MN+N+1), WORK=LWORK namespace RVLUmin { using namespace RVL; using namespace RVLAlg; /** LAPACK-derived least squares solver. Uses the xgelsy family of algorithms. Since the various numeric types are opaque to C++, must provide four separate template specializations. This version is set up to conform to the CLAPACK calling conventions, and must be linked with CLAPACK. It is probably necessary to modify the interfaces slightly for Fortran-based vendor or compiled LAPACKs. assumptions:

expect problems to be well-conditioned, so set rcond=1
set up for one rhs
assume that block strategy is used and N <= M, set NB=N thus LWORK = MAX(MN+3*N+N^2, 2*MN+N+1), WORK=LWORK

notes:

CLAPACK integer type is long - this is same as int under linux, but NOT under OS-X, so use long as the integer type for all arguments and temporaries
Since LAPACK overwrites the storage for both the matrix (input matrix on call, QR factors on return) and the solution vector (right-hand side on call, solution on return), these must be copied. This function collection makes a temporary for each - for the solution, long enough to hold either the RHS or the solution - then copies as necessary. This waste of storage would have to occur either here or in the driver, and lets the input matrix be passed as consts
Even if you didn't wanted to keep the LAPACK calling convention (overwriting inputs), you couldn't for the complex case, because the CLAPACK/F2C complex types are incompatible with std::complex.

*/ template void LAPACK_LS(T * xdata, T const * Adata, T const * bdata, A * rcond, long * rank, long m, long n, long lwork, long * info); template<> void LAPACK_LS(float * xdata, float const * Adata, float const * bdata, float * rcond, long * rank, long m, long n, long lwork, long * info) { float * work = new float[lwork]; for (int i=0;i void LAPACK_LS(double * xdata, double const * Adata, double const * bdata, double * rcond, long * rank, long m, long n, long lwork, long * info) { double * work = new double[lwork]; for (int i=0;i void LAPACK_LS,float>(std::complex * xdata, std::complex const * Adata, std::complex const * bdata, float * rcond, long * rank, long m, long n, long lwork, long * info) { f2c_complex * work = new f2c_complex[lwork]; for (int i=0;i(tmp[j].r,tmp[j].i); } } delete [] jpvt; delete [] work; delete [] tmpa; delete [] tmp; delete [] rwork; } template<> void LAPACK_LS,double>(std::complex * xdata, std::complex const * Adata, std::complex const * bdata, double * rcond, long * rank, long m, long n, long lwork, long * info) { f2c_dblcomplex * work = new f2c_dblcomplex[lwork]; for (int i=0;i(tmp[i].r,tmp[i].i); } } delete [] jpvt; delete [] work; delete [] tmpa; delete [] tmp; delete [] rwork; } /** LAPACK-based least squares solver as a local FO, with the matrix stored as instance data for a GenMat */ template class LSMatFO: public BinaryLocalFunctionObject { typedef typename ScalarFieldTraits::AbsType AT; public: void operator()(LocalDataContainer & x, LocalDataContainer const & b) { try { long m = A.getNRows(); long n = A.getNCols(); AT rcond=0.00001; long rank=0; long lwork = max((m+n+3)*n,(3*m+1)*n+1); long info=0; LAPACK_LS(x.getData(), A.getData(), b.getData(), &rcond,&rank,m,n,lwork,&info); if (info<0) { RVLException e; e<<"Error: LAPACK_LS, float case\n"; e<<"SGELSY returned with nozero error flag = "<::run\n"; throw e; } } /** main constructor builds on a GenMat */ LSMatFO(GenMat const & _A): A(_A) {} string getName() const { string tmp = "LSMatFO"; return tmp; } private: GenMat const & A; }; /** LAPACK-based least squares solver with trust region truncation. Stores mutable references to

solution vector - zeroed on call, contains solution estimate on return
scalars for residual norm and normal residual norm - on construction, these are set to the norm of the right-hand side and of the right-hand side of the normal equations, respectively, so the constructor can be used in a driver to initialize these quantities. On return, residual resp. normal residual norms at estimated solution.

Applies trust region truncation to solution estimate - thus the returned solution may NOT be the least squares solution, but rather the closest point in the trust region to it! Note that trust region solvers are also set up to act as Terminators, with the query function returning true if the trust radius is exceeded. Dependent Gauss-Newton iterations stop updating their solution estimates, and update trust radius instead, when this happens. */ template class LSMatAlg: public Algorithm, public Terminator { typedef typename ScalarFieldTraits::AbsType atype; public: bool query() { return proj; } void run() { try { // initialize x x.zero(); // solve least squares problem LSMatFO mfo(A); x.eval(mfo,b); // scale to trust region boundary atype xnorm=x.norm(); proj=false; if (xnorm>Delta) { proj=true; T scfac = Delta/xnorm; x.scale(scfac); str<<"RVLUmin::LSMatAlg::run: trust region truncation applied\n"; str<<" untruncated solution norm = "<::One(),b); // note that this is Ax-b, rather than b-Ax - doesn't matter, as it's internal rnorm=r.norm(); // compute gradient A.applyAdjOp(r,g); // signs cancel so g is least-squares gradient nrnorm=g.norm(); /* str<<"LSMatAlg: LAPACK-based Least Squares solver with trust-region truncation"< & _x, GenMat const & _A, Vector const & _b, atype & _rnorm, atype & _nrnorm, atype _Delta, ostream & _str) : x(_x), A(_A), r(A.getRange()), g(A.getDomain()), b(_b), rnorm(_rnorm), nrnorm(_nrnorm), Delta(_Delta), proj(false), str(_str) { r.copy(b); rnorm=r.norm(); A.applyAdjOp(r,g); nrnorm=g.norm(); } private: Vector & x; GenMat const & A; Vector const & b; Vector r; Vector g; atype & rnorm; atype & nrnorm; atype Delta; mutable bool proj; ostream & str; }; /** Factory class for LSMatAlg. Intended as policy for least squares / trust-region solver in Steihaug-Toint algorithm implementation, hence takes OperatorEvaluation. The Operator is extracted from this on construction, and an attempt is made to cast the Operator to a GenOp, i.e. an Operator with Derivative represented by a GenMat. The resulting GenMat is passed to the build method to implement an LSMatAlg factory. Conforms to specifications described in TRGNAlg docs. */ template class LSMatPolicy { typedef typename ScalarFieldTraits::AbsType atype; public: /** Sets up Gauss-Newton problem, verifies that the derivative is actually represented by a GenMat, then builds solver. */ LSMatAlg * build(Vector & x, OperatorEvaluation & opeval, atype & rnorm, atype & nrnorm, atype Delta, ostream & str) const { try { // first make sure operator is applied - want the Operator for the current point. Vector const & rhs = opeval.getValue(); // then get Operator OpWithGenMatDeriv const & gop = dynamic_cast< OpWithGenMatDeriv const &> (opeval.getOp()); GenMat const & G = gop.getGenMat(); if (G.getNRows() < G.getNCols()) { RVLException e; e<<"Error: LSMatPolicy::build \n"; e<<"GenMat object passed as 2nd arg has more cols than rows\n"; throw e; } return new LSMatAlg(x,G,rhs,rnorm,nrnorm,Delta,str); } catch (bad_cast) { RVLException e; e<<"Error: LSMatPolicy::build \n"; e<<"input LinearOp not of GenMat type\n"; throw e; } catch (RVLException & e) { e<<"\ncalled from LSMatPolicy::build\n"; throw e; } } }; } #endif