#ifndef _UMIN_TR_MAT
#define _UMIN_TR_MAT
#include "rnop.hh"
#include "alg.hh"
extern "C" {
typedef struct { float r, i; } f2c_complex;
typedef struct { double r, i; } f2c_dblcomplex;
int sgelsy_(long *m, long *n, long *nrhs, float *a, long * lda, float * b, long * ldb, long * jpvt, float * rcond, long * rank, float * work, long * lwork, long * info);
int dgelsy_(long *m, long *n, long *nrhs, double *a, long * lda, double * b, long * ldb, long * jpvt, double * rcond, long * rank, double * work, long * lwork, long * info);
int cgelsy_(long *m, long *n, long *nrhs, f2c_complex *a, long * lda, f2c_complex * b, long * ldb, long * jpvt, float * rcond, long * rank, f2c_complex * work, long * lwork, float * rwork, long * info);
int zgelsy_(long *m, long *n, long *nrhs, f2c_dblcomplex *a, long * lda, f2c_dblcomplex * b, long * ldb, long * jpvt, double * rcond, long * rank, f2c_dblcomplex * work, long * lwork, double * rwork, long * info);
}
/* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) */
/* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
/* LWORK (input) INTEGER */
/* The dimension of the array WORK. */
/* The unblocked strategy requires that: */
/* LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ), */
/* where MN = min( M, N ). */
/* The block algorithm requires that: */
/* LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ), */
/* where NB is an upper bound on the blocksize returned */
/* by ILAENV for the routines SGEQP3, STZRZF, STZRQF, SORMQR, */
/* and SORMRZ. */
// assumptions: (1) expect problems to be well-conditioned, so set rcond=1
// (3) set up for one rhs
// (2) assume that block strategy is used and N \le M, set NB=N thus
// LWORK = MAX(MN+3*N+N^2, 2*MN+N+1), WORK=LWORK
namespace RVLUmin {
using namespace RVL;
using namespace RVLAlg;
/** LAPACK-derived least squares solver. Uses the xgelsy family of
algorithms. Since the various numeric types are opaque to C++,
must provide four separate template specializations.
This version is set up to conform to the CLAPACK calling
conventions, and must be linked with CLAPACK. It is probably
necessary to modify the interfaces slightly for Fortran-based
vendor or compiled LAPACKs.
assumptions:
- expect problems to be well-conditioned, so set rcond=1
- set up for one rhs
- assume that block strategy is used and N <= M, set NB=N thus
LWORK = MAX(MN+3*N+N^2, 2*MN+N+1), WORK=LWORK
notes:
- CLAPACK integer type is long - this is same as int
under linux, but NOT under OS-X, so use long as the integer type
for all arguments and temporaries
- Since LAPACK overwrites the storage for both the matrix
(input matrix on call, QR factors on return) and the solution
vector (right-hand side on call, solution on return), these must
be copied. This function collection makes a temporary for each -
for the solution, long enough to hold either the RHS or the
solution - then copies as necessary. This waste of storage would
have to occur either here or in the driver, and lets the input
matrix be passed as consts
- Even if you didn't wanted to keep the LAPACK calling
convention (overwriting inputs), you couldn't for the complex
case, because the CLAPACK/F2C complex types are incompatible
with std::complex.
*/
template
void LAPACK_LS(T * xdata,
T const * Adata,
T const * bdata,
A * rcond, long * rank, long m, long n, long lwork, long * info);
template<>
void LAPACK_LS(float * xdata,
float const * Adata,
float const * bdata,
float * rcond, long * rank, long m, long n, long lwork, long * info) {
float * work = new float[lwork];
for (int i=0;i
void LAPACK_LS(double * xdata,
double const * Adata,
double const * bdata,
double * rcond, long * rank, long m, long n, long lwork, long * info) {
double * work = new double[lwork];
for (int i=0;i
void LAPACK_LS,float>(std::complex * xdata,
std::complex const * Adata,
std::complex const * bdata,
float * rcond, long * rank, long m,
long n, long lwork, long * info) {
f2c_complex * work = new f2c_complex[lwork];
for (int i=0;i(tmp[j].r,tmp[j].i);
}
}
delete [] jpvt;
delete [] work;
delete [] tmpa;
delete [] tmp;
delete [] rwork;
}
template<>
void LAPACK_LS,double>(std::complex * xdata,
std::complex const * Adata,
std::complex const * bdata,
double * rcond, long * rank, long m,
long n, long lwork, long * info) {
f2c_dblcomplex * work = new f2c_dblcomplex[lwork];
for (int i=0;i(tmp[i].r,tmp[i].i);
}
}
delete [] jpvt;
delete [] work;
delete [] tmpa;
delete [] tmp;
delete [] rwork;
}
/** LAPACK-based least squares solver as a local FO, with
the matrix stored as instance data for a GenMat
*/
template
class LSMatFO: public BinaryLocalFunctionObject {
typedef typename ScalarFieldTraits::AbsType AT;
public:
void operator()(LocalDataContainer & x,
LocalDataContainer const & b) {
try {
long m = A.getNRows();
long n = A.getNCols();
AT rcond=0.00001;
long rank=0;
long lwork = max((m+n+3)*n,(3*m+1)*n+1);
long info=0;
LAPACK_LS(x.getData(),
A.getData(),
b.getData(),
&rcond,&rank,m,n,lwork,&info);
if (info<0) {
RVLException e;
e<<"Error: LAPACK_LS, float case\n";
e<<"SGELSY returned with nozero error flag = "<::run\n";
throw e;
}
}
/** main constructor builds on a GenMat */
LSMatFO(GenMat const & _A): A(_A) {}
string getName() const { string tmp = "LSMatFO"; return tmp; }
private:
GenMat const & A;
};
/** LAPACK-based least squares solver with trust region truncation.
Stores mutable references to
- solution vector - zeroed on call, contains solution estimate on return
- scalars for residual norm and normal residual norm - on
construction, these are set to the norm of the right-hand side
and of the right-hand side of the normal equations,
respectively, so the constructor can be used in a driver to
initialize these quantities. On return, residual resp. normal
residual norms at estimated solution.
Applies trust region truncation to solution estimate - thus the
returned solution may NOT be the least squares solution, but
rather the closest point in the trust region to it!
Note that trust region solvers are also set up to act as
Terminators, with the query function returning true if the trust
radius is exceeded. Dependent Gauss-Newton iterations stop
updating their solution estimates, and update trust radius
instead, when this happens.
*/
template
class LSMatAlg: public Algorithm, public Terminator {
typedef typename ScalarFieldTraits::AbsType atype;
public:
bool query() { return proj; }
void run() {
try {
// initialize x
x.zero();
// solve least squares problem
LSMatFO mfo(A);
x.eval(mfo,b);
// scale to trust region boundary
atype xnorm=x.norm();
proj=false;
if (xnorm>Delta) {
proj=true;
T scfac = Delta/xnorm;
x.scale(scfac);
str<<"RVLUmin::LSMatAlg::run: trust region truncation applied\n";
str<<" untruncated solution norm = "<::One(),b);
// note that this is Ax-b, rather than b-Ax - doesn't matter, as it's internal
rnorm=r.norm();
// compute gradient
A.applyAdjOp(r,g);
// signs cancel so g is least-squares gradient
nrnorm=g.norm();
/*
str<<"LSMatAlg: LAPACK-based Least Squares solver with trust-region truncation"< & _x,
GenMat const & _A,
Vector const & _b,
atype & _rnorm,
atype & _nrnorm,
atype _Delta,
ostream & _str)
: x(_x), A(_A), r(A.getRange()), g(A.getDomain()), b(_b), rnorm(_rnorm), nrnorm(_nrnorm), Delta(_Delta), proj(false), str(_str) {
r.copy(b);
rnorm=r.norm();
A.applyAdjOp(r,g);
nrnorm=g.norm();
}
private:
Vector & x;
GenMat const & A;
Vector const & b;
Vector r;
Vector g;
atype & rnorm;
atype & nrnorm;
atype Delta;
mutable bool proj;
ostream & str;
};
/** Factory class for LSMatAlg. Intended as policy for least squares
/ trust-region solver in Steihaug-Toint algorithm
implementation, hence takes OperatorEvaluation. The Operator is
extracted from this on construction, and an attempt is made to
cast the Operator to a GenOp, i.e. an Operator with Derivative
represented by a GenMat. The resulting GenMat is passed to the
build method to implement an LSMatAlg factory.
Conforms to specifications described in TRGNAlg docs.
*/
template
class LSMatPolicy {
typedef typename ScalarFieldTraits::AbsType atype;
public:
/** Sets up Gauss-Newton problem, verifies that the derivative is
actually represented by a GenMat, then builds solver.
*/
LSMatAlg * build(Vector & x,
OperatorEvaluation & opeval,
atype & rnorm,
atype & nrnorm,
atype Delta,
ostream & str) const {
try {
// first make sure operator is applied - want the Operator for the current point.
Vector const & rhs = opeval.getValue();
// then get Operator
OpWithGenMatDeriv const & gop = dynamic_cast< OpWithGenMatDeriv const &> (opeval.getOp());
GenMat const & G = gop.getGenMat();
if (G.getNRows() < G.getNCols()) {
RVLException e;
e<<"Error: LSMatPolicy::build \n";
e<<"GenMat object passed as 2nd arg has more cols than rows\n";
throw e;
}
return new LSMatAlg(x,G,rhs,rnorm,nrnorm,Delta,str);
}
catch (bad_cast) {
RVLException e;
e<<"Error: LSMatPolicy::build \n";
e<<"input LinearOp not of GenMat type\n";
throw e;
}
catch (RVLException & e) {
e<<"\ncalled from LSMatPolicy::build\n";
throw e;
}
}
};
}
#endif