public class CG { /* Solve Ax = b using a preconditioned conjugate-gradient iteration. * * Arguments: * aMat - Matrix A, an nxn Sparse matrix on which matvec is called * mMat - Matrix M, an nxn Sparse matrix on which psolve is called * b - System right-hand side, an n-element vector * x - An n-ement vector for storing the result * rtol - Tolerance on the relative residual (||r||/||b||) * n - System size * rhist - Residual norm history. Should be at least maxiter doubles * if it is not null. * maxiter - Maximum number of iterations before returning * * Returns: * Iteration count on success, -1 on failure */ public static int precondCG(Sparse aMat, Preconditioner pMat, double [1d] single [1d] b, double [1d] single [1d] x, double single rtol, int n, double [1d] rhist, int single maxiter) { double single bnorm2; /* ||b||^2 */ double single rnorm2; /* Residual norm squared */ double rz, rzold; /* r'*z from two successive iterations */ double alpha, beta; double [1d] single [1d] s; /* Search direction */ double [1d] single [1d] r; /* Residual */ double [1d] single [1d] z; /* Temporary vector */ int thisP = Ti.thisProc(); int single numP = Ti.numProcs(); int single i; int myRowCount = PVector.numPer(n); int myLow = PVector.lowPer(n); s = new double [0:numP-1][1d]; r = new double [0:numP-1][1d]; z = new double [0:numP-1][1d]; r.exchange(new double [myLow:myLow+myRowCount-1]); s.exchange(new double [myLow:myLow+myRowCount-1]); z.exchange(new double [myLow:myLow+myRowCount-1]); bnorm2 = PVector.dot(b, b); x[thisP].set(0); r[thisP].copy(b[thisP]); Ti.barrier(); pMat.psolve(z, r); s[thisP].copy(z[thisP]); Ti.barrier(); rz = PVector.dot(r, z); rnorm2 = PVector.dot(r, r); for (i = 0; i < maxiter && rnorm2 > bnorm2 * rtol*rtol; ++i) { if (rhist != null) rhist[i] = Math.sqrt(rnorm2/bnorm2); aMat.matvec(z, s); Ti.barrier(); alpha = rz / PVector.dot(s, z); PVector.axpy(x, alpha, s, x); Ti.barrier(); PVector.axpy(r, -alpha, z, r); Ti.barrier(); pMat.psolve(z, r); Ti.barrier(); rzold = rz; rz = PVector.dot(r, z); rnorm2 = PVector.dot(r, r); beta = -rz/rzold; PVector.axpy(s, -beta, s, z); Ti.barrier(); } z = null; // Allow temporary space to be freed (not necessary) r = null; s = null; if (i >= maxiter) return -1; if (rhist != null) rhist[i] = Math.sqrt(rnorm2/bnorm2); return i; } }