#undef DOLD #ifdef DOLD void acde_2d_b(float **uc, float **ucb, float **up, float **upb, float **csq, float ***csqb, int *s, int *e, int *z, float **c, int k, int *lbc, int *rbc, float *lap) { int i0, i1, i2, ioff; int s0 = s[0]; int e0 = e[0]; float c0 = c[0][0] + c[1][0]; /* boundary conditions - note that uc[-1][i]=0 etc. */ if (rbc[0]) for (i1 = e[1]; i1 > s[1]-1; --i1) for (ioff = k-2; ioff > -1; --ioff) { upb[i1][e[0] - ioff] = upb[i1][e[0] - ioff] - upb[i1][e[0] + ioff + 2]; upb[i1][e[0] + ioff + 2] = 0.0; } if (lbc[0]) for (i1 = e[1]; i1 > s[1]-1; --i1) for (ioff = k-2; ioff > -1; --ioff) { upb[i1][s[0] + ioff] = upb[i1][s[0] + ioff] - upb[i1][s[0] - ioff - 2]; upb[i1][s[0] - ioff - 2] = 0.0; } if (rbc[1]) for (ioff = k-2; ioff > -1; --ioff) #pragma ivdep for (i0 = e0; i0 > s0-1; --i0) { upb[e[1] - ioff][i0] = upb[e[1] - ioff][i0] - upb[e[1] + 2 + ioff][i0]; upb[e[1] + 2 + ioff][i0] = 0.0; } if (lbc[1]) for (ioff = k-2; ioff > -1; --ioff) #pragma ivdep for (i0 = e0; i0 > s0-1; --i0) { upb[s[1] + ioff][i0] = upb[s[1] + ioff][i0] - upb[s[1] - ioff - 2][i0]; upb[s[1] - ioff - 2][i0] = 0.0; } /* calculate the Laplacian operator as a function of x */ for (i1 = e[1]; i1 >= s[1]; i1--) { #pragma ivdep for (i0 = s0; i0 <= e0; i0++) { lap[i0] = c0*uc[i1][i0]; } for (ioff=1; ioff < k+1; ++ioff) { #pragma ivdep for (i0 = s0; i0 <= e0; i0++) { lap[i0] += c[0][ioff]*(uc[i1][i0+ioff]+uc[i1][i0-ioff]) + c[1][ioff]*(uc[i1+ioff][i0]+uc[i1-ioff][i0]); } /* end loop over x */ } /* calculate the extended csqb operator */ for (i2 = s[2]-z[2]; i2 <= e[2]-z[2]; i2++) { int locs0 = (s[0] > s[0] + 2*i2 ? s[0] : s[0] + 2*i2); int loce0 = (e[0] < e[0] + 2*i2 ? e[0] : e[0] + 2*i2); int loci2 = i2 + z[2]; #pragma ivdep for (i0 = loce0; i0 >= locs0; i0--) { csqb[loci2][i1][i0-i2] += lap[i0-2*i2] * upb[i1][i0]; } /* end loop over x */ } /* end loop over h */ for (ioff=1; ioff < k+1; ++ioff) #pragma ivdep for (i0 = s0; i0 <= e0; i0++) { float csqupb = csq[i1][i0] * upb[i1][i0]; ucb[i1][i0 + ioff] += c[0][ioff]*csqupb; ucb[i1][i0 - ioff] += c[0][ioff]*csqupb; ucb[i1 + ioff][i0] += c[1][ioff]*csqupb; ucb[i1 - ioff][i0] += c[1][ioff]*csqupb; } /* extrapolate the previous time step */ #pragma ivdep for (i0 = e0; i0 >= s0; i0--) { float csqupb = csq[i1][i0] * upb[i1][i0]; ucb[i1][i0] += c0 * csqupb + 2.0*upb[i1][i0]; upb[i1][i0] = -upb[i1][i0]; } /* end loop over x */ } /* end loop over z */ } #else /* Generated by TAPENADE (INRIA, Ecuador team) Tapenade 3.11 (r5902M) - 15 Dec 2015 09:00 Differentiation of acde_2d in reverse (adjoint) mode: gradient of useful results: **uc **up with respect to varying inputs: ***csq **uc **up RW status of diff variables: ***csq:out **uc:incr **up:in-out Plus diff mem management of: lap:in csq:in *csq:in **csq:in uc:in *uc:in up:in *up:in csqlap:in */ void acde_2d_b(float **uc, float **ucb, float **up, float **upb, float ***csq, float ***csqb, int *s, int *e, int *z, float **c, int k, int pbg, int *lbc, int *rbc, float *lap, float *csqupb) { int i0, i1, i2; int ioff; int s0 = s[0]; int e0 = e[0]; int z2 = z[2]; // compute central fd coeff float c0 = c[0][0] + c[1][0]; float tempb; float tempb0; float tempb1; /* boundary conditions - note that uc[-1][i]=0 etc. */ if (rbc[0]) for (i1 = e[1]; i1 > s[1]-1; --i1) for (ioff = k-2; ioff > -1; --ioff) { upb[i1][e[0] - ioff] = upb[i1][e[0] - ioff] - upb[i1][e[0] + ioff + 2]; upb[i1][e[0] + ioff + 2] = 0.0; } if (lbc[0]) for (i1 = e[1]; i1 > s[1]-1; --i1) for (ioff = k-2; ioff > -1; --ioff) { upb[i1][s[0] + ioff] = upb[i1][s[0] + ioff] - upb[i1][s[0] - ioff - 2]; upb[i1][s[0] - ioff - 2] = 0.0; } if (rbc[1]) for (ioff = k-2; ioff > -1; --ioff) #pragma ivdep for (i0 = e0; i0 > s0-1; --i0) { upb[e[1] - ioff][i0] = upb[e[1] - ioff][i0] - upb[e[1] + 2 + ioff][i0]; upb[e[1] + 2 + ioff][i0] = 0.0; } if (lbc[1]) for (ioff = k-2; ioff > -1; --ioff) #pragma ivdep for (i0 = e0; i0 > s0-1; --i0) { upb[s[1] + ioff][i0] = upb[s[1] + ioff][i0] - upb[s[1] - ioff - 2][i0]; upb[s[1] - ioff - 2][i0] = 0.0; } /* calculate the Laplacian operator as a function of x */ for (i1 = s[1]; i1 < e[1]+1; ++i1) { #pragma ivdep for (i0 = s0; i0 < e0+1; ++i0) { lap[i0] = c0*uc[i1][i0]; csqupb[i0]=0.0; } for (ioff = 1; ioff < k+1; ++ioff) { #pragma ivdep for (i0 = s0; i0 < e0+1; ++i0) { lap[i0] += c[0][ioff]*(uc[i1][i0+ioff]+uc[i1][i0-ioff]) + c[1][ioff]*(uc[i1+ioff][i0]+uc[i1-ioff][i0]); } /* end loop over x */ } /* calculate the extended csqb operator */ for (i2 = s[2]-z[2]; i2 <= e[2]-z[2]; i2++) { int locs0 = (s[0] > s[0] + 2*i2 ? s[0] : s[0] + 2*i2); int loce0 = (e[0] < e[0] + 2*i2 ? e[0] : e[0] + 2*i2); int loci2 = i2 + z[2]; #pragma ivdep for (i0 = loce0; i0 >= locs0; i0--) { csqb[loci2][i1][i0-i2] += lap[i0-2*i2] * upb[i1][i0]; } /* end loop over x */ } /* end loop over h */ if (!pbg) { for (i2 = e[2]-z[2]; i2 > s[2]-z[2]-1; --i2) { int locs0 = (s[0] > s[0] + 2*i2 ? s[0] : s[0] + 2*i2); int loce0 = (e[0] < e[0] + 2*i2 ? e[0] : e[0] + 2*i2); int loci2 = i2 + z[2]; #pragma ivdep for (i0 = loce0; i0 > locs0-1; --i0) { csqupb[i0 - 2*i2] += csq[loci2][i1][i0-i2]*upb[i1][i0]; } } for (ioff = k; ioff > 0; --ioff) { #pragma ivdep for (i0 = e0; i0 > s0-1; --i0) { tempb0 = c[0][ioff]*csqupb[i0]; tempb1 = c[1][ioff]*csqupb[i0]; ucb[i1][i0 + ioff] += tempb0; ucb[i1][i0 - ioff] += tempb0; ucb[i1 + ioff][i0] += tempb1; ucb[i1 - ioff][i0] += tempb1; } } #pragma ivdep for (i0 = e0; i0 > s0-1; --i0) { ucb[i1][i0] += c0*csqupb[i0]+ 2.0*upb[i1][i0]; upb[i1][i0] = -upb[i1][i0]; } } /* end non-physical bg branch */ else { for (ioff = k; ioff > 0; --ioff) { #pragma ivdep for (i0 = e0; i0 > s0-1; --i0) { tempb = csq[z2][i1][i0]*upb[i1][i0]; tempb0 = c[0][ioff]*tempb; tempb1 = c[1][ioff]*tempb; ucb[i1][i0 + ioff] += tempb0; ucb[i1][i0 - ioff] += tempb0; ucb[i1 + ioff][i0] += tempb1; ucb[i1 - ioff][i0] += tempb1; } } #pragma ivdep for (i0 = e0; i0 > s0-1; --i0) { ucb[i1][i0] += (c0*csq[z2][i1][i0] + 2.0)*upb[i1][i0]; upb[i1][i0] = -upb[i1][i0]; } } /* end physical bg branch */ } /* end loop over z */ } #endif