#undef DOLD #ifdef DOLD void acde_2d_d(float **uc, float **ucd, float **up, float **upd, float **csq, float ***csqd, int *s, int *e, int *z, float **c, int k, int *lbc, int *rbc, float *lap, float *csqdlap) { int i0, i1, i2, ioff; int s0 = s[0]; int e0 = e[0]; float c0 = c[0][0] + c[1][0]; float lapd; /* calculate the Laplacian operator as a function of x */ for (i1 = s[1]; i1 <= e[1]; i1++) { #pragma ivdep for (i0 = s0; i0 <= e0; i0++) { lap[i0] = c0*uc[i1][i0]; } for (ioff=1; ioff < k+1; ++ioff) #pragma ivdep for (i0 = s0; i0 <= e0; i0++) { lap[i0]+= c[0][ioff]*(uc[i1][i0+ioff]+uc[i1][i0-ioff]) + c[1][ioff]*(uc[i1+ioff][i0]+uc[i1-ioff][i0]); csqdlap[i0] = 0.0; } /* end loop over x */ /* apply the extended csq operator on the laplacian */ for (i2 = s[2]-z[2]; i2 <= e[2]-z[2]; i2++) { int locs0 = (s[0] > s[0] + 2*i2 ? s[0] : s[0] + 2*i2); int loce0 = (e[0] < e[0] + 2*i2 ? e[0] : e[0] + 2*i2); int loci2 = i2 + z[2]; #pragma ivdep for (i0 = locs0; i0 <= loce0; i0++) { csqdlap[i0] += csqd[loci2][i1][i0-i2] * lap[i0-2*i2]; } /* end loop over x */ } /* end loop over h */ /* extrapolate the next time step */ /* NOTE: this version assumes csq has already been scaled by dx[2] */ #pragma ivdep for (i0 = s0; i0 <= e0; i0++) { lapd = c0*ucd[i1][i0]; } for (ioff=1; ioff < k+1; ++ioff) { #pragma ivdep for (i0 = s0; i0 <= e0; i0++) { lapd += c[0][ioff]*(ucd[i1][i0+ioff]+ucd[i1][i0-ioff]) + c[1][ioff]*(ucd[i1+ioff][i0]+ucd[i1-ioff][i0]); } } #pragma ivdep for (i0 = s0; i0 <= e0; i0++) { upd[i1][i0] = 2.0*ucd[i1][i0] - upd[i1][i0] + csqdlap[i0] + csq[i1][i0] * lapd; up[i1][i0] = 2.0*uc[i1][i0] - up[i1][i0] + csq[i1][i0] * lap[i0]; } /* end loop over x */ } /* end loop over z */ /* boundary conditions - note that uc[-1][i]=0 etc. */ if (lbc[1]) for (ioff = 0; ioff < k-1; ++ioff) #pragma ivdep for (i0 = s0; i0 < e0+1; ++i0) { upd[s[1] - ioff - 2][i0] = -upd[s[1]+ioff][i0]; up[s[1] - ioff - 2][i0] = -up[s[1]+ioff][i0]; } if (rbc[1]) for (ioff = 0; ioff < k-1; ++ioff) #pragma ivdep for (i0 = s0; i0 < e0+1; ++i0) { upd[e[1] + 2 + ioff][i0] = -upd[e[1]-ioff][i0]; up[e[1] + 2 + ioff][i0] = -up[e[1]-ioff][i0]; } if (lbc[0]) for (i1 = s[1]; i1 < e[1]+1; ++i1) for (ioff = 0; ioff < k-1; ++ioff) { upd[i1][s[0] - ioff - 2] = -upd[i1][s[0]+ioff]; up[i1][s[0] - ioff - 2] = -up[i1][s[0]+ioff]; } if (rbc[0]) for (i1 = s[1]; i1 < e[1]+1; ++i1) for (ioff = 0; ioff < k-1; ++ioff) { upd[i1][e[0] + ioff + 2] = -upd[i1][e[0]-ioff]; up[i1][e[0] + ioff + 2] = -up[i1][e[0]-ioff]; } } #else /* Generated by TAPENADE (INRIA, Ecuador team) Tapenade 3.11 (r5902M) - 15 Dec 2015 09:00 Differentiation of acde_2d in forward (tangent) mode: variations of useful results: **up with respect to varying inputs: ***csq **uc **up RW status of diff variables: ***csq:in **uc:in **up:in-out Plus diff mem management of: lap:in csq:in *csq:in **csq:in uc:in *uc:in up:in *up:in csqlap:in */ void acde_2d_d(float **uc, float **ucd, float **up, float **upd, float ***csq, float ***csqd, int *s, int *e, int *z, float **c, int k, int pbg, int *lbc, int *rbc, float *dx, float *lap, float *lapd, float *csqlap, float *csqlapd) { int i0, i1, i2; int ioff; int s0 = s[0]; int e0 = e[0]; int z2 = z[2]; // compute central fd coeff float c0 = c[0][0] + c[1][0]; /* calculate the Laplacian operator as a function of x */ // fprintf(stderr,"s0=%d s1=%d s2=%d e0=%d e1=%d e2=%d\n",s[0],s[1],s[2],e[0],e[1],e[2]); for (i1 = s[1]; i1 < e[1]+1; ++i1) { #pragma ivdep for (i0 = s0; i0 < e0+1; ++i0) { lapd[i0] = c0*ucd[i1][i0]; lap[i0] = c0*uc[i1][i0]; csqlapd[i0] = 0.0; csqlap[i0] = 0.0; } for (ioff = 1; ioff < k+1; ++ioff) #pragma ivdep for (i0 = s0; i0 < e0+1; ++i0) { lapd[i0] += c[0][ioff]*(ucd[i1][i0+ioff]+ucd[i1][i0-ioff]) + c[1][ioff]*(ucd[i1+ioff][i0]+ucd[i1-ioff][i0]); lap[i0] += c[0][ioff]*(uc[i1][i0+ioff]+uc[i1][i0-ioff]) + c[1][ioff]*(uc[i1+ioff][i0]+uc[i1-ioff][i0]); } /* end loop over x apply the extended csq operator on the laplacian Change 14.03.16 WWS: use physbg flag to switch between loop over offset and zero offset only */ // fprintf(stderr,"s[2]=%d e[2]=%d\n",s[2],e[2]); if (pbg) { #pragma ivdep for (i0 = s0; i0 < e0+1; ++i0) { csqlap[i0] += csq[z2][i1][i0]*lap[i0]; } for (i2 = s[2]-z[2]; i2 < e[2]-z[2]+1; ++i2) { int locs0 = (s[0] > s[0] + 2*i2 ? s[0] : s[0] + 2*i2); int loce0 = (e[0] < e[0] - 2*i2 ? e[0] : e[0] - 2*i2); int loci2 = i2 + z[2]; #pragma ivdep for (i0 = locs0; i0 < loce0+1; ++i0) { csqlapd[i0] += csqd[loci2][i1][i0-i2]*lap[i0-2*i2]; } } } else { for (i2 = s[2]-z[2]; i2 < e[2]-z[2]+1; ++i2) { int locs0 = (s[0] > s[0] + 2*i2 ? s[0] : s[0] + 2*i2); int loce0 = (e[0] < e[0] - 2*i2 ? e[0] : e[0] - 2*i2); int loci2 = i2 + z[2]; #pragma ivdep for (i0 = locs0; i0 < loce0+1; ++i0) { csqlapd[i0] += csqd[loci2][i1][i0-i2]*lap[i0-2*i2] + csq[loci2][i1][i0-i2]*lapd[i0-2*i2]; csqlap[i0] += csq[loci2][i1][i0-i2]*lap[i0-2*i2]; } } } /* extrapolate the next time step NOTE: this version assumes csq has already been scaled by dx[2]*/ // end loop over x // end loop over h #pragma ivdep for (i0 = s0; i0 < e0+1; ++i0) { upd[i1][i0] = 2.0*ucd[i1][i0] - upd[i1][i0] + csqlapd[i0]; up[i1][i0] = 2.0*uc[i1][i0] - up[i1][i0] + csqlap[i0]; //dx[2]*csqlap[i0]; } } /* end loop over z boundary conditions */ if (lbc[1]) for (ioff = 0; ioff < k-1; ++ioff) #pragma ivdep for (i0 = s0; i0 < e0+1; ++i0) { upd[s[1] - ioff - 2][i0] = -upd[s[1]+ioff][i0]; up[s[1] - ioff - 2][i0] = -up[s[1]+ioff][i0]; } if (rbc[1]) for (ioff = 0; ioff < k-1; ++ioff) #pragma ivdep for (i0 = s0; i0 < e0+1; ++i0) { upd[e[1] + 2 + ioff][i0] = -upd[e[1]-ioff][i0]; up[e[1] + 2 + ioff][i0] = -up[e[1]-ioff][i0]; } if (lbc[0]) for (i1 = s[1]; i1 < e[1]+1; ++i1) for (ioff = 0; ioff < k-1; ++ioff) { upd[i1][s[0] - ioff - 2] = -upd[i1][s[0]+ioff]; up[i1][s[0] - ioff - 2] = -up[i1][s[0]+ioff]; } if (rbc[0]) for (i1 = s[1]; i1 < e[1]+1; ++i1) for (ioff = 0; ioff < k-1; ++ioff) { upd[i1][e[0] + ioff + 2] = -upd[i1][e[0]-ioff]; up[i1][e[0] + ioff + 2] = -up[i1][e[0]-ioff]; } } #endif