Adaptive RKF45 subcycling for hybrid-PIC B-field advance #6844
Open
prkkumar-he wants to merge 3 commits into BLAST-WarpX:development from
Open
Adaptive RKF45 subcycling for hybrid-PIC B-field advance #6844 — prkkumar-he wants to merge 3 commits into BLAST-WarpX:development from
prkkumar-he wants to merge 3 commits into BLAST-WarpX:development from
Conversation
Comment on lines
+745
to
+1161
| { | ||
| if (t + dt_sub > dt_half) { dt_sub = dt_half - t; } | ||
|
|
||
| // ---- Stage 1: B = B_old, FieldPush, K[comp0] = h*k1 ---- | ||
| for (int ii = 0; ii < 3; ii++) { | ||
| MultiFab::Copy(*Bfield[lev][ii], B_old[ii], 0, 0, 1, ng); | ||
| } | ||
| FieldPush(Bfield, Efield, Jfield, rhofield, eb_update_E, | ||
| dt_sub, subcycling_half, ng, nodal_sync); | ||
| for (int ii = 0; ii < 3; ii++) { | ||
| MultiFab::LinComb(K[ii], 1._rt, *Bfield[lev][ii], 0, -1._rt, B_old[ii], 0, 0, 1, ng); | ||
| } | ||
|
|
||
| // ---- Stage 2: B = B_old + a21*K[0], FieldPush, K[comp1] = h*k2 ---- | ||
| for (int ii = 0; ii < 3; ii++) { | ||
| MultiFab::LinComb(*Bfield[lev][ii], 1._rt, B_old[ii], 0, a21, K[ii], 0, 0, 1, ng); | ||
| } | ||
| FieldPush(Bfield, Efield, Jfield, rhofield, eb_update_E, | ||
| dt_sub, subcycling_half, ng, nodal_sync); | ||
| #ifdef AMREX_USE_OMP | ||
| #pragma omp parallel if (amrex::Gpu::notInLaunchRegion()) | ||
| #endif | ||
| for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) { | ||
| Array4<Real> const& Kx = K[0].array(mfi); | ||
| Array4<Real> const& Ky = K[1].array(mfi); | ||
| Array4<Real> const& Kz = K[2].array(mfi); | ||
| Array4<Real const> const& Bx = Bfield[lev][0]->const_array(mfi); | ||
| Array4<Real const> const& By = Bfield[lev][1]->const_array(mfi); | ||
| Array4<Real const> const& Bz = Bfield[lev][2]->const_array(mfi); | ||
| Array4<Real const> const& Bx_old = B_old[0].const_array(mfi); | ||
| Array4<Real const> const& By_old = B_old[1].const_array(mfi); | ||
| Array4<Real const> const& Bz_old = B_old[2].const_array(mfi); | ||
| Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect(), ng); | ||
| Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect(), ng); | ||
| Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect(), ng); | ||
| amrex::ParallelFor(tjx, tjy, tjz, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Kx(i, j, k, 1) = Bx(i, j, k) - Bx_old(i, j, k) - a21*Kx(i, j, k, 0); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Ky(i, j, k, 1) = By(i, j, k) - By_old(i, j, k) - a21*Ky(i, j, k, 0); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Kz(i, j, k, 1) = Bz(i, j, k) - Bz_old(i, j, k) - a21*Kz(i, j, k, 0); | ||
| } | ||
| ); | ||
| } | ||
|
|
||
| // ---- Stage 3: B = B_old + a31*K[0] + a32*K[1], FieldPush, K[comp2] = h*k3 ---- | ||
| #ifdef AMREX_USE_OMP | ||
| #pragma omp parallel if (amrex::Gpu::notInLaunchRegion()) | ||
| #endif | ||
| for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) { | ||
| Array4<Real> const& Bx = Bfield[lev][0]->array(mfi); | ||
| Array4<Real> const& By = Bfield[lev][1]->array(mfi); | ||
| Array4<Real> const& Bz = Bfield[lev][2]->array(mfi); | ||
| Array4<Real const> const& Kx = K[0].const_array(mfi); | ||
| Array4<Real const> const& Ky = K[1].const_array(mfi); | ||
| Array4<Real const> const& Kz = K[2].const_array(mfi); | ||
| Array4<Real const> const& Bx_old = B_old[0].const_array(mfi); | ||
| Array4<Real const> const& By_old = B_old[1].const_array(mfi); | ||
| Array4<Real const> const& Bz_old = B_old[2].const_array(mfi); | ||
| Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect(), ng); | ||
| Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect(), ng); | ||
| Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect(), ng); | ||
| amrex::ParallelFor(tjx, tjy, tjz, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Bx(i, j, k) = Bx_old(i, j, k) | ||
| + a31*Kx(i, j, k, 0) + a32*Kx(i, j, k, 1); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| By(i, j, k) = By_old(i, j, k) | ||
| + a31*Ky(i, j, k, 0) + a32*Ky(i, j, k, 1); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Bz(i, j, k) = Bz_old(i, j, k) | ||
| + a31*Kz(i, j, k, 0) + a32*Kz(i, j, k, 1); | ||
| } | ||
| ); | ||
| } | ||
| FieldPush(Bfield, Efield, Jfield, rhofield, eb_update_E, | ||
| dt_sub, subcycling_half, ng, nodal_sync); | ||
| #ifdef AMREX_USE_OMP | ||
| #pragma omp parallel if (amrex::Gpu::notInLaunchRegion()) | ||
| #endif | ||
| for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) { | ||
| Array4<Real> const& Kx = K[0].array(mfi); | ||
| Array4<Real> const& Ky = K[1].array(mfi); | ||
| Array4<Real> const& Kz = K[2].array(mfi); | ||
| Array4<Real const> const& Bx = Bfield[lev][0]->const_array(mfi); | ||
| Array4<Real const> const& By = Bfield[lev][1]->const_array(mfi); | ||
| Array4<Real const> const& Bz = Bfield[lev][2]->const_array(mfi); | ||
| Array4<Real const> const& Bx_old = B_old[0].const_array(mfi); | ||
| Array4<Real const> const& By_old = B_old[1].const_array(mfi); | ||
| Array4<Real const> const& Bz_old = B_old[2].const_array(mfi); | ||
| Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect(), ng); | ||
| Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect(), ng); | ||
| Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect(), ng); | ||
| amrex::ParallelFor(tjx, tjy, tjz, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Kx(i, j, k, 2) = Bx(i, j, k) - Bx_old(i, j, k) | ||
| - a31*Kx(i, j, k, 0) - a32*Kx(i, j, k, 1); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Ky(i, j, k, 2) = By(i, j, k) - By_old(i, j, k) | ||
| - a31*Ky(i, j, k, 0) - a32*Ky(i, j, k, 1); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Kz(i, j, k, 2) = Bz(i, j, k) - Bz_old(i, j, k) | ||
| - a31*Kz(i, j, k, 0) - a32*Kz(i, j, k, 1); | ||
| } | ||
| ); | ||
| } | ||
|
|
||
| // ---- Stage 4: B = B_old + a41*K[0] + a42*K[1] + a43*K[2], FieldPush, K[comp3] = h*k4 ---- | ||
| #ifdef AMREX_USE_OMP | ||
| #pragma omp parallel if (amrex::Gpu::notInLaunchRegion()) | ||
| #endif | ||
| for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) { | ||
| Array4<Real> const& Bx = Bfield[lev][0]->array(mfi); | ||
| Array4<Real> const& By = Bfield[lev][1]->array(mfi); | ||
| Array4<Real> const& Bz = Bfield[lev][2]->array(mfi); | ||
| Array4<Real const> const& Kx = K[0].const_array(mfi); | ||
| Array4<Real const> const& Ky = K[1].const_array(mfi); | ||
| Array4<Real const> const& Kz = K[2].const_array(mfi); | ||
| Array4<Real const> const& Bx_old = B_old[0].const_array(mfi); | ||
| Array4<Real const> const& By_old = B_old[1].const_array(mfi); | ||
| Array4<Real const> const& Bz_old = B_old[2].const_array(mfi); | ||
| Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect(), ng); | ||
| Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect(), ng); | ||
| Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect(), ng); | ||
| amrex::ParallelFor(tjx, tjy, tjz, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Bx(i, j, k) = Bx_old(i, j, k) | ||
| + a41*Kx(i, j, k, 0) + a42*Kx(i, j, k, 1) + a43*Kx(i, j, k, 2); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| By(i, j, k) = By_old(i, j, k) | ||
| + a41*Ky(i, j, k, 0) + a42*Ky(i, j, k, 1) + a43*Ky(i, j, k, 2); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Bz(i, j, k) = Bz_old(i, j, k) | ||
| + a41*Kz(i, j, k, 0) + a42*Kz(i, j, k, 1) + a43*Kz(i, j, k, 2); | ||
| } | ||
| ); | ||
| } | ||
| FieldPush(Bfield, Efield, Jfield, rhofield, eb_update_E, | ||
| dt_sub, subcycling_half, ng, nodal_sync); | ||
| #ifdef AMREX_USE_OMP | ||
| #pragma omp parallel if (amrex::Gpu::notInLaunchRegion()) | ||
| #endif | ||
| for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) { | ||
| Array4<Real> const& Kx = K[0].array(mfi); | ||
| Array4<Real> const& Ky = K[1].array(mfi); | ||
| Array4<Real> const& Kz = K[2].array(mfi); | ||
| Array4<Real const> const& Bx = Bfield[lev][0]->const_array(mfi); | ||
| Array4<Real const> const& By = Bfield[lev][1]->const_array(mfi); | ||
| Array4<Real const> const& Bz = Bfield[lev][2]->const_array(mfi); | ||
| Array4<Real const> const& Bx_old = B_old[0].const_array(mfi); | ||
| Array4<Real const> const& By_old = B_old[1].const_array(mfi); | ||
| Array4<Real const> const& Bz_old = B_old[2].const_array(mfi); | ||
| Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect(), ng); | ||
| Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect(), ng); | ||
| Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect(), ng); | ||
| amrex::ParallelFor(tjx, tjy, tjz, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Kx(i, j, k, 3) = Bx(i, j, k) - Bx_old(i, j, k) | ||
| - a41*Kx(i, j, k, 0) - a42*Kx(i, j, k, 1) - a43*Kx(i, j, k, 2); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Ky(i, j, k, 3) = By(i, j, k) - By_old(i, j, k) | ||
| - a41*Ky(i, j, k, 0) - a42*Ky(i, j, k, 1) - a43*Ky(i, j, k, 2); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Kz(i, j, k, 3) = Bz(i, j, k) - Bz_old(i, j, k) | ||
| - a41*Kz(i, j, k, 0) - a42*Kz(i, j, k, 1) - a43*Kz(i, j, k, 2); | ||
| } | ||
| ); | ||
| } | ||
|
|
||
| // ---- Stage 5: B = B_old + a51*K[0]+a52*K[1]+a53*K[2]+a54*K[3], FieldPush, K[comp4] = h*k5 ---- | ||
| #ifdef AMREX_USE_OMP | ||
| #pragma omp parallel if (amrex::Gpu::notInLaunchRegion()) | ||
| #endif | ||
| for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) { | ||
| Array4<Real> const& Bx = Bfield[lev][0]->array(mfi); | ||
| Array4<Real> const& By = Bfield[lev][1]->array(mfi); | ||
| Array4<Real> const& Bz = Bfield[lev][2]->array(mfi); | ||
| Array4<Real const> const& Kx = K[0].const_array(mfi); | ||
| Array4<Real const> const& Ky = K[1].const_array(mfi); | ||
| Array4<Real const> const& Kz = K[2].const_array(mfi); | ||
| Array4<Real const> const& Bx_old = B_old[0].const_array(mfi); | ||
| Array4<Real const> const& By_old = B_old[1].const_array(mfi); | ||
| Array4<Real const> const& Bz_old = B_old[2].const_array(mfi); | ||
| Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect(), ng); | ||
| Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect(), ng); | ||
| Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect(), ng); | ||
| amrex::ParallelFor(tjx, tjy, tjz, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Bx(i, j, k) = Bx_old(i, j, k) + a51*Kx(i, j, k, 0) + a52*Kx(i, j, k, 1) | ||
| + a53*Kx(i, j, k, 2) + a54*Kx(i, j, k, 3); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| By(i, j, k) = By_old(i, j, k) + a51*Ky(i, j, k, 0) + a52*Ky(i, j, k, 1) | ||
| + a53*Ky(i, j, k, 2) + a54*Ky(i, j, k, 3); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Bz(i, j, k) = Bz_old(i, j, k) + a51*Kz(i, j, k, 0) + a52*Kz(i, j, k, 1) | ||
| + a53*Kz(i, j, k, 2) + a54*Kz(i, j, k, 3); | ||
| } | ||
| ); | ||
| } | ||
| FieldPush(Bfield, Efield, Jfield, rhofield, eb_update_E, | ||
| dt_sub, subcycling_half, ng, nodal_sync); | ||
| #ifdef AMREX_USE_OMP | ||
| #pragma omp parallel if (amrex::Gpu::notInLaunchRegion()) | ||
| #endif | ||
| for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) { | ||
| Array4<Real> const& Kx = K[0].array(mfi); | ||
| Array4<Real> const& Ky = K[1].array(mfi); | ||
| Array4<Real> const& Kz = K[2].array(mfi); | ||
| Array4<Real const> const& Bx = Bfield[lev][0]->const_array(mfi); | ||
| Array4<Real const> const& By = Bfield[lev][1]->const_array(mfi); | ||
| Array4<Real const> const& Bz = Bfield[lev][2]->const_array(mfi); | ||
| Array4<Real const> const& Bx_old = B_old[0].const_array(mfi); | ||
| Array4<Real const> const& By_old = B_old[1].const_array(mfi); | ||
| Array4<Real const> const& Bz_old = B_old[2].const_array(mfi); | ||
| Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect(), ng); | ||
| Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect(), ng); | ||
| Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect(), ng); | ||
| amrex::ParallelFor(tjx, tjy, tjz, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Kx(i, j, k, 4) = Bx(i, j, k) - Bx_old(i, j, k) | ||
| - a51*Kx(i, j, k, 0) - a52*Kx(i, j, k, 1) | ||
| - a53*Kx(i, j, k, 2) - a54*Kx(i, j, k, 3); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Ky(i, j, k, 4) = By(i, j, k) - By_old(i, j, k) | ||
| - a51*Ky(i, j, k, 0) - a52*Ky(i, j, k, 1) | ||
| - a53*Ky(i, j, k, 2) - a54*Ky(i, j, k, 3); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Kz(i, j, k, 4) = Bz(i, j, k) - Bz_old(i, j, k) | ||
| - a51*Kz(i, j, k, 0) - a52*Kz(i, j, k, 1) | ||
| - a53*Kz(i, j, k, 2) - a54*Kz(i, j, k, 3); | ||
| } | ||
| ); | ||
| } | ||
|
|
||
| // ---- Stage 6: B = B_old + a61*K[0]+a62*K[1]+a63*K[2]+a64*K[3]+a65*K[4] ---- | ||
| // FieldPush, then overwrite K[comp1] with h*k6 (reads old K[comp1]=h*k2 in same kernel) | ||
| #ifdef AMREX_USE_OMP | ||
| #pragma omp parallel if (amrex::Gpu::notInLaunchRegion()) | ||
| #endif | ||
| for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) { | ||
| Array4<Real> const& Bx = Bfield[lev][0]->array(mfi); | ||
| Array4<Real> const& By = Bfield[lev][1]->array(mfi); | ||
| Array4<Real> const& Bz = Bfield[lev][2]->array(mfi); | ||
| Array4<Real const> const& Kx = K[0].const_array(mfi); | ||
| Array4<Real const> const& Ky = K[1].const_array(mfi); | ||
| Array4<Real const> const& Kz = K[2].const_array(mfi); | ||
| Array4<Real const> const& Bx_old = B_old[0].const_array(mfi); | ||
| Array4<Real const> const& By_old = B_old[1].const_array(mfi); | ||
| Array4<Real const> const& Bz_old = B_old[2].const_array(mfi); | ||
| Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect(), ng); | ||
| Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect(), ng); | ||
| Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect(), ng); | ||
| amrex::ParallelFor(tjx, tjy, tjz, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Bx(i, j, k) = Bx_old(i, j, k) + a61*Kx(i, j, k, 0) + a62*Kx(i, j, k, 1) | ||
| + a63*Kx(i, j, k, 2) + a64*Kx(i, j, k, 3) + a65*Kx(i, j, k, 4); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| By(i, j, k) = By_old(i, j, k) + a61*Ky(i, j, k, 0) + a62*Ky(i, j, k, 1) | ||
| + a63*Ky(i, j, k, 2) + a64*Ky(i, j, k, 3) + a65*Ky(i, j, k, 4); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Bz(i, j, k) = Bz_old(i, j, k) + a61*Kz(i, j, k, 0) + a62*Kz(i, j, k, 1) | ||
| + a63*Kz(i, j, k, 2) + a64*Kz(i, j, k, 3) + a65*Kz(i, j, k, 4); | ||
| } | ||
| ); | ||
| } | ||
| FieldPush(Bfield, Efield, Jfield, rhofield, eb_update_E, | ||
| dt_sub, subcycling_half, ng, nodal_sync); | ||
| // K[comp1] is overwritten here: reads h*k2 (old value) then writes h*k6 in each cell. | ||
| // This is safe since the read and write are on the same cell with no cross-cell dependency. | ||
| #ifdef AMREX_USE_OMP | ||
| #pragma omp parallel if (amrex::Gpu::notInLaunchRegion()) | ||
| #endif | ||
| for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) { | ||
| Array4<Real> const& Kx = K[0].array(mfi); | ||
| Array4<Real> const& Ky = K[1].array(mfi); | ||
| Array4<Real> const& Kz = K[2].array(mfi); | ||
| Array4<Real const> const& Bx = Bfield[lev][0]->const_array(mfi); | ||
| Array4<Real const> const& By = Bfield[lev][1]->const_array(mfi); | ||
| Array4<Real const> const& Bz = Bfield[lev][2]->const_array(mfi); | ||
| Array4<Real const> const& Bx_old = B_old[0].const_array(mfi); | ||
| Array4<Real const> const& By_old = B_old[1].const_array(mfi); | ||
| Array4<Real const> const& Bz_old = B_old[2].const_array(mfi); | ||
| Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect(), ng); | ||
| Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect(), ng); | ||
| Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect(), ng); | ||
| amrex::ParallelFor(tjx, tjy, tjz, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Kx(i, j, k, 1) = Bx(i, j, k) - Bx_old(i, j, k) | ||
| - a61*Kx(i, j, k, 0) - a62*Kx(i, j, k, 1) | ||
| - a63*Kx(i, j, k, 2) - a64*Kx(i, j, k, 3) - a65*Kx(i, j, k, 4); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Ky(i, j, k, 1) = By(i, j, k) - By_old(i, j, k) | ||
| - a61*Ky(i, j, k, 0) - a62*Ky(i, j, k, 1) | ||
| - a63*Ky(i, j, k, 2) - a64*Ky(i, j, k, 3) - a65*Ky(i, j, k, 4); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Kz(i, j, k, 1) = Bz(i, j, k) - Bz_old(i, j, k) | ||
| - a61*Kz(i, j, k, 0) - a62*Kz(i, j, k, 1) | ||
| - a63*Kz(i, j, k, 2) - a64*Kz(i, j, k, 3) - a65*Kz(i, j, k, 4); | ||
| } | ||
| ); | ||
| } | ||
|
|
||
| // ---- Assemble 4th-order solution B4 into Bfield ---- | ||
| // K[comp1] now holds h*k6; b2=0 so k2 (overwritten) is not needed for B4. | ||
| // Ghost cells are updated (matching existing BfieldEvolveRK convention). | ||
| #ifdef AMREX_USE_OMP | ||
| #pragma omp parallel if (amrex::Gpu::notInLaunchRegion()) | ||
| #endif | ||
| for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) { | ||
| Array4<Real> const& Bx = Bfield[lev][0]->array(mfi); | ||
| Array4<Real> const& By = Bfield[lev][1]->array(mfi); | ||
| Array4<Real> const& Bz = Bfield[lev][2]->array(mfi); | ||
| Array4<Real const> const& Kx = K[0].const_array(mfi); | ||
| Array4<Real const> const& Ky = K[1].const_array(mfi); | ||
| Array4<Real const> const& Kz = K[2].const_array(mfi); | ||
| Array4<Real const> const& Bx_old = B_old[0].const_array(mfi); | ||
| Array4<Real const> const& By_old = B_old[1].const_array(mfi); | ||
| Array4<Real const> const& Bz_old = B_old[2].const_array(mfi); | ||
| Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect(), ng); | ||
| Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect(), ng); | ||
| Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect(), ng); | ||
| amrex::ParallelFor(tjx, tjy, tjz, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Bx(i, j, k) = Bx_old(i, j, k) + b1*Kx(i, j, k, 0) + b3*Kx(i, j, k, 2) | ||
| + b4*Kx(i, j, k, 3) + b5*Kx(i, j, k, 4); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| By(i, j, k) = By_old(i, j, k) + b1*Ky(i, j, k, 0) + b3*Ky(i, j, k, 2) | ||
| + b4*Ky(i, j, k, 3) + b5*Ky(i, j, k, 4); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Bz(i, j, k) = Bz_old(i, j, k) + b1*Kz(i, j, k, 0) + b3*Kz(i, j, k, 2) | ||
| + b4*Kz(i, j, k, 3) + b5*Kz(i, j, k, 4); | ||
| } | ||
| ); | ||
| } | ||
|
|
||
| // ---- Assemble error = B5 - B4 into err_scratch (valid cells only) ---- | ||
| #ifdef AMREX_USE_OMP | ||
| #pragma omp parallel if (amrex::Gpu::notInLaunchRegion()) | ||
| #endif | ||
| for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) { | ||
| Array4<Real> const& Ex = err_scratch[0].array(mfi); | ||
| Array4<Real> const& Ey = err_scratch[1].array(mfi); | ||
| Array4<Real> const& Ez = err_scratch[2].array(mfi); | ||
| Array4<Real const> const& Kx = K[0].const_array(mfi); | ||
| Array4<Real const> const& Ky = K[1].const_array(mfi); | ||
| Array4<Real const> const& Kz = K[2].const_array(mfi); | ||
| Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect()); | ||
| Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect()); | ||
| Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect()); | ||
| amrex::ParallelFor(tjx, tjy, tjz, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Ex(i, j, k) = e1*Kx(i, j, k, 0) + e3*Kx(i, j, k, 2) + e4*Kx(i, j, k, 3) | ||
| + e5*Kx(i, j, k, 4) + e6*Kx(i, j, k, 1); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Ey(i, j, k) = e1*Ky(i, j, k, 0) + e3*Ky(i, j, k, 2) + e4*Ky(i, j, k, 3) | ||
| + e5*Ky(i, j, k, 4) + e6*Ky(i, j, k, 1); | ||
| }, | ||
| [=] AMREX_GPU_DEVICE (int i, int j, int k){ | ||
| Ez(i, j, k) = e1*Kz(i, j, k, 0) + e3*Kz(i, j, k, 2) + e4*Kz(i, j, k, 3) | ||
| + e5*Kz(i, j, k, 4) + e6*Kz(i, j, k, 1); | ||
| } | ||
| ); | ||
| } | ||
|
|
||
| // ---- Error norm and adaptive step control ---- | ||
| // norm0() with local=false (default) performs the MPI AllReduce internally | ||
| amrex::Real err_norm = 0._rt; | ||
| amrex::Real B4_norm = 0._rt; | ||
| for (int ii = 0; ii < 3; ii++) { | ||
| err_norm = std::max(err_norm, err_scratch[ii].norm0()); | ||
| B4_norm = std::max(B4_norm, Bfield[lev][ii]->norm0()); | ||
| } | ||
| const amrex::Real err_scalar = err_norm / (m_substep_atol + m_substep_rtol * B4_norm); | ||
| const amrex::Real factor = m_substep_safety * std::pow(err_scalar + 1.e-10_rt, -0.2_rt); | ||
|
|
||
| if (err_scalar <= 1._rt) { | ||
| t += dt_sub; | ||
| ++n_accepted; | ||
| for (int ii = 0; ii < 3; ii++) { | ||
| MultiFab::Copy(B_old[ii], *Bfield[lev][ii], 0, 0, 1, ng); | ||
| } | ||
| dt_sub *= std::min(m_substep_max_growth, factor); | ||
| } else { | ||
| for (int ii = 0; ii < 3; ii++) { | ||
| MultiFab::Copy(*Bfield[lev][ii], B_old[ii], 0, 0, 1, ng); | ||
| } | ||
| dt_sub *= std::max(0.1_rt, factor); | ||
| } | ||
|
|
||
| WARPX_ALWAYS_ASSERT_WITH_MESSAGE( | ||
| ++n_attempts <= m_max_substep_attempts, | ||
| "BfieldEvolveRKF45: exceeded max substep attempts; " | ||
| "consider relaxing hybrid_pic_model.substep_rtol/substep_atol." | ||
| ); | ||
| } |
clarkse-he
approved these changes
May 7, 2026
Contributor
There was a problem hiding this comment.
One potential optimization could be to implement the storage of the final k to use as the first k in the next iteration to eliminate one of the evaluations.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
This PR reduces total B-field evaluation cost by taking large adaptive substeps when physics allows, rather than a fixed number of substeps, while guaranteeing a user-specified error tolerance.
Adds BfieldEvolveRKF45 implementing the classical Runge-Kutta-Fehlberg 4(5) method with adaptive step-size control (mixed atol/rtol error norm, safety factor, step growth limit). It is enabled by setting use_rkf45=True; the original fixed-step RK4 remains the default.
New input parameters: use_rkf45, substep_rtol, substep_atol, substep_safety, substep_max_growth, max_substep_attempts — exposed in both C++ and PICMI.
substeps now sets the initial substep count guess for RKF45 (instead of fixed count); recommended value is 1–4.
Prints accepted/rejected substep counts per half-step for diagnostics.
Unrelated: Extends plasma_resistivity parser from eta(rho, J) to eta(rho, J, t), enabling time-dependent resistivity expressions.