Skip to content

Adaptive RKF45 subcycling for hybrid-PIC B-field advance#6844

Open
prkkumar-he wants to merge 3 commits into BLAST-WarpX:development from
prkkumar-he:impl_rkf45_adaptive_substeps
Open

Adaptive RKF45 subcycling for hybrid-PIC B-field advance#6844
prkkumar-he wants to merge 3 commits into BLAST-WarpX:development from
prkkumar-he:impl_rkf45_adaptive_substeps

Conversation

@prkkumar-he
Copy link
Copy Markdown

@prkkumar-he prkkumar-he commented May 7, 2026

This PR reduces total B-field evaluation cost by taking large adaptive substeps when physics allows, rather than a fixed number of substeps, while guaranteeing a user-specified error tolerance.

  • Adds BfieldEvolveRKF45 implementing the classical Runge-Kutta-Fehlberg 4(5) method with adaptive step-size control (mixed atol/rtol error norm, safety factor, step growth limit). Enabled by setting use_rkf45=True; the original fixed-step RK4 remains the default.

  • New input parameters: use_rkf45, substep_rtol, substep_atol, substep_safety, substep_max_growth, max_substep_attempts — exposed in both C++ and PICMI.

  • substeps now sets the initial substep count guess for RKF45 (instead of fixed count); recommended value is 1–4.

  • Prints accepted/rejected substep counts per half-step for diagnostics.

  • Unrelated: Extends plasma_resistivity parser from eta(rho, J) to eta(rho, J, t), enabling time-dependent resistivity expressions.

Comment on lines +745 to +1161
{
// One RKF45 trial substep of size dt_sub, starting from the accepted state
// B_old. Each stage evaluates dB/dt by staging B in Bfield, calling
// FieldPush, and recovering the stage increment h*k_i by subtraction:
// since B was set to (B_old + sum_j a_ij*K[j]) before the push,
//   h*k_i = B_after_push - B_old - sum_j a_ij*K[j].
// NOTE(review): this assumes FieldPush advances Bfield in place by dt_sub
// (consistent with the K-extraction below) -- confirm against FieldPush.
// K[ii] is a multi-component MultiFab per direction ii; component c holds
// h*k_{c+1} for the Fehlberg tableau coefficients a.., b.., e.. in scope.

// Clamp the trial substep so the final accepted step lands exactly on the
// end of this half-step (t accumulates accepted substep sizes).
if (t + dt_sub > dt_half) { dt_sub = dt_half - t; }

// ---- Stage 1: B = B_old, FieldPush, K[comp0] = h*k1 ----
// Reset B to the last accepted state before evaluating k1; on a retry
// after a rejected step this also undoes the failed trial.
for (int ii = 0; ii < 3; ii++) {
MultiFab::Copy(*Bfield[lev][ii], B_old[ii], 0, 0, 1, ng);
}
FieldPush(Bfield, Efield, Jfield, rhofield, eb_update_E,
dt_sub, subcycling_half, ng, nodal_sync);
// h*k1 = B_new - B_old, stored in K component 0 (LinComb writes comp 0).
for (int ii = 0; ii < 3; ii++) {
MultiFab::LinComb(K[ii], 1._rt, *Bfield[lev][ii], 0, -1._rt, B_old[ii], 0, 0, 1, ng);
}

// ---- Stage 2: B = B_old + a21*K[0], FieldPush, K[comp1] = h*k2 ----
for (int ii = 0; ii < 3; ii++) {
MultiFab::LinComb(*Bfield[lev][ii], 1._rt, B_old[ii], 0, a21, K[ii], 0, 0, 1, ng);
}
FieldPush(Bfield, Efield, Jfield, rhofield, eb_update_E,
dt_sub, subcycling_half, ng, nodal_sync);
#ifdef AMREX_USE_OMP
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
#endif
for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
Array4<Real> const& Kx = K[0].array(mfi);
Array4<Real> const& Ky = K[1].array(mfi);
Array4<Real> const& Kz = K[2].array(mfi);
Array4<Real const> const& Bx = Bfield[lev][0]->const_array(mfi);
Array4<Real const> const& By = Bfield[lev][1]->const_array(mfi);
Array4<Real const> const& Bz = Bfield[lev][2]->const_array(mfi);
Array4<Real const> const& Bx_old = B_old[0].const_array(mfi);
Array4<Real const> const& By_old = B_old[1].const_array(mfi);
Array4<Real const> const& Bz_old = B_old[2].const_array(mfi);
// Tileboxes grown by ng: K is accumulated on ghost cells too, matching
// the ng-wide copies/LinCombs above.
Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect(), ng);
Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect(), ng);
Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect(), ng);
amrex::ParallelFor(tjx, tjy, tjz,
[=] AMREX_GPU_DEVICE (int i, int j, int k){
// h*k2 = B_new - (stage base) = B - B_old - a21*(h*k1)
Kx(i, j, k, 1) = Bx(i, j, k) - Bx_old(i, j, k) - a21*Kx(i, j, k, 0);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Ky(i, j, k, 1) = By(i, j, k) - By_old(i, j, k) - a21*Ky(i, j, k, 0);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Kz(i, j, k, 1) = Bz(i, j, k) - Bz_old(i, j, k) - a21*Kz(i, j, k, 0);
}
);
}

// ---- Stage 3: B = B_old + a31*K[0] + a32*K[1], FieldPush, K[comp2] = h*k3 ----
// First kernel: build the stage-3 base state in Bfield from B_old and the
// previously extracted increments h*k1 (comp 0) and h*k2 (comp 1).
#ifdef AMREX_USE_OMP
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
#endif
for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
Array4<Real> const& Bx = Bfield[lev][0]->array(mfi);
Array4<Real> const& By = Bfield[lev][1]->array(mfi);
Array4<Real> const& Bz = Bfield[lev][2]->array(mfi);
Array4<Real const> const& Kx = K[0].const_array(mfi);
Array4<Real const> const& Ky = K[1].const_array(mfi);
Array4<Real const> const& Kz = K[2].const_array(mfi);
Array4<Real const> const& Bx_old = B_old[0].const_array(mfi);
Array4<Real const> const& By_old = B_old[1].const_array(mfi);
Array4<Real const> const& Bz_old = B_old[2].const_array(mfi);
Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect(), ng);
Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect(), ng);
Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect(), ng);
amrex::ParallelFor(tjx, tjy, tjz,
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Bx(i, j, k) = Bx_old(i, j, k)
+ a31*Kx(i, j, k, 0) + a32*Kx(i, j, k, 1);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
By(i, j, k) = By_old(i, j, k)
+ a31*Ky(i, j, k, 0) + a32*Ky(i, j, k, 1);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Bz(i, j, k) = Bz_old(i, j, k)
+ a31*Kz(i, j, k, 0) + a32*Kz(i, j, k, 1);
}
);
}
FieldPush(Bfield, Efield, Jfield, rhofield, eb_update_E,
dt_sub, subcycling_half, ng, nodal_sync);
// Second kernel: subtract the stage base back out to isolate h*k3
// into K component 2.
#ifdef AMREX_USE_OMP
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
#endif
for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
Array4<Real> const& Kx = K[0].array(mfi);
Array4<Real> const& Ky = K[1].array(mfi);
Array4<Real> const& Kz = K[2].array(mfi);
Array4<Real const> const& Bx = Bfield[lev][0]->const_array(mfi);
Array4<Real const> const& By = Bfield[lev][1]->const_array(mfi);
Array4<Real const> const& Bz = Bfield[lev][2]->const_array(mfi);
Array4<Real const> const& Bx_old = B_old[0].const_array(mfi);
Array4<Real const> const& By_old = B_old[1].const_array(mfi);
Array4<Real const> const& Bz_old = B_old[2].const_array(mfi);
Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect(), ng);
Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect(), ng);
Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect(), ng);
amrex::ParallelFor(tjx, tjy, tjz,
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Kx(i, j, k, 2) = Bx(i, j, k) - Bx_old(i, j, k)
- a31*Kx(i, j, k, 0) - a32*Kx(i, j, k, 1);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Ky(i, j, k, 2) = By(i, j, k) - By_old(i, j, k)
- a31*Ky(i, j, k, 0) - a32*Ky(i, j, k, 1);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Kz(i, j, k, 2) = Bz(i, j, k) - Bz_old(i, j, k)
- a31*Kz(i, j, k, 0) - a32*Kz(i, j, k, 1);
}
);
}

// ---- Stage 4: B = B_old + a41*K[0] + a42*K[1] + a43*K[2], FieldPush, K[comp3] = h*k4 ----
// Same build/push/extract pattern as stage 3, now over three increments.
#ifdef AMREX_USE_OMP
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
#endif
for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
Array4<Real> const& Bx = Bfield[lev][0]->array(mfi);
Array4<Real> const& By = Bfield[lev][1]->array(mfi);
Array4<Real> const& Bz = Bfield[lev][2]->array(mfi);
Array4<Real const> const& Kx = K[0].const_array(mfi);
Array4<Real const> const& Ky = K[1].const_array(mfi);
Array4<Real const> const& Kz = K[2].const_array(mfi);
Array4<Real const> const& Bx_old = B_old[0].const_array(mfi);
Array4<Real const> const& By_old = B_old[1].const_array(mfi);
Array4<Real const> const& Bz_old = B_old[2].const_array(mfi);
Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect(), ng);
Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect(), ng);
Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect(), ng);
amrex::ParallelFor(tjx, tjy, tjz,
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Bx(i, j, k) = Bx_old(i, j, k)
+ a41*Kx(i, j, k, 0) + a42*Kx(i, j, k, 1) + a43*Kx(i, j, k, 2);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
By(i, j, k) = By_old(i, j, k)
+ a41*Ky(i, j, k, 0) + a42*Ky(i, j, k, 1) + a43*Ky(i, j, k, 2);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Bz(i, j, k) = Bz_old(i, j, k)
+ a41*Kz(i, j, k, 0) + a42*Kz(i, j, k, 1) + a43*Kz(i, j, k, 2);
}
);
}
FieldPush(Bfield, Efield, Jfield, rhofield, eb_update_E,
dt_sub, subcycling_half, ng, nodal_sync);
#ifdef AMREX_USE_OMP
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
#endif
for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
Array4<Real> const& Kx = K[0].array(mfi);
Array4<Real> const& Ky = K[1].array(mfi);
Array4<Real> const& Kz = K[2].array(mfi);
Array4<Real const> const& Bx = Bfield[lev][0]->const_array(mfi);
Array4<Real const> const& By = Bfield[lev][1]->const_array(mfi);
Array4<Real const> const& Bz = Bfield[lev][2]->const_array(mfi);
Array4<Real const> const& Bx_old = B_old[0].const_array(mfi);
Array4<Real const> const& By_old = B_old[1].const_array(mfi);
Array4<Real const> const& Bz_old = B_old[2].const_array(mfi);
Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect(), ng);
Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect(), ng);
Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect(), ng);
amrex::ParallelFor(tjx, tjy, tjz,
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Kx(i, j, k, 3) = Bx(i, j, k) - Bx_old(i, j, k)
- a41*Kx(i, j, k, 0) - a42*Kx(i, j, k, 1) - a43*Kx(i, j, k, 2);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Ky(i, j, k, 3) = By(i, j, k) - By_old(i, j, k)
- a41*Ky(i, j, k, 0) - a42*Ky(i, j, k, 1) - a43*Ky(i, j, k, 2);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Kz(i, j, k, 3) = Bz(i, j, k) - Bz_old(i, j, k)
- a41*Kz(i, j, k, 0) - a42*Kz(i, j, k, 1) - a43*Kz(i, j, k, 2);
}
);
}

// ---- Stage 5: B = B_old + a51*K[0]+a52*K[1]+a53*K[2]+a54*K[3], FieldPush, K[comp4] = h*k5 ----
#ifdef AMREX_USE_OMP
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
#endif
for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
Array4<Real> const& Bx = Bfield[lev][0]->array(mfi);
Array4<Real> const& By = Bfield[lev][1]->array(mfi);
Array4<Real> const& Bz = Bfield[lev][2]->array(mfi);
Array4<Real const> const& Kx = K[0].const_array(mfi);
Array4<Real const> const& Ky = K[1].const_array(mfi);
Array4<Real const> const& Kz = K[2].const_array(mfi);
Array4<Real const> const& Bx_old = B_old[0].const_array(mfi);
Array4<Real const> const& By_old = B_old[1].const_array(mfi);
Array4<Real const> const& Bz_old = B_old[2].const_array(mfi);
Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect(), ng);
Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect(), ng);
Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect(), ng);
amrex::ParallelFor(tjx, tjy, tjz,
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Bx(i, j, k) = Bx_old(i, j, k) + a51*Kx(i, j, k, 0) + a52*Kx(i, j, k, 1)
+ a53*Kx(i, j, k, 2) + a54*Kx(i, j, k, 3);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
By(i, j, k) = By_old(i, j, k) + a51*Ky(i, j, k, 0) + a52*Ky(i, j, k, 1)
+ a53*Ky(i, j, k, 2) + a54*Ky(i, j, k, 3);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Bz(i, j, k) = Bz_old(i, j, k) + a51*Kz(i, j, k, 0) + a52*Kz(i, j, k, 1)
+ a53*Kz(i, j, k, 2) + a54*Kz(i, j, k, 3);
}
);
}
FieldPush(Bfield, Efield, Jfield, rhofield, eb_update_E,
dt_sub, subcycling_half, ng, nodal_sync);
#ifdef AMREX_USE_OMP
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
#endif
for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
Array4<Real> const& Kx = K[0].array(mfi);
Array4<Real> const& Ky = K[1].array(mfi);
Array4<Real> const& Kz = K[2].array(mfi);
Array4<Real const> const& Bx = Bfield[lev][0]->const_array(mfi);
Array4<Real const> const& By = Bfield[lev][1]->const_array(mfi);
Array4<Real const> const& Bz = Bfield[lev][2]->const_array(mfi);
Array4<Real const> const& Bx_old = B_old[0].const_array(mfi);
Array4<Real const> const& By_old = B_old[1].const_array(mfi);
Array4<Real const> const& Bz_old = B_old[2].const_array(mfi);
Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect(), ng);
Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect(), ng);
Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect(), ng);
amrex::ParallelFor(tjx, tjy, tjz,
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Kx(i, j, k, 4) = Bx(i, j, k) - Bx_old(i, j, k)
- a51*Kx(i, j, k, 0) - a52*Kx(i, j, k, 1)
- a53*Kx(i, j, k, 2) - a54*Kx(i, j, k, 3);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Ky(i, j, k, 4) = By(i, j, k) - By_old(i, j, k)
- a51*Ky(i, j, k, 0) - a52*Ky(i, j, k, 1)
- a53*Ky(i, j, k, 2) - a54*Ky(i, j, k, 3);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Kz(i, j, k, 4) = Bz(i, j, k) - Bz_old(i, j, k)
- a51*Kz(i, j, k, 0) - a52*Kz(i, j, k, 1)
- a53*Kz(i, j, k, 2) - a54*Kz(i, j, k, 3);
}
);
}

// ---- Stage 6: B = B_old + a61*K[0]+a62*K[1]+a63*K[2]+a64*K[3]+a65*K[4] ----
// FieldPush, then overwrite K[comp1] with h*k6 (reads old K[comp1]=h*k2 in same kernel)
// Slot reuse saves one K component: after this stage h*k2 is gone, which is
// fine because the b-weights below use only k1,k3,k4,k5 (b2 = 0) and the
// error weights use k6 from this slot.
#ifdef AMREX_USE_OMP
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
#endif
for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
Array4<Real> const& Bx = Bfield[lev][0]->array(mfi);
Array4<Real> const& By = Bfield[lev][1]->array(mfi);
Array4<Real> const& Bz = Bfield[lev][2]->array(mfi);
Array4<Real const> const& Kx = K[0].const_array(mfi);
Array4<Real const> const& Ky = K[1].const_array(mfi);
Array4<Real const> const& Kz = K[2].const_array(mfi);
Array4<Real const> const& Bx_old = B_old[0].const_array(mfi);
Array4<Real const> const& By_old = B_old[1].const_array(mfi);
Array4<Real const> const& Bz_old = B_old[2].const_array(mfi);
Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect(), ng);
Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect(), ng);
Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect(), ng);
amrex::ParallelFor(tjx, tjy, tjz,
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Bx(i, j, k) = Bx_old(i, j, k) + a61*Kx(i, j, k, 0) + a62*Kx(i, j, k, 1)
+ a63*Kx(i, j, k, 2) + a64*Kx(i, j, k, 3) + a65*Kx(i, j, k, 4);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
By(i, j, k) = By_old(i, j, k) + a61*Ky(i, j, k, 0) + a62*Ky(i, j, k, 1)
+ a63*Ky(i, j, k, 2) + a64*Ky(i, j, k, 3) + a65*Ky(i, j, k, 4);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Bz(i, j, k) = Bz_old(i, j, k) + a61*Kz(i, j, k, 0) + a62*Kz(i, j, k, 1)
+ a63*Kz(i, j, k, 2) + a64*Kz(i, j, k, 3) + a65*Kz(i, j, k, 4);
}
);
}
FieldPush(Bfield, Efield, Jfield, rhofield, eb_update_E,
dt_sub, subcycling_half, ng, nodal_sync);
// K[comp1] is overwritten here: reads h*k2 (old value) then writes h*k6 in each cell.
// This is safe since the read and write are on the same cell with no cross-cell dependency.
#ifdef AMREX_USE_OMP
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
#endif
for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
Array4<Real> const& Kx = K[0].array(mfi);
Array4<Real> const& Ky = K[1].array(mfi);
Array4<Real> const& Kz = K[2].array(mfi);
Array4<Real const> const& Bx = Bfield[lev][0]->const_array(mfi);
Array4<Real const> const& By = Bfield[lev][1]->const_array(mfi);
Array4<Real const> const& Bz = Bfield[lev][2]->const_array(mfi);
Array4<Real const> const& Bx_old = B_old[0].const_array(mfi);
Array4<Real const> const& By_old = B_old[1].const_array(mfi);
Array4<Real const> const& Bz_old = B_old[2].const_array(mfi);
Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect(), ng);
Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect(), ng);
Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect(), ng);
amrex::ParallelFor(tjx, tjy, tjz,
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Kx(i, j, k, 1) = Bx(i, j, k) - Bx_old(i, j, k)
- a61*Kx(i, j, k, 0) - a62*Kx(i, j, k, 1)
- a63*Kx(i, j, k, 2) - a64*Kx(i, j, k, 3) - a65*Kx(i, j, k, 4);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Ky(i, j, k, 1) = By(i, j, k) - By_old(i, j, k)
- a61*Ky(i, j, k, 0) - a62*Ky(i, j, k, 1)
- a63*Ky(i, j, k, 2) - a64*Ky(i, j, k, 3) - a65*Ky(i, j, k, 4);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Kz(i, j, k, 1) = Bz(i, j, k) - Bz_old(i, j, k)
- a61*Kz(i, j, k, 0) - a62*Kz(i, j, k, 1)
- a63*Kz(i, j, k, 2) - a64*Kz(i, j, k, 3) - a65*Kz(i, j, k, 4);
}
);
}

// ---- Assemble 4th-order solution B4 into Bfield ----
// K[comp1] now holds h*k6; b2=0 so k2 (overwritten) is not needed for B4.
// Ghost cells are updated (matching existing BfieldEvolveRK convention).
#ifdef AMREX_USE_OMP
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
#endif
for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
Array4<Real> const& Bx = Bfield[lev][0]->array(mfi);
Array4<Real> const& By = Bfield[lev][1]->array(mfi);
Array4<Real> const& Bz = Bfield[lev][2]->array(mfi);
Array4<Real const> const& Kx = K[0].const_array(mfi);
Array4<Real const> const& Ky = K[1].const_array(mfi);
Array4<Real const> const& Kz = K[2].const_array(mfi);
Array4<Real const> const& Bx_old = B_old[0].const_array(mfi);
Array4<Real const> const& By_old = B_old[1].const_array(mfi);
Array4<Real const> const& Bz_old = B_old[2].const_array(mfi);
Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect(), ng);
Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect(), ng);
Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect(), ng);
amrex::ParallelFor(tjx, tjy, tjz,
[=] AMREX_GPU_DEVICE (int i, int j, int k){
// B4 = B_old + b1*h*k1 + b3*h*k3 + b4*h*k4 + b5*h*k5 (b2 = 0, b6 = 0
// in the 4th-order Fehlberg weights).
Bx(i, j, k) = Bx_old(i, j, k) + b1*Kx(i, j, k, 0) + b3*Kx(i, j, k, 2)
+ b4*Kx(i, j, k, 3) + b5*Kx(i, j, k, 4);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
By(i, j, k) = By_old(i, j, k) + b1*Ky(i, j, k, 0) + b3*Ky(i, j, k, 2)
+ b4*Ky(i, j, k, 3) + b5*Ky(i, j, k, 4);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Bz(i, j, k) = Bz_old(i, j, k) + b1*Kz(i, j, k, 0) + b3*Kz(i, j, k, 2)
+ b4*Kz(i, j, k, 3) + b5*Kz(i, j, k, 4);
}
);
}

// ---- Assemble error = B5 - B4 into err_scratch (valid cells only) ----
// e-weights are the differences of the 5th- and 4th-order b-weights;
// note the tileboxes here are NOT grown by ng, so ghost cells do not
// contribute to the error norm below.
#ifdef AMREX_USE_OMP
#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
#endif
for ( MFIter mfi(*Bfield[lev][0], TilingIfNotGPU()); mfi.isValid(); ++mfi ) {
Array4<Real> const& Ex = err_scratch[0].array(mfi);
Array4<Real> const& Ey = err_scratch[1].array(mfi);
Array4<Real> const& Ez = err_scratch[2].array(mfi);
Array4<Real const> const& Kx = K[0].const_array(mfi);
Array4<Real const> const& Ky = K[1].const_array(mfi);
Array4<Real const> const& Kz = K[2].const_array(mfi);
Box const& tjx = mfi.tilebox(Bfield[lev][0]->ixType().toIntVect());
Box const& tjy = mfi.tilebox(Bfield[lev][1]->ixType().toIntVect());
Box const& tjz = mfi.tilebox(Bfield[lev][2]->ixType().toIntVect());
amrex::ParallelFor(tjx, tjy, tjz,
[=] AMREX_GPU_DEVICE (int i, int j, int k){
// k6 lives in K component 1 (see stage 6 slot reuse).
Ex(i, j, k) = e1*Kx(i, j, k, 0) + e3*Kx(i, j, k, 2) + e4*Kx(i, j, k, 3)
+ e5*Kx(i, j, k, 4) + e6*Kx(i, j, k, 1);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Ey(i, j, k) = e1*Ky(i, j, k, 0) + e3*Ky(i, j, k, 2) + e4*Ky(i, j, k, 3)
+ e5*Ky(i, j, k, 4) + e6*Ky(i, j, k, 1);
},
[=] AMREX_GPU_DEVICE (int i, int j, int k){
Ez(i, j, k) = e1*Kz(i, j, k, 0) + e3*Kz(i, j, k, 2) + e4*Kz(i, j, k, 3)
+ e5*Kz(i, j, k, 4) + e6*Kz(i, j, k, 1);
}
);
}

// ---- Error norm and adaptive step control ----
// norm0() with local=false (default) performs the MPI AllReduce internally
// Max-norm over all three components gives a global L-inf error estimate
// and field magnitude for the mixed atol/rtol acceptance test.
amrex::Real err_norm = 0._rt;
amrex::Real B4_norm = 0._rt;
for (int ii = 0; ii < 3; ii++) {
err_norm = std::max(err_norm, err_scratch[ii].norm0());
B4_norm = std::max(B4_norm, Bfield[lev][ii]->norm0());
}
const amrex::Real err_scalar = err_norm / (m_substep_atol + m_substep_rtol * B4_norm);
// Standard step-size controller: exponent -1/5 matches the 5th-order
// error estimator; the 1.e-10 floor avoids pow(0, ...) blow-up when the
// error vanishes.
const amrex::Real factor = m_substep_safety * std::pow(err_scalar + 1.e-10_rt, -0.2_rt);

if (err_scalar <= 1._rt) {
// Accept: advance t, promote B4 to the new reference state B_old, and
// grow the next substep (capped by m_substep_max_growth).
t += dt_sub;
++n_accepted;
for (int ii = 0; ii < 3; ii++) {
MultiFab::Copy(B_old[ii], *Bfield[lev][ii], 0, 0, 1, ng);
}
dt_sub *= std::min(m_substep_max_growth, factor);
} else {
// Reject: restore B from B_old and retry with a smaller substep
// (shrink factor floored at 0.1 to avoid collapse).
// NOTE(review): a shrink on rejection relies on m_substep_safety < 1
// so that factor < 1 when err_scalar is just above 1 -- confirm the
// parameter validation enforces this.
for (int ii = 0; ii < 3; ii++) {
MultiFab::Copy(*Bfield[lev][ii], B_old[ii], 0, 0, 1, ng);
}
dt_sub *= std::max(0.1_rt, factor);
}

// NOTE(review): ++n_attempts has a side effect inside the assert macro;
// if WARPX_ALWAYS_ASSERT_WITH_MESSAGE is ever compiled out the counter
// would stop advancing -- consider incrementing outside the macro.
WARPX_ALWAYS_ASSERT_WITH_MESSAGE(
++n_attempts <= m_max_substep_attempts,
"BfieldEvolveRKF45: exceeded max substep attempts; "
"consider relaxing hybrid_pic_model.substep_rtol/substep_atol."
);
}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One potential optimization could be to implement the storage of the final k to use as the first k in the next iteration to eliminate one of the evaluations.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants