From 78487a44f87526da3aefd3f5cdca8247506ed459 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Sun, 19 Oct 2025 08:22:12 -0400 Subject: [PATCH 01/50] start of support for reduced precision Jacobian --- integration/BackwardEuler/be_type.H | 2 +- integration/VODE/vode_type.H | 4 ++-- integration/integrator_data.H | 2 +- integration/utils/circle_theorem.H | 2 +- interfaces/ArrayUtilities.H | 4 ++-- interfaces/burn_type.H | 6 +++++- networks/rhs.H | 4 ++-- unit_test/test_linear_algebra/test_linear_algebra.H | 2 +- unit_test/test_rhs/rhs_zones.H | 2 +- 9 files changed, 16 insertions(+), 12 deletions(-) diff --git a/integration/BackwardEuler/be_type.H b/integration/BackwardEuler/be_type.H index 0efae612a7..04d7d8f190 100644 --- a/integration/BackwardEuler/be_type.H +++ b/integration/BackwardEuler/be_type.H @@ -48,7 +48,7 @@ struct be_t { amrex::Real rtol_enuc; amrex::Array1D y; - ArrayUtil::MathArray2D<1, int_neqs, 1, int_neqs> jac; + ArrayUtil::MathArray2D jac; short jacobian_type; }; diff --git a/integration/VODE/vode_type.H b/integration/VODE/vode_type.H index 3c246bfb50..a1df1f9bf2 100644 --- a/integration/VODE/vode_type.H +++ b/integration/VODE/vode_type.H @@ -143,11 +143,11 @@ struct dvode_t amrex::Array1D y; // Jacobian - ArrayUtil::MathArray2D<1, int_neqs, 1, int_neqs> jac; + ArrayUtil::MathArray2D jac; #ifdef ALLOW_JACOBIAN_CACHING // Saved Jacobian - ArrayUtil::MathArray2D<1, int_neqs, 1, int_neqs> jac_save; + ArrayUtil::MathArray2D jac_save; #endif // the Nordsieck history array diff --git a/integration/integrator_data.H b/integration/integrator_data.H index 2d4f7795cd..fb698318aa 100644 --- a/integration/integrator_data.H +++ b/integration/integrator_data.H @@ -52,6 +52,6 @@ constexpr int integrator_neqs () using IArray1D = amrex::Array1D; using RArray1D = amrex::Array1D; -using RArray2D = ArrayUtil::MathArray2D<1, INT_NEQS, 1, INT_NEQS>; +using RArray2D = ArrayUtil::MathArray2D; #endif diff --git a/integration/utils/circle_theorem.H b/integration/utils/circle_theorem.H index 957a2f3ada..85afc59d7e 100644 --- a/integration/utils/circle_theorem.H +++ b/integration/utils/circle_theorem.H @@ -17,7 +17,7 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void circle_theorem_sprad(const amrex::Real time, BurnT& state, T& int_state, amrex::Real& sprad) { - ArrayUtil::MathArray2D<1, INT_NEQS, 1, INT_NEQS> jac_array; + ArrayUtil::MathArray2D jac_array; if (integrator_rp::jacobian == 1) { jac(time, state, int_state, jac_array); diff --git a/interfaces/ArrayUtilities.H b/interfaces/ArrayUtilities.H index 37f6b51963..6de1190db5 100644 --- a/interfaces/ArrayUtilities.H +++ b/interfaces/ArrayUtilities.H @@ -34,7 +34,7 @@ namespace ArrayUtil amrex::Real arr[(XHI-XLO+1)]; }; - template + template struct MathArray2D { AMREX_GPU_HOST_DEVICE AMREX_INLINE @@ -95,7 +95,7 @@ namespace ArrayUtil return arr[i+j*(XHI-XLO+1)-(YLO*(XHI-XLO+1)+XLO)]; } - amrex::Real arr[(XHI-XLO+1)*(YHI-YLO+1)]; + T arr[(XHI-XLO+1)*(YHI-YLO+1)]; }; namespace Math diff --git a/interfaces/burn_type.H b/interfaces/burn_type.H index 316053530d..e669738e40 100644 --- a/interfaces/burn_type.H +++ b/interfaces/burn_type.H @@ -58,7 +58,11 @@ const int SVAR_EVOLVE = SFX; // this is the data type of the dense Jacobian that the network wants. // it is not the same size as the Jacobian that VODE cares about when // we are doing simplified-SDC -using JacNetArray2D = ArrayUtil::MathArray2D<1, neqs, 1, neqs>; + +// the Jacobian can have a different size that our system +using jac_t = amrex::Real; + +using JacNetArray2D = ArrayUtil::MathArray2D; struct burn_t { diff --git a/networks/rhs.H b/networks/rhs.H index c096cc6d99..9d417cd2f3 100644 --- a/networks/rhs.H +++ b/networks/rhs.H @@ -1357,7 +1357,7 @@ void rhs (burn_t& burn_state, amrex::Array1D& ydot) // Analytical Jacobian AMREX_GPU_HOST_DEVICE AMREX_INLINE -void jac (burn_t& burn_state, ArrayUtil::MathArray2D<1, neqs, 1, neqs>& jac) +void jac (burn_t& burn_state, ArrayUtil::MathArray2D& jac) { rhs_state_t rhs_state; @@ -1521,7 +1521,7 @@ void actual_rhs (burn_t& state, amrex::Array1D& ydot) } AMREX_GPU_HOST_DEVICE AMREX_INLINE -void actual_jac (burn_t& state, ArrayUtil::MathArray2D<1, neqs, 1, neqs>& jac) +void actual_jac (burn_t& state, ArrayUtil::MathArray2D& jac) { RHS::jac(state, jac); } diff --git a/unit_test/test_linear_algebra/test_linear_algebra.H b/unit_test/test_linear_algebra/test_linear_algebra.H index 0762163da3..caf5677f84 100644 --- a/unit_test/test_linear_algebra/test_linear_algebra.H +++ b/unit_test/test_linear_algebra/test_linear_algebra.H @@ -175,7 +175,7 @@ void linear_algebra() { // now try to output a Jacobian mask based on the actual Jacobian - ArrayUtil::MathArray2D<1, INT_NEQS, 1, INT_NEQS> jac; + ArrayUtil::MathArray2D jac; burn_t burn_state; burn_state.rho = 1.e8; diff --git a/unit_test/test_rhs/rhs_zones.H b/unit_test/test_rhs/rhs_zones.H index 87a3ec559a..54a7bfaba0 100644 --- a/unit_test/test_rhs/rhs_zones.H +++ b/unit_test/test_rhs/rhs_zones.H @@ -41,7 +41,7 @@ bool do_rhs (int i, int j, int k, amrex::Array4 const& state, const amrex::Array1D ydot; - ArrayUtil::MathArray2D<1, neqs, 1, neqs> jac; + ArrayUtil::MathArray2D jac; #ifdef NEW_NETWORK_IMPLEMENTATION RHS::rhs(burn_state, ydot); From 13d0b7b874675c01c210a077d1d272aa3903f836 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Sun, 19 Oct 2025 10:49:01 -0400 Subject: [PATCH 02/50] fix some types --- interfaces/ArrayUtilities.H | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/interfaces/ArrayUtilities.H b/interfaces/ArrayUtilities.H index 6de1190db5..9b000eb77e 100644 --- a/interfaces/ArrayUtilities.H +++ b/interfaces/ArrayUtilities.H @@ -71,7 +71,7 @@ namespace ArrayUtil } [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - amrex::Real get (const int i, const int j) const noexcept { + T get (const int i, const int j) const noexcept { AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI); return arr[i+j*(XHI-XLO+1)-(YLO*(XHI-XLO+1)+XLO)]; } @@ -84,13 +84,13 @@ namespace ArrayUtil } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - const amrex::Real& operator() (int i, int j) const noexcept { + const T& operator() (int i, int j) const noexcept { AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI); return arr[i+j*(XHI-XLO+1)-(YLO*(XHI-XLO+1)+XLO)]; } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - amrex::Real& operator() (int i, int j) noexcept { + T& operator() (int i, int j) noexcept { AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI); return arr[i+j*(XHI-XLO+1)-(YLO*(XHI-XLO+1)+XLO)]; } From 4c2e7d8cadb641eaafa659a45f2fdbc64093b8f2 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Sun, 19 Oct 2025 11:35:02 -0400 Subject: [PATCH 03/50] add control --- integration/Make.package | 3 +++ interfaces/burn_type.H | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/integration/Make.package b/integration/Make.package index 81f5c94887..9b8ab665f6 100644 --- a/integration/Make.package +++ b/integration/Make.package @@ -26,6 +26,9 @@ else CEXE_headers += integrator_setup_strang.H endif +ifeq ($(USE_SINGLE_PRECISION_JACOBIAN), TRUE) + DEFINES += -DSINGLE_PRECISION_JACOBIAN +endif ifeq ($(USE_ALL_NSE), TRUE) ifeq ($(USE_ALL_SDC), TRUE) diff --git a/interfaces/burn_type.H b/interfaces/burn_type.H index e669738e40..558603b8a7 100644 --- a/interfaces/burn_type.H +++ b/interfaces/burn_type.H @@ -60,7 +60,11 @@ const int SVAR_EVOLVE = SFX; // we are doing simplified-SDC // the Jacobian can have a different size that our system +#ifdef SINGLE_PRECISION_JACOBIAN +using jac_t = float; +#else using jac_t = amrex::Real; +#endif using JacNetArray2D = ArrayUtil::MathArray2D; From cb600d6b5130cce541bd881dfb1fef1a165fcea4 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Sun, 19 Oct 2025 12:00:19 -0400 Subject: [PATCH 04/50] update defines --- .github/workflows/good_defines.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/good_defines.txt b/.github/workflows/good_defines.txt index 9cbd6c8ede..a084e38fcc 100644 --- a/.github/workflows/good_defines.txt +++ b/.github/workflows/good_defines.txt @@ -24,6 +24,7 @@ SCREENING SCREEN_METHOD SDC SIMPLIFIED_SDC +SINGLE_PRECISION_JACOBIAN STRANG TRUE_SDC _OPENMP From f16ee32015348bc24ddcc99175701cc5e32aedf4 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Mon, 20 Oct 2025 11:48:40 -0400 Subject: [PATCH 05/50] add docs --- Docs/source/ode_integrators.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Docs/source/ode_integrators.rst b/Docs/source/ode_integrators.rst index 6ee5bc5804..4644a347a5 100644 --- a/Docs/source/ode_integrators.rst +++ b/Docs/source/ode_integrators.rst @@ -113,6 +113,23 @@ For the other networks (usually pynucastro networks), the implementation is provided in ``Microphysics/util/linpack.H`` and is templated on the number of equations. Pivoting can be disabled by setting ``integrator.linalg_do_pivoting=0``. +.. index:: USE_SINGLE_PRECISION_JACOBIAN + +.. tip:: + + The storage for the Jacobian can take up the most memory when + integrating the reaction system. It is possible to store the + Jacobian as single-precision, by building with: + + :: + + USE_SINGLE_PRECISION_JACOBIAN=TRUE + + This can speed up the integration prevent the code from running out + of memory when run on GPUs. + + + Integration errors ================== From ceceb518ec7d69312415c41b6e39eefd0cb1ee7c Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Thu, 23 Oct 2025 22:02:22 -0400 Subject: [PATCH 06/50] add AGENTS --- AGENTS.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000000..256143394a --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,19 @@ +# Repository Guidelines + +## Project Structure & Module Organization +The repository is organized around modular physics kernels in `EOS/`, `networks/`, `integration/`, and companion directories such as `conductivity/`, `neutrinos/`, `opacity/`, and `rates/`. Shared math utilities and build scripts remain in `util/`, while reusable constants sit in `constants/`. Interfaces consumed by AMReX-based application codes are in `interfaces/`. Unit regression assets reside under `unit_test/`, and Sphinx documentation sources are in `Docs/` with release history tracked in `CHANGES.md`. + +## Build, Test, and Development Commands +Export `AMREX_HOME` and `MICROPHYSICS_HOME` before building so the GNU make stubs locate AMReX sources. Build a standalone unit test in place, e.g. `cd unit_test/burn_cell && make -j4`, which yields `main3d.gnu.ex`. Select physics at compile time with flags like `make NETWORK_DIR=aprox19 EOS_DIR=helmholtz`, and reset a directory via `make clean`. + +## Coding Style & Naming Conventions +C++ sources follow `.editorconfig`: four-space indentation, LF line endings, and tabs only inside Makefiles. Headers keep the uppercase `.H` suffix and implementations use `.cpp`. Favor the existing snake_case routine pattern (`nse_check`, `burn_cell`) and guard autogenerated directories such as `util/autodiff`. Run `.clang-tidy` with `make USE_CLANG_TIDY=TRUE` on significant changes. + +## Testing Guidelines +Each directory beneath `unit_test/` ships a `GNUmakefile`, `inputs_*` controls, and README notes; run the built binary with the matching inputs file (for example `./main3d.gnu.ex inputs_aprox13`). For new coverage, clone an existing `test_*` layout, describe the scenario, and rerun the most relevant case before posting. Capture scratch artifacts via `.gitignore` rather than committing them. + +## Commit & Pull Request Guidelines +Develop on topic branches and open pull requests against `development`; merges to `main` occur monthly. Keep commit subjects concise and imperative, mirroring history such as `add NSE network compatibility docs (#1852)`. Update `CHANGES.md` for user-facing fixes, link issues, and note required AMReX revisions. PRs should summarize physics choices, list tests run, and attach plots or logs when behavior shifts. + +## Documentation & Automation +Sphinx sources in `Docs/` back the published guide; update them with code changes and ensure `make -C Docs html` finishes cleanly. GitHub Actions publishes the result and exercises unit tests, so keep workflows green by limiting optional dependencies and recording new Python requirements in `requirements.txt`. From 4aa880640108a78cee97f66c136b19001543095b Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Thu, 23 Oct 2025 22:12:48 -0400 Subject: [PATCH 07/50] one-zone ParallelFor --- unit_test/burn_cell_primordial_chem/burn_cell.H | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index f5ced4cb5e..d0aa6327ff 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -5,6 +5,8 @@ #include #include +#include +#include #include #include #include @@ -190,7 +192,18 @@ auto burn_cell_c() -> int { state.rho *= dd / dd1; // do the actual integration - burner(state, dt); + { + amrex::AsyncArray state_device(&state, 1); + burn_t* state_ptr = state_device.data(); + + amrex::ParallelFor(1, + [state_ptr, dt] AMREX_GPU_DEVICE(int idx) noexcept { + burner(state_ptr[idx], dt); + }); + + state_device.copyToHost(&state, 1); + amrex::Gpu::streamSynchronize(); + } // ensure positivity and normalize amrex::Real inmfracs[NumSpec] = {-1.0}; From 140b4a75942acb0f28f268f5402bfebc0b634435 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Thu, 23 Oct 2025 22:50:54 -0400 Subject: [PATCH 08/50] add multi-zone burn --- .../burn_cell_primordial_chem/burn_cell.H | 260 ++++++++++++++---- unit_test/burn_cell_primordial_chem/main.cpp | 7 +- 2 files changed, 212 insertions(+), 55 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index d0aa6327ff..966ddd15d7 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -1,12 +1,17 @@ #ifndef BURN_CELL_H #define BURN_CELL_H +#include +#include #include +#include #include +#include +#include -#include #include #include +#include #include #include #include @@ -14,10 +19,11 @@ amrex::Real grav_constant = 6.674e-8; -AMREX_INLINE -auto burn_cell_c() -> int { +namespace { - burn_t state; +AMREX_INLINE +void init_burn_state(burn_t &state, amrex::Real density_scale, + amrex::Real temp_offset, bool print_initial) { amrex::Real numdens[NumSpec] = {-1.0}; @@ -69,23 +75,28 @@ auto burn_cell_c() -> int { } } - // Echo initial conditions at burn and fill burn state input - - std::cout << "Maximum Time (s): " << unit_test_rp::tmax << std::endl; - std::cout << "State Temperature (K): " << unit_test_rp::temperature << std::endl; for (int n = 0; n < NumSpec; ++n) { - std::cout << "Number Density input (" << short_spec_names_cxx[n] - << "): " << numdens[n] << std::endl; + numdens[n] *= density_scale; + } + + if (print_initial) { + std::cout << "Maximum Time (s): " << unit_test_rp::tmax << std::endl; + std::cout << "State Temperature (K): " + << unit_test_rp::temperature + temp_offset << std::endl; + for (int n = 0; n < NumSpec; ++n) { + std::cout << "Number Density input (" << short_spec_names_cxx[n] + << "): " << numdens[n] << std::endl; + } } - state.T = unit_test_rp::temperature; + state = burn_t{}; + state.T = unit_test_rp::temperature + temp_offset; // find the density in g/cm^3 amrex::Real rhotot = 0.0_rt; for (int n = 0; n < NumSpec; ++n) { state.xn[n] = numdens[n]; - rhotot += state.xn[n] * spmasses[n]; // spmasses contains the masses of all - // species, defined in EOS + rhotot += state.xn[n] * spmasses[n]; } state.rho = rhotot; @@ -107,16 +118,64 @@ auto burn_cell_c() -> int { } #ifdef DEBUG - for (int n = 0; n < NumSpec; ++n) { - std::cout << "Mass fractions (" << short_spec_names_cxx[n] - << "): " << mfracs[n] << std::endl; - std::cout << "Number Density conserved (" << short_spec_names_cxx[n] - << "): " << state.xn[n] << std::endl; + if (print_initial) { + for (int n = 0; n < NumSpec; ++n) { + std::cout << "Mass fractions (" << short_spec_names_cxx[n] + << "): " << mfracs[n] << std::endl; + std::cout << "Number Density conserved (" << short_spec_names_cxx[n] + << "): " << state.xn[n] << std::endl; + } } #endif - // call the EOS to set initial internal energy e eos(eos_input_rt, state); +} + +AMREX_INLINE +void enforce_post_burn_constraints(burn_t &state) { + + amrex::Real inmfracs[NumSpec] = {-1.0}; + amrex::Real insum = 0.0_rt; + for (int nn = 0; nn < NumSpec; ++nn) { + state.xn[nn] = amrex::max(state.xn[nn], small_x); + inmfracs[nn] = spmasses[nn] * state.xn[nn] / state.rho; + insum += inmfracs[nn]; + } + + for (int nn = 0; nn < NumSpec; ++nn) { + inmfracs[nn] /= insum; + // update the number densities with conserved mass fractions + state.xn[nn] = inmfracs[nn] * state.rho / spmasses[nn]; + } + + // update the number density of electrons due to charge conservation + balance_charge(state); + + // reconserve mass fractions post charge conservation + insum = 0.0_rt; + for (int nn = 0; nn < NumSpec; ++nn) { + state.xn[nn] = amrex::max(state.xn[nn], small_x); + inmfracs[nn] = spmasses[nn] * state.xn[nn] / state.rho; + insum += inmfracs[nn]; + } + + for (int nn = 0; nn < NumSpec; ++nn) { + inmfracs[nn] /= insum; + // update the number densities with conserved mass fractions + state.xn[nn] = inmfracs[nn] * state.rho / spmasses[nn]; + } + + // get the updated T + eos(eos_input_re, state); +} + +} // namespace + +AMREX_INLINE +auto burn_cell_c() -> int { + + burn_t state{}; + init_burn_state(state, 1.0_rt, 0.0_rt, true); // name of output file std::ofstream state_over_time("state_over_time.txt"); @@ -147,7 +206,7 @@ auto burn_cell_c() -> int { // loop over steps, burn, and output the current state // the loop below is similar to that used in krome and pynucastro - amrex::Real dd = rhotot; + amrex::Real dd = state.rho; amrex::Real dd1 = 0.0_rt; for (int n = 0; n < unit_test_rp::nsteps; n++) { @@ -205,40 +264,7 @@ auto burn_cell_c() -> int { amrex::Gpu::streamSynchronize(); } - // ensure positivity and normalize - amrex::Real inmfracs[NumSpec] = {-1.0}; - amrex::Real insum = 0.0_rt; - for (int nn = 0; nn < NumSpec; ++nn) { - state.xn[nn] = amrex::max(state.xn[nn], small_x); - inmfracs[nn] = spmasses[nn] * state.xn[nn] / state.rho; - insum += inmfracs[nn]; - } - - for (int nn = 0; nn < NumSpec; ++nn) { - inmfracs[nn] /= insum; - // update the number densities with conserved mass fractions - state.xn[nn] = inmfracs[nn] * state.rho / spmasses[nn]; - } - - // update the number density of electrons due to charge conservation - balance_charge(state); - - // reconserve mass fractions post charge conservation - insum = 0; - for (int nn = 0; nn < NumSpec; ++nn) { - state.xn[nn] = amrex::max(state.xn[nn], small_x); - inmfracs[nn] = spmasses[nn] * state.xn[nn] / state.rho; - insum += inmfracs[nn]; - } - - for (int nn = 0; nn < NumSpec; ++nn) { - inmfracs[nn] /= insum; - // update the number densities with conserved mass fractions - state.xn[nn] = inmfracs[nn] * state.rho / spmasses[nn]; - } - - // get the updated T - eos(eos_input_re, state); + enforce_post_burn_constraints(state); t += dt; @@ -272,4 +298,130 @@ auto burn_cell_c() -> int { return state.success; } + +AMREX_INLINE +auto burn_cell_multi_c() -> int { + + constexpr int n_zones = 128; + + amrex::Gpu::HostVector states_h(n_zones); + amrex::Gpu::HostVector dd_h(n_zones); + amrex::Gpu::HostVector dt_h(n_zones); + amrex::Gpu::HostVector time_h(n_zones); + std::vector active_h(n_zones, 1); + + for (int iz = 0; iz < n_zones; ++iz) { + amrex::Real temp_offset = + 0.0001_rt * unit_test_rp::temperature * + (static_cast(iz) / static_cast(n_zones - 1) - + 0.5_rt); + init_burn_state(states_h[iz], 1.0_rt, temp_offset, false); + dd_h[iz] = states_h[iz].rho; + dt_h[iz] = 0.0_rt; + time_h[iz] = 0.0_rt; + } + + amrex::Gpu::DeviceVector states_d(n_zones); + amrex::Gpu::DeviceVector dt_d(n_zones); + + constexpr amrex::Real multi_zone_dt_scale = 0.25_rt; + + for (int step = 0; step < unit_test_rp::nsteps; ++step) { + bool any_active = false; + + for (int iz = 0; iz < n_zones; ++iz) { + if (active_h[iz] == 0) { + dt_h[iz] = 0.0_rt; + continue; + } + + amrex::Real dd1 = dd_h[iz]; + amrex::Real rhotmp = 0.0_rt; + for (int nn = 0; nn < NumSpec; ++nn) { + rhotmp += states_h[iz].xn[nn] * spmasses[nn]; + } + amrex::Real tff = + std::sqrt(M_PI * 3.0_rt / (32.0_rt * rhotmp * grav_constant)); + amrex::Real dt = multi_zone_dt_scale * unit_test_rp::tff_reduc * tff; + amrex::Real dd_new = dd1 + dt * (dd1 / tff); + + if (dt < 10.0_rt || dd_new > 2.0e-6_rt) { + active_h[iz] = 0; + dt_h[iz] = 0.0_rt; + continue; + } + + amrex::Real ratio = dd_new / dd1; + for (int nn = 0; nn < NumSpec; ++nn) { + states_h[iz].xn[nn] *= ratio; + } + states_h[iz].rho *= ratio; + + dd_h[iz] = dd_new; + dt_h[iz] = dt; + any_active = true; + } + + if (!any_active) { + break; + } + + amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, states_h.begin(), + states_h.end(), states_d.begin()); + amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, dt_h.begin(), + dt_h.end(), dt_d.begin()); + amrex::Gpu::streamSynchronize(); + + burn_t *states_ptr = states_d.data(); + const amrex::Real *dt_ptr = dt_d.data(); + + amrex::ParallelFor( + n_zones, [states_ptr, dt_ptr] AMREX_GPU_DEVICE(int idx) noexcept { + amrex::Real dt = dt_ptr[idx]; + if (dt <= 0.0_rt) { + return; + } + burner(states_ptr[idx], dt); + }); + amrex::Gpu::streamSynchronize(); + + amrex::Gpu::copyAsync(amrex::Gpu::deviceToHost, states_d.begin(), + states_d.end(), states_h.begin()); + amrex::Gpu::streamSynchronize(); + + for (int iz = 0; iz < n_zones; ++iz) { + if (dt_h[iz] <= 0.0_rt) { + continue; + } + enforce_post_burn_constraints(states_h[iz]); + time_h[iz] += dt_h[iz]; + } + } + + int failures = 0; + amrex::Real t_min = std::numeric_limits::max(); + amrex::Real t_max = -std::numeric_limits::max(); + bool any_success = false; + + for (int iz = 0; iz < n_zones; ++iz) { + if (!states_h[iz].success) { + ++failures; + continue; + } + any_success = true; + t_min = std::min(t_min, states_h[iz].T); + t_max = std::max(t_max, states_h[iz].T); + } + + std::cout << "multi-zone burn complete; zones failed = " << failures + << std::endl; + if (any_success) { + std::cout << "multi-zone temperature range: [" << t_min << ", " << t_max + << "]" << std::endl; + } else { + std::cout << "multi-zone burn had no successful zones" << std::endl; + } + + return (failures == 0); +} #endif diff --git a/unit_test/burn_cell_primordial_chem/main.cpp b/unit_test/burn_cell_primordial_chem/main.cpp index 5e458f8936..35b6b01946 100644 --- a/unit_test/burn_cell_primordial_chem/main.cpp +++ b/unit_test/burn_cell_primordial_chem/main.cpp @@ -39,7 +39,12 @@ int main(int argc, char *argv[]) { // C++ Network, RHS, screening, rates initialization network_init(); - success = burn_cell_c(); + int single_success = burn_cell_c(); + + std::cout << "starting the multi-zone burn..." << std::endl; + int multi_success = burn_cell_multi_c(); + + success = single_success && multi_success; } amrex::Finalize(); From 3bb58985959897799c76a8c284078b56251bd26e Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Thu, 23 Oct 2025 23:03:02 -0400 Subject: [PATCH 09/50] fix multi-zone burn --- .../burn_cell_primordial_chem/burn_cell.H | 51 +++++++++++++++++-- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index 966ddd15d7..5aeb66f958 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -308,6 +309,7 @@ auto burn_cell_multi_c() -> int { amrex::Gpu::HostVector dd_h(n_zones); amrex::Gpu::HostVector dt_h(n_zones); amrex::Gpu::HostVector time_h(n_zones); + std::vector states_before(n_zones); std::vector active_h(n_zones, 1); for (int iz = 0; iz < n_zones; ++iz) { @@ -324,8 +326,6 @@ auto burn_cell_multi_c() -> int { amrex::Gpu::DeviceVector states_d(n_zones); amrex::Gpu::DeviceVector dt_d(n_zones); - constexpr amrex::Real multi_zone_dt_scale = 0.25_rt; - for (int step = 0; step < unit_test_rp::nsteps; ++step) { bool any_active = false; @@ -342,7 +342,7 @@ auto burn_cell_multi_c() -> int { } amrex::Real tff = std::sqrt(M_PI * 3.0_rt / (32.0_rt * rhotmp * grav_constant)); - amrex::Real dt = multi_zone_dt_scale * unit_test_rp::tff_reduc * tff; + amrex::Real dt = unit_test_rp::tff_reduc * tff; amrex::Real dd_new = dd1 + dt * (dd1 / tff); if (dt < 10.0_rt || dd_new > 2.0e-6_rt) { @@ -357,6 +357,8 @@ auto burn_cell_multi_c() -> int { } states_h[iz].rho *= ratio; + states_before[iz] = states_h[iz]; + dd_h[iz] = dd_new; dt_h[iz] = dt; any_active = true; @@ -389,11 +391,50 @@ auto burn_cell_multi_c() -> int { states_d.end(), states_h.begin()); amrex::Gpu::streamSynchronize(); + const int max_split_depth = 6; + constexpr amrex::Real min_split_dt = 1.0_rt; + std::function integrate_with_retries; + integrate_with_retries = [&](burn_t& state, amrex::Real dt, + int depth) -> bool { + burn_t backup = state; + state.time = 0.0_rt; + burner(state, dt); + if (state.success && state.e > 0.0_rt) { + return true; + } + if (depth >= max_split_depth || 0.5_rt * dt <= min_split_dt) { + state = backup; + return false; + } + amrex::Real half_dt = 0.5_rt * dt; + state = backup; + if (!integrate_with_retries(state, half_dt, depth + 1)) { + return false; + } + return integrate_with_retries(state, half_dt, depth + 1); + }; + for (int iz = 0; iz < n_zones; ++iz) { if (dt_h[iz] <= 0.0_rt) { continue; } - enforce_post_burn_constraints(states_h[iz]); + + auto& state = states_h[iz]; + + if (!state.success || state.e <= 0.0_rt) { + burn_t retry_state = states_before[iz]; + if (integrate_with_retries(retry_state, dt_h[iz], 0)) { + state = retry_state; + } else { + state = states_before[iz]; + state.success = false; + active_h[iz] = 0; + dt_h[iz] = 0.0_rt; + continue; + } + } + + enforce_post_burn_constraints(state); time_h[iz] += dt_h[iz]; } } @@ -404,7 +445,7 @@ auto burn_cell_multi_c() -> int { bool any_success = false; for (int iz = 0; iz < n_zones; ++iz) { - if (!states_h[iz].success) { + if (!states_h[iz].success || states_h[iz].e <= 0.0_rt) { ++failures; continue; } From 002ff3760f84d3736dc21e6808a10a3a3e779165 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Thu, 23 Oct 2025 23:17:28 -0400 Subject: [PATCH 10/50] suppress verbosity --- integration/VODE/vode_dvode.H | 14 +++++++++++--- integration/integrator_setup_strang.H | 2 +- interfaces/burn_type.H | 3 +++ unit_test/burn_cell_primordial_chem/burn_cell.H | 11 +++++++++++ 4 files changed, 26 insertions(+), 4 deletions(-) diff --git a/integration/VODE/vode_dvode.H b/integration/VODE/vode_dvode.H index bbbf414398..aeb69725b1 100644 --- a/integration/VODE/vode_dvode.H +++ b/integration/VODE/vode_dvode.H @@ -170,14 +170,18 @@ int dvode (BurnT& state, DvodeT& vstate) if (vstate.n_step == 0) { #ifndef AMREX_USE_GPU + if (!state.suppress_failure_output) { std::cout << amrex::Font::Bold << amrex::FGColor::Red << "DVODE: too much accuracy requested at start of integration" << amrex::ResetDisplay << std::endl; + } #endif return IERR_TOO_MUCH_ACCURACY_REQUESTED; } // Too much accuracy requested for machine precision. #ifndef AMREX_USE_GPU - std::cout << "DVODE: too much accuracy requested" << std::endl; + if (!state.suppress_failure_output) { + std::cout << "DVODE: too much accuracy requested" << std::endl; + } #endif for (int i = 1; i <= int_neqs; ++i) { vstate.y(i) = vstate.yh(i,1); @@ -197,7 +201,9 @@ int dvode (BurnT& state, DvodeT& vstate) if (kflag == -1) { // Error test failed repeatedly or with ABS(H) = HMIN. #ifndef AMREX_USE_GPU - std::cout << amrex::Font::Bold << amrex::FGColor::Red << "DVODE: error test failed repeatedly or with abs(H) = HMIN" << amrex::ResetDisplay << std::endl; + if (!state.suppress_failure_output) { + std::cout << amrex::Font::Bold << amrex::FGColor::Red << "DVODE: error test failed repeatedly or with abs(H) = HMIN" << amrex::ResetDisplay << std::endl; + } #endif // Set Y array, T, and optional output. for (int i = 1; i <= int_neqs; ++i) { @@ -211,7 +217,9 @@ int dvode (BurnT& state, DvodeT& vstate) else if (kflag == -2) { // Convergence failed repeatedly or with ABS(H) = HMIN. #ifndef AMREX_USE_GPU - std::cout << amrex::Font::Bold << amrex::FGColor::Red << "DVODE: corrector convergence failed repeatedly or with abs(H) = HMIN" << amrex::ResetDisplay << std::endl; + if (!state.suppress_failure_output) { + std::cout << amrex::Font::Bold << amrex::FGColor::Red << "DVODE: corrector convergence failed repeatedly or with abs(H) = HMIN" << amrex::ResetDisplay << std::endl; + } #endif // Set Y array, T, and optional output. for (int i = 1; i <= int_neqs; ++i) { diff --git a/integration/integrator_setup_strang.H b/integration/integrator_setup_strang.H index b18b6d5136..d63f246c1c 100644 --- a/integration/integrator_setup_strang.H +++ b/integration/integrator_setup_strang.H @@ -181,7 +181,7 @@ void integrator_cleanup (IntegratorT& int_state, BurnT& state, // If we failed, print out the current state of the integration. - if (! state.success) { + if (! state.success && !state.suppress_failure_output) { #ifndef AMREX_USE_GPU std::cout << amrex::Font::Bold << amrex::FGColor::Red << "[ERROR] integration failed in net" << amrex::ResetDisplay << std::endl; std::cout << "istate = " << istate << std::endl; diff --git a/interfaces/burn_type.H b/interfaces/burn_type.H index 316053530d..28c80286a4 100644 --- a/interfaces/burn_type.H +++ b/interfaces/burn_type.H @@ -156,6 +156,9 @@ struct burn_t // integrator error code short error_code{}; + // suppress verbose failure diagnostics (useful for GPU retries) + bool suppress_failure_output{}; + }; diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index 5aeb66f958..a64bb6e694 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -253,6 +253,7 @@ auto burn_cell_c() -> int { // do the actual integration { + state.suppress_failure_output = true; amrex::AsyncArray state_device(&state, 1); burn_t* state_ptr = state_device.data(); @@ -263,6 +264,7 @@ auto burn_cell_c() -> int { state_device.copyToHost(&state, 1); amrex::Gpu::streamSynchronize(); + state.suppress_failure_output = false; } enforce_post_burn_constraints(state); @@ -311,6 +313,7 @@ auto burn_cell_multi_c() -> int { amrex::Gpu::HostVector time_h(n_zones); std::vector states_before(n_zones); std::vector active_h(n_zones, 1); + int retry_total = 0; for (int iz = 0; iz < n_zones; ++iz) { amrex::Real temp_offset = @@ -357,6 +360,8 @@ auto burn_cell_multi_c() -> int { } states_h[iz].rho *= ratio; + states_h[iz].suppress_failure_output = true; + states_before[iz] = states_h[iz]; dd_h[iz] = dd_new; @@ -398,8 +403,10 @@ auto burn_cell_multi_c() -> int { int depth) -> bool { burn_t backup = state; state.time = 0.0_rt; + state.suppress_failure_output = true; burner(state, dt); if (state.success && state.e > 0.0_rt) { + state.suppress_failure_output = false; return true; } if (depth >= max_split_depth || 0.5_rt * dt <= min_split_dt) { @@ -423,6 +430,8 @@ auto burn_cell_multi_c() -> int { if (!state.success || state.e <= 0.0_rt) { burn_t retry_state = states_before[iz]; + retry_state.suppress_failure_output = false; + ++retry_total; if (integrate_with_retries(retry_state, dt_h[iz], 0)) { state = retry_state; } else { @@ -434,6 +443,7 @@ auto burn_cell_multi_c() -> int { } } + state.suppress_failure_output = false; enforce_post_burn_constraints(state); time_h[iz] += dt_h[iz]; } @@ -459,6 +469,7 @@ auto burn_cell_multi_c() -> int { if (any_success) { std::cout << "multi-zone temperature range: [" << t_min << ", " << t_max << "]" << std::endl; + std::cout << "multi-zone retries applied: " << retry_total << std::endl; } else { std::cout << "multi-zone burn had no successful zones" << std::endl; } From 0459efda0cbb7480ba874d97bb385db2c6b713b0 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Thu, 23 Oct 2025 23:19:59 -0400 Subject: [PATCH 11/50] suppress more verbosity --- unit_test/burn_cell_primordial_chem/burn_cell.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index a64bb6e694..93040a0211 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -241,7 +241,7 @@ auto burn_cell_c() -> int { break; } - std::cout << "step " << n << " done" << std::endl; + // std::cout << "step " << n << " done" << std::endl; // scale the number densities for (int nn = 0; nn < NumSpec; ++nn) { From 1bd729e7871ff377f5c7ca83ea61dc958b21a8c9 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Thu, 23 Oct 2025 23:21:39 -0400 Subject: [PATCH 12/50] formatting --- unit_test/burn_cell_primordial_chem/main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/main.cpp b/unit_test/burn_cell_primordial_chem/main.cpp index 35b6b01946..d3f71f2cf3 100644 --- a/unit_test/burn_cell_primordial_chem/main.cpp +++ b/unit_test/burn_cell_primordial_chem/main.cpp @@ -26,7 +26,7 @@ int main(int argc, char *argv[]) { AMREX_ALWAYS_ASSERT_WITH_MESSAGE(run_prefix == input_run_prefix, "input file is missing or incorrect!"); - std::cout << "starting the single zone burn..." << std::endl; + std::cout << "\nstarting the single zone burn..." << std::endl; ParmParse ppa("amr"); @@ -41,7 +41,7 @@ int main(int argc, char *argv[]) { int single_success = burn_cell_c(); - std::cout << "starting the multi-zone burn..." << std::endl; + std::cout << "\nstarting the multi-zone burn..." << std::endl; int multi_success = burn_cell_multi_c(); success = single_success && multi_success; From 7cb6281dadd53d961c799e39fd33ddd8d150fe3c Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Thu, 23 Oct 2025 23:29:38 -0400 Subject: [PATCH 13/50] remove verbosity --- .../burn_cell_primordial_chem/burn_cell.H | 36 +++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index 93040a0211..cd6d8edca2 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -176,7 +177,7 @@ AMREX_INLINE auto burn_cell_c() -> int { burn_t state{}; - init_burn_state(state, 1.0_rt, 0.0_rt, true); + init_burn_state(state, 1.0_rt, 0.0_rt, false); // name of output file std::ofstream state_over_time("state_over_time.txt"); @@ -285,19 +286,34 @@ auto burn_cell_c() -> int { std::cout << "------------------------------------" << std::endl; std::cout << "successful? " << state.success << std::endl; - std::cout << "------------------------------------" << std::endl; - std::cout << "T initial = " << state_in.T << std::endl; - std::cout << "T final = " << state.T << std::endl; - std::cout << "Eint initial = " << state_in.e << std::endl; - std::cout << "Eint final = " << state.e << std::endl; - std::cout << "rho initial = " << state_in.rho << std::endl; - std::cout << "rho final = " << state.rho << std::endl; + constexpr int column_width = 44; + auto print_state_row = [&](const std::string &label, amrex::Real initial_value, + amrex::Real final_value) { + std::ostringstream left_stream; + left_stream << label << " " << std::scientific << std::setprecision(6) + << initial_value; + std::ostringstream right_stream; + right_stream << label << " " << std::scientific << std::setprecision(6) + << final_value; + std::cout << std::left << std::setw(column_width) << left_stream.str() + << std::left << std::setw(column_width) << right_stream.str() + << std::endl; + }; std::cout << "------------------------------------" << std::endl; - std::cout << "New number densities: " << std::endl; + std::cout << std::left << std::setw(column_width) << "Initial State" + << std::left << std::setw(column_width) << "Final State" + << std::endl; + std::cout << std::string(2 * column_width, '-') << std::endl; + print_state_row("Temperature (K):", state_in.T, state.T); + print_state_row("Density (g/cm^3):", state_in.rho, state.rho); + print_state_row("Eint (erg/g):", state_in.e, state.e); for (int n = 0; n < NumSpec; ++n) { - std::cout << state.xn[n] << std::endl; + const std::string label = "n(" + short_spec_names_cxx[n] + "):"; + print_state_row(label, state_in.xn[n], state.xn[n]); } + std::cout << std::string(2 * column_width, '-') << std::endl; + std::cout << std::right; return state.success; } From f73248ec216f6ef556b7cccdb08e7f86736dbfed Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Thu, 23 Oct 2025 23:38:50 -0400 Subject: [PATCH 14/50] adjust inputs --- unit_test/burn_cell_primordial_chem/burn_cell.H | 2 +- unit_test/burn_cell_primordial_chem/inputs_primordial_chem | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index cd6d8edca2..1086fbdd64 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -333,7 +333,7 @@ auto burn_cell_multi_c() -> int { for (int iz = 0; iz < n_zones; ++iz) { amrex::Real temp_offset = - 0.0001_rt * unit_test_rp::temperature * + 0.5_rt * unit_test_rp::temperature * (static_cast(iz) / static_cast(n_zones - 1) - 0.5_rt); init_burn_state(states_h[iz], 1.0_rt, temp_offset, false); diff --git a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem index 27305ba634..8db5c8d51d 100644 --- a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem +++ b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem @@ -43,7 +43,7 @@ integrator.X_reject_buffer = 1e100 # Set which jacobian to use # 1 = analytic jacobian # 2 = numerical jacobian -integrator.jacobian = 1 +integrator.jacobian = 2 # do you want to normalize abundances within VODE? (you don't!) integrator.renormalize_abundances = 0 # tolerances From 4e67db2b7d86abc7436ceeab541208c9d0137adf Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Thu, 23 Oct 2025 23:43:45 -0400 Subject: [PATCH 15/50] do burn retries on GPU --- .../burn_cell_primordial_chem/burn_cell.H | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index 1086fbdd64..82e657901a 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -414,13 +414,27 @@ auto burn_cell_multi_c() -> int { const int max_split_depth = 6; constexpr amrex::Real min_split_dt = 1.0_rt; + auto burn_on_gpu = [](burn_t &state, amrex::Real dt) { + state.time = 0.0_rt; + state.suppress_failure_output = true; + + amrex::AsyncArray state_device(&state, 1); + burn_t *state_ptr = state_device.data(); + + amrex::ParallelFor( + 1, [state_ptr, dt] AMREX_GPU_DEVICE(int idx) noexcept { + burner(state_ptr[idx], dt); + }); + + state_device.copyToHost(&state, 1); + amrex::Gpu::streamSynchronize(); + }; + std::function integrate_with_retries; integrate_with_retries = [&](burn_t& state, amrex::Real dt, int depth) -> bool { burn_t backup = state; - state.time = 0.0_rt; - state.suppress_failure_output = true; - burner(state, dt); + burn_on_gpu(state, dt); if (state.success && state.e > 0.0_rt) { state.suppress_failure_output = false; return true; From 141e251fb646e19c3cb50a579999c34685f2656c Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Thu, 23 Oct 2025 23:56:29 -0400 Subject: [PATCH 16/50] compare CPU/GPU runs --- .../burn_cell_primordial_chem/burn_cell.H | 400 ++++++++++++------ 1 file changed, 267 insertions(+), 133 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index 82e657901a..bd7e1a84ce 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -322,188 +322,322 @@ AMREX_INLINE auto burn_cell_multi_c() -> int { constexpr int n_zones = 128; + constexpr amrex::Real rel_tol = 1.0e-10_rt; + constexpr amrex::Real abs_tol = 1.0e-12_rt; + + struct MultiZoneSummary { + int failures = 0; + int retries = 0; + amrex::Real t_min = std::numeric_limits::max(); + amrex::Real t_max = -std::numeric_limits::max(); + bool any_success = false; + }; - amrex::Gpu::HostVector states_h(n_zones); - amrex::Gpu::HostVector dd_h(n_zones); - amrex::Gpu::HostVector dt_h(n_zones); - amrex::Gpu::HostVector time_h(n_zones); - std::vector states_before(n_zones); - std::vector active_h(n_zones, 1); - int retry_total = 0; - - for (int iz = 0; iz < n_zones; ++iz) { - amrex::Real temp_offset = - 0.5_rt * unit_test_rp::temperature * - (static_cast(iz) / static_cast(n_zones - 1) - - 0.5_rt); - init_burn_state(states_h[iz], 1.0_rt, temp_offset, false); - dd_h[iz] = states_h[iz].rho; - dt_h[iz] = 0.0_rt; - time_h[iz] = 0.0_rt; - } - - amrex::Gpu::DeviceVector states_d(n_zones); - amrex::Gpu::DeviceVector dt_d(n_zones); + auto run_multi_zone = [&](bool use_gpu, std::vector &final_states, + std::vector &final_times, + MultiZoneSummary &summary) -> bool { + amrex::Gpu::HostVector states_h(n_zones); + amrex::Gpu::HostVector dd_h(n_zones); + amrex::Gpu::HostVector dt_h(n_zones); + amrex::Gpu::HostVector time_h(n_zones); + std::vector states_before(n_zones); + std::vector active_h(n_zones, 1); - for (int step = 0; step < unit_test_rp::nsteps; ++step) { - bool any_active = false; + summary = MultiZoneSummary{}; + int retry_total = 0; for (int iz = 0; iz < n_zones; ++iz) { - if (active_h[iz] == 0) { - dt_h[iz] = 0.0_rt; - continue; - } - - amrex::Real dd1 = dd_h[iz]; - amrex::Real rhotmp = 0.0_rt; - for (int nn = 0; nn < NumSpec; ++nn) { - rhotmp += states_h[iz].xn[nn] * spmasses[nn]; - } - amrex::Real tff = - std::sqrt(M_PI * 3.0_rt / (32.0_rt * rhotmp * grav_constant)); - amrex::Real dt = unit_test_rp::tff_reduc * tff; - amrex::Real dd_new = dd1 + dt * (dd1 / tff); - - if (dt < 10.0_rt || dd_new > 2.0e-6_rt) { - active_h[iz] = 0; - dt_h[iz] = 0.0_rt; - continue; - } - - amrex::Real ratio = dd_new / dd1; - for (int nn = 0; nn < NumSpec; ++nn) { - states_h[iz].xn[nn] *= ratio; - } - states_h[iz].rho *= ratio; - - states_h[iz].suppress_failure_output = true; - - states_before[iz] = states_h[iz]; - - dd_h[iz] = dd_new; - dt_h[iz] = dt; - any_active = true; + amrex::Real temp_offset = + 0.5_rt * unit_test_rp::temperature * + (static_cast(iz) / + static_cast(n_zones - 1) - + 0.5_rt); + init_burn_state(states_h[iz], 1.0_rt, temp_offset, false); + dd_h[iz] = states_h[iz].rho; + dt_h[iz] = 0.0_rt; + time_h[iz] = 0.0_rt; + active_h[iz] = 1; } - if (!any_active) { - break; + amrex::Gpu::DeviceVector states_d; + amrex::Gpu::DeviceVector dt_d; + if (use_gpu) { + states_d.resize(n_zones); + dt_d.resize(n_zones); } - amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, states_h.begin(), - states_h.end(), states_d.begin()); - amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, dt_h.begin(), - dt_h.end(), dt_d.begin()); - amrex::Gpu::streamSynchronize(); - - burn_t *states_ptr = states_d.data(); - const amrex::Real *dt_ptr = dt_d.data(); - - amrex::ParallelFor( - n_zones, [states_ptr, dt_ptr] AMREX_GPU_DEVICE(int idx) noexcept { - amrex::Real dt = dt_ptr[idx]; - if (dt <= 0.0_rt) { - return; - } - burner(states_ptr[idx], dt); - }); - amrex::Gpu::streamSynchronize(); - - amrex::Gpu::copyAsync(amrex::Gpu::deviceToHost, states_d.begin(), - states_d.end(), states_h.begin()); - amrex::Gpu::streamSynchronize(); - const int max_split_depth = 6; constexpr amrex::Real min_split_dt = 1.0_rt; - auto burn_on_gpu = [](burn_t &state, amrex::Real dt) { + + auto burn_backend = [&](burn_t &state, amrex::Real dt) { state.time = 0.0_rt; state.suppress_failure_output = true; - amrex::AsyncArray state_device(&state, 1); - burn_t *state_ptr = state_device.data(); + if (use_gpu) { + amrex::AsyncArray state_device(&state, 1); + burn_t *state_ptr = state_device.data(); - amrex::ParallelFor( - 1, [state_ptr, dt] AMREX_GPU_DEVICE(int idx) noexcept { - burner(state_ptr[idx], dt); - }); + amrex::ParallelFor( + 1, [state_ptr, dt] AMREX_GPU_DEVICE(int idx) noexcept { + burner(state_ptr[idx], dt); + }); - state_device.copyToHost(&state, 1); - amrex::Gpu::streamSynchronize(); + state_device.copyToHost(&state, 1); + amrex::Gpu::streamSynchronize(); + } else { + burner(state, dt); + } }; - std::function integrate_with_retries; - integrate_with_retries = [&](burn_t& state, amrex::Real dt, + std::function integrate_with_retries; + integrate_with_retries = [&](burn_t &state, amrex::Real dt, int depth) -> bool { burn_t backup = state; - burn_on_gpu(state, dt); + burn_backend(state, dt); if (state.success && state.e > 0.0_rt) { state.suppress_failure_output = false; return true; } if (depth >= max_split_depth || 0.5_rt * dt <= min_split_dt) { state = backup; + state.suppress_failure_output = false; return false; } amrex::Real half_dt = 0.5_rt * dt; state = backup; if (!integrate_with_retries(state, half_dt, depth + 1)) { + state.suppress_failure_output = false; return false; } - return integrate_with_retries(state, half_dt, depth + 1); + bool status = integrate_with_retries(state, half_dt, depth + 1); + state.suppress_failure_output = false; + return status; }; - for (int iz = 0; iz < n_zones; ++iz) { - if (dt_h[iz] <= 0.0_rt) { - continue; - } + for (int step = 0; step < unit_test_rp::nsteps; ++step) { + bool any_active = false; - auto& state = states_h[iz]; - - if (!state.success || state.e <= 0.0_rt) { - burn_t retry_state = states_before[iz]; - retry_state.suppress_failure_output = false; - ++retry_total; - if (integrate_with_retries(retry_state, dt_h[iz], 0)) { - state = retry_state; - } else { - state = states_before[iz]; - state.success = false; + for (int iz = 0; iz < n_zones; ++iz) { + if (active_h[iz] == 0) { + dt_h[iz] = 0.0_rt; + continue; + } + + amrex::Real dd1 = dd_h[iz]; + amrex::Real rhotmp = 0.0_rt; + for (int nn = 0; nn < NumSpec; ++nn) { + rhotmp += states_h[iz].xn[nn] * spmasses[nn]; + } + amrex::Real tff = + std::sqrt(M_PI * 3.0_rt / (32.0_rt * rhotmp * grav_constant)); + amrex::Real dt = unit_test_rp::tff_reduc * tff; + amrex::Real dd_new = dd1 + dt * (dd1 / tff); + + if (dt < 10.0_rt || dd_new > 2.0e-6_rt) { active_h[iz] = 0; dt_h[iz] = 0.0_rt; continue; } + + amrex::Real ratio = dd_new / dd1; + for (int nn = 0; nn < NumSpec; ++nn) { + states_h[iz].xn[nn] *= ratio; + } + states_h[iz].rho *= ratio; + + states_h[iz].suppress_failure_output = true; + states_before[iz] = states_h[iz]; + + dd_h[iz] = dd_new; + dt_h[iz] = dt; + any_active = true; } - state.suppress_failure_output = false; - enforce_post_burn_constraints(state); - time_h[iz] += dt_h[iz]; + if (!any_active) { + break; + } + + if (use_gpu) { + amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, states_h.begin(), + states_h.end(), states_d.begin()); + amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, dt_h.begin(), + dt_h.end(), dt_d.begin()); + amrex::Gpu::streamSynchronize(); + + burn_t *states_ptr = states_d.data(); + const amrex::Real *dt_ptr = dt_d.data(); + + amrex::ParallelFor( + n_zones, [states_ptr, dt_ptr] AMREX_GPU_DEVICE(int idx) noexcept { + amrex::Real dt = dt_ptr[idx]; + if (dt <= 0.0_rt) { + return; + } + burner(states_ptr[idx], dt); + }); + amrex::Gpu::streamSynchronize(); + + amrex::Gpu::copyAsync(amrex::Gpu::deviceToHost, states_d.begin(), + states_d.end(), states_h.begin()); + amrex::Gpu::streamSynchronize(); + } else { + for (int iz = 0; iz < n_zones; ++iz) { + amrex::Real dt = dt_h[iz]; + if (dt <= 0.0_rt) { + continue; + } + burn_backend(states_h[iz], dt); + } + } + + for (int iz = 0; iz < n_zones; ++iz) { + if (dt_h[iz] <= 0.0_rt) { + continue; + } + + auto &state = states_h[iz]; + + if (!state.success || state.e <= 0.0_rt) { + burn_t retry_state = states_before[iz]; + retry_state.suppress_failure_output = false; + ++retry_total; + if (integrate_with_retries(retry_state, dt_h[iz], 0)) { + state = retry_state; + } else { + state = states_before[iz]; + state.success = false; + active_h[iz] = 0; + dt_h[iz] = 0.0_rt; + continue; + } + } + + state.suppress_failure_output = false; + enforce_post_burn_constraints(state); + time_h[iz] += dt_h[iz]; + } } - } - int failures = 0; - amrex::Real t_min = std::numeric_limits::max(); - amrex::Real t_max = -std::numeric_limits::max(); - bool any_success = false; + summary.failures = 0; + summary.t_min = std::numeric_limits::max(); + summary.t_max = -std::numeric_limits::max(); + summary.any_success = false; + summary.retries = retry_total; + + for (int iz = 0; iz < n_zones; ++iz) { + if (!states_h[iz].success || states_h[iz].e <= 0.0_rt) { + ++summary.failures; + continue; + } + summary.any_success = true; + summary.t_min = std::min(summary.t_min, states_h[iz].T); + summary.t_max = std::max(summary.t_max, states_h[iz].T); + } + + final_states.assign(states_h.begin(), states_h.end()); + final_times.assign(time_h.begin(), time_h.end()); + + return (summary.failures == 0); + }; + + std::vector cpu_states; + std::vector cpu_times; + MultiZoneSummary cpu_summary; + bool cpu_success = run_multi_zone(false, cpu_states, cpu_times, cpu_summary); + + std::vector gpu_states; + std::vector gpu_times; + MultiZoneSummary gpu_summary; + bool gpu_success = run_multi_zone(true, gpu_states, gpu_times, gpu_summary); + + auto print_summary = [&](const std::string &label, + const MultiZoneSummary &summary) { + std::cout << label << " multi-zone burn complete; zones failed = " + << summary.failures << std::endl; + if (summary.any_success) { + std::cout << label << " multi-zone temperature range: [" + << summary.t_min << ", " << summary.t_max << "]" + << std::endl; + std::cout << label << " multi-zone retries applied: " + << summary.retries << std::endl; + } else { + std::cout << label << " multi-zone burn had no successful zones" + << std::endl; + } + }; + + print_summary("CPU", cpu_summary); + print_summary("GPU", gpu_summary); + + auto within_tol = [&](amrex::Real a, amrex::Real b) -> bool { + amrex::Real diff = std::abs(a - b); + amrex::Real scale = std::max(std::abs(a), std::abs(b)); + return diff <= (abs_tol + rel_tol * scale); + }; + + auto report_mismatch = [&](int zone, const std::string &field, + amrex::Real cpu_value, amrex::Real gpu_value) { + std::cout << "zone " << zone << " " << field + << " mismatch: CPU = " << cpu_value + << ", GPU = " << gpu_value << std::endl; + }; - for (int iz = 0; iz < n_zones; ++iz) { - if (!states_h[iz].success || states_h[iz].e <= 0.0_rt) { - ++failures; + bool states_match = true; + for (int iz = 0; iz < n_zones && states_match; ++iz) { + bool cpu_zone_success = + cpu_states[iz].success && cpu_states[iz].e > 0.0_rt; + bool gpu_zone_success = + gpu_states[iz].success && gpu_states[iz].e > 0.0_rt; + + if (cpu_zone_success != gpu_zone_success) { + std::cout << "zone " << iz + << " mismatch in success flag between CPU and GPU runs" + << std::endl; + states_match = false; + break; + } + if (!cpu_zone_success) { continue; } - any_success = true; - t_min = std::min(t_min, states_h[iz].T); - t_max = std::max(t_max, states_h[iz].T); + + if (!within_tol(cpu_times[iz], gpu_times[iz])) { + report_mismatch(iz, "time", cpu_times[iz], gpu_times[iz]); + states_match = false; + break; + } + + if (!within_tol(cpu_states[iz].rho, gpu_states[iz].rho)) { + report_mismatch(iz, "rho", cpu_states[iz].rho, gpu_states[iz].rho); + states_match = false; + break; + } + if (!within_tol(cpu_states[iz].T, gpu_states[iz].T)) { + report_mismatch(iz, "T", cpu_states[iz].T, gpu_states[iz].T); + states_match = false; + break; + } + if (!within_tol(cpu_states[iz].e, gpu_states[iz].e)) { + report_mismatch(iz, "e", cpu_states[iz].e, gpu_states[iz].e); + states_match = false; + break; + } + for (int n = 0; n < NumSpec; ++n) { + if (!within_tol(cpu_states[iz].xn[n], gpu_states[iz].xn[n])) { + std::string label = + "xn(" + std::string(short_spec_names_cxx[n]) + ")"; + report_mismatch(iz, label, cpu_states[iz].xn[n], + gpu_states[iz].xn[n]); + states_match = false; + break; + } + } } - std::cout << "multi-zone burn complete; zones failed = " << failures - << std::endl; - if (any_success) { - std::cout << "multi-zone temperature range: [" << t_min << ", " << t_max - << "]" << std::endl; - std::cout << "multi-zone retries applied: " << retry_total << std::endl; - } else { - std::cout << "multi-zone burn had no successful zones" << std::endl; + if (states_match) { + std::cout << "CPU/GPU multi-zone states agree within tolerances" + << std::endl; } - return (failures == 0); + return (cpu_success && gpu_success && states_match); } #endif From e2d0ad0af383d1046a3458d6f70263671a566b81 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Fri, 24 Oct 2025 12:04:57 -0400 Subject: [PATCH 17/50] summary statistics --- .../burn_cell_primordial_chem/burn_cell.H | 75 ++++++++++++------- 1 file changed, 47 insertions(+), 28 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index bd7e1a84ce..c0d9b1cce9 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -575,69 +576,87 @@ auto burn_cell_multi_c() -> int { return diff <= (abs_tol + rel_tol * scale); }; - auto report_mismatch = [&](int zone, const std::string &field, + struct MismatchStats { + int count = 0; + amrex::Real max_abs_diff = 0.0_rt; + int max_zone = -1; + amrex::Real cpu_value_at_max = 0.0_rt; + amrex::Real gpu_value_at_max = 0.0_rt; + }; + + std::map mismatch_summary; + + auto record_mismatch = [&](int zone, const std::string &field, amrex::Real cpu_value, amrex::Real gpu_value) { - std::cout << "zone " << zone << " " << field - << " mismatch: CPU = " << cpu_value - << ", GPU = " << gpu_value << std::endl; + auto &stats = mismatch_summary[field]; + stats.count++; + amrex::Real diff = std::abs(cpu_value - gpu_value); + if (stats.max_zone < 0 || diff > stats.max_abs_diff) { + stats.max_abs_diff = diff; + stats.max_zone = zone; + stats.cpu_value_at_max = cpu_value; + stats.gpu_value_at_max = gpu_value; + } }; - bool states_match = true; - for (int iz = 0; iz < n_zones && states_match; ++iz) { + for (int iz = 0; iz < n_zones; ++iz) { bool cpu_zone_success = cpu_states[iz].success && cpu_states[iz].e > 0.0_rt; bool gpu_zone_success = gpu_states[iz].success && gpu_states[iz].e > 0.0_rt; if (cpu_zone_success != gpu_zone_success) { - std::cout << "zone " << iz - << " mismatch in success flag between CPU and GPU runs" - << std::endl; - states_match = false; - break; + record_mismatch(iz, "success_flag", + cpu_zone_success ? 1.0_rt : 0.0_rt, + gpu_zone_success ? 1.0_rt : 0.0_rt); + continue; } if (!cpu_zone_success) { continue; } if (!within_tol(cpu_times[iz], gpu_times[iz])) { - report_mismatch(iz, "time", cpu_times[iz], gpu_times[iz]); - states_match = false; - break; + record_mismatch(iz, "time", cpu_times[iz], gpu_times[iz]); } if (!within_tol(cpu_states[iz].rho, gpu_states[iz].rho)) { - report_mismatch(iz, "rho", cpu_states[iz].rho, gpu_states[iz].rho); - states_match = false; - break; + record_mismatch(iz, "rho", cpu_states[iz].rho, gpu_states[iz].rho); } if (!within_tol(cpu_states[iz].T, gpu_states[iz].T)) { - report_mismatch(iz, "T", cpu_states[iz].T, gpu_states[iz].T); - states_match = false; - break; + record_mismatch(iz, "T", cpu_states[iz].T, gpu_states[iz].T); } if (!within_tol(cpu_states[iz].e, gpu_states[iz].e)) { - report_mismatch(iz, "e", cpu_states[iz].e, gpu_states[iz].e); - states_match = false; - break; + record_mismatch(iz, "e", cpu_states[iz].e, gpu_states[iz].e); } for (int n = 0; n < NumSpec; ++n) { if (!within_tol(cpu_states[iz].xn[n], gpu_states[iz].xn[n])) { std::string label = "xn(" + std::string(short_spec_names_cxx[n]) + ")"; - report_mismatch(iz, label, cpu_states[iz].xn[n], + record_mismatch(iz, label, cpu_states[iz].xn[n], gpu_states[iz].xn[n]); - states_match = false; - break; } } } - if (states_match) { + if (mismatch_summary.empty()) { std::cout << "CPU/GPU multi-zone states agree within tolerances" << std::endl; + } else { + std::cout << "CPU/GPU mismatches detected:" << std::endl; + for (const auto &entry : mismatch_summary) { + const auto &field = entry.first; + const auto &stats = entry.second; + std::cout << " field " << field << " mismatched in " << stats.count + << " zone(s)"; + if (stats.max_zone >= 0) { + std::cout << "; largest difference at zone " << stats.max_zone + << " (CPU = " << stats.cpu_value_at_max + << ", GPU = " << stats.gpu_value_at_max << ")"; + } + std::cout << std::endl; + } } - return (cpu_success && gpu_success && states_match); + return (cpu_success && gpu_success && mismatch_summary.empty()); } #endif From 77b6d79c8eaeafea71bf657689f8868f5b59457b Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Fri, 24 Oct 2025 13:54:50 -0400 Subject: [PATCH 18/50] compute diff norms --- .../burn_cell_primordial_chem/burn_cell.H | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index c0d9b1cce9..3c5542987d 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -586,6 +586,20 @@ auto burn_cell_multi_c() -> int { std::map mismatch_summary; + struct NormAccumulator { + amrex::Real l1 = 0.0_rt; + amrex::Real l2_sq = 0.0_rt; + amrex::Real linf = 0.0_rt; + void record(amrex::Real diff) { + amrex::Real abs_diff = std::abs(diff); + l1 += abs_diff; + l2_sq += diff * diff; + linf = std::max(linf, abs_diff); + } + }; + + std::map field_norms; + auto record_mismatch = [&](int zone, const std::string &field, amrex::Real cpu_value, amrex::Real gpu_value) { auto &stats = mismatch_summary[field]; @@ -599,6 +613,14 @@ auto burn_cell_multi_c() -> int { } }; + auto record_difference = [&](const std::string &field, amrex::Real cpu_value, + amrex::Real gpu_value, bool track_norm) { + if (!track_norm) { + return; + } + field_norms[field].record(cpu_value - gpu_value); + }; + for (int iz = 0; iz < n_zones; ++iz) { bool cpu_zone_success = cpu_states[iz].success && cpu_states[iz].e > 0.0_rt; @@ -609,6 +631,9 @@ auto burn_cell_multi_c() -> int { record_mismatch(iz, "success_flag", cpu_zone_success ? 1.0_rt : 0.0_rt, gpu_zone_success ? 1.0_rt : 0.0_rt); + record_difference("success_flag", + cpu_zone_success ? 1.0_rt : 0.0_rt, + gpu_zone_success ? 1.0_rt : 0.0_rt, false); continue; } if (!cpu_zone_success) { @@ -618,16 +643,20 @@ auto burn_cell_multi_c() -> int { if (!within_tol(cpu_times[iz], gpu_times[iz])) { record_mismatch(iz, "time", cpu_times[iz], gpu_times[iz]); } + record_difference("time", cpu_times[iz], gpu_times[iz], false); if (!within_tol(cpu_states[iz].rho, gpu_states[iz].rho)) { record_mismatch(iz, "rho", cpu_states[iz].rho, gpu_states[iz].rho); } + record_difference("rho", cpu_states[iz].rho, gpu_states[iz].rho, false); if (!within_tol(cpu_states[iz].T, gpu_states[iz].T)) { record_mismatch(iz, "T", cpu_states[iz].T, gpu_states[iz].T); } + record_difference("temperature", cpu_states[iz].T, gpu_states[iz].T, true); if (!within_tol(cpu_states[iz].e, gpu_states[iz].e)) { record_mismatch(iz, "e", cpu_states[iz].e, gpu_states[iz].e); } + record_difference("e", cpu_states[iz].e, gpu_states[iz].e, false); for (int n = 0; n < NumSpec; ++n) { if (!within_tol(cpu_states[iz].xn[n], gpu_states[iz].xn[n])) { std::string label = @@ -635,7 +664,36 @@ auto burn_cell_multi_c() -> int { record_mismatch(iz, label, cpu_states[iz].xn[n], gpu_states[iz].xn[n]); } + std::string norm_label = + "number_density(" + std::string(short_spec_names_cxx[n]) + ")"; + record_difference(norm_label, cpu_states[iz].xn[n], + gpu_states[iz].xn[n], true); + } + } + + if (!field_norms.empty()) { + std::cout << "CPU/GPU multi-zone difference norms:" << std::endl; + auto print_norm = [&](const std::string &label, + const NormAccumulator &acc) { + std::cout << " " << label << ": L1 = " << acc.l1 + << ", L2 = " << std::sqrt(acc.l2_sq) + << ", L_inf = " << acc.linf << std::endl; + }; + auto temp_it = field_norms.find("temperature"); + if (temp_it != field_norms.end()) { + print_norm("temperature", temp_it->second); + } + for (int n = 0; n < NumSpec; ++n) { + std::string label = + "number_density(" + std::string(short_spec_names_cxx[n]) + ")"; + auto it = field_norms.find(label); + if (it != field_norms.end()) { + print_norm(label, it->second); + } } + } else { + std::cout << "No comparable CPU/GPU multi-zone states to compute norms" + << std::endl; } if (mismatch_summary.empty()) { From 02626693f7cc50fd6adb3a951ce417ac864d76b1 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Fri, 24 Oct 2025 14:03:20 -0400 Subject: [PATCH 19/50] remove single-zone burn --- .../burn_cell_primordial_chem/burn_cell.H | 144 ------------------ unit_test/burn_cell_primordial_chem/main.cpp | 6 +- 2 files changed, 1 insertion(+), 149 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index 3c5542987d..a76ee3b9ee 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -174,150 +174,6 @@ void enforce_post_burn_constraints(burn_t &state) { } // namespace -AMREX_INLINE -auto burn_cell_c() -> int { - - burn_t state{}; - init_burn_state(state, 1.0_rt, 0.0_rt, false); - - // name of output file - std::ofstream state_over_time("state_over_time.txt"); - - // save the initial state -- we'll use this to determine - // how much things changed over the entire burn - burn_t state_in = state; - - // output the data in columns, one line per timestep - state_over_time << std::setw(10) << "# Time"; - state_over_time << std::setw(15) << "Density"; - state_over_time << std::setw(15) << "Temperature"; - for (int x = 0; x < NumSpec; ++x) { - const std::string &element = short_spec_names_cxx[x]; - state_over_time << std::setw(15) << element; - } - state_over_time << std::endl; - - amrex::Real t = 0.0; - - state_over_time << std::setw(10) << t; - state_over_time << std::setw(15) << state.rho; - state_over_time << std::setw(15) << state.T; - for (int x = 0; x < NumSpec; ++x) { - state_over_time << std::setw(15) << state.xn[x]; - } - state_over_time << std::endl; - - // loop over steps, burn, and output the current state - // the loop below is similar to that used in krome and pynucastro - amrex::Real dd = state.rho; - amrex::Real dd1 = 0.0_rt; - - for (int n = 0; n < unit_test_rp::nsteps; n++) { - - dd1 = dd; - - amrex::Real rhotmp = 0.0_rt; - - for (int nn = 0; nn < NumSpec; ++nn) { - rhotmp += state.xn[nn] * spmasses[nn]; - } - - // find the freefall time - amrex::Real tff = std::sqrt(M_PI * 3.0 / (32.0 * rhotmp * grav_constant)); - amrex::Real dt = unit_test_rp::tff_reduc * tff; - // scale the density - dd += dt * (dd / tff); - -#ifdef DEBUG - std::cout << "step params " << dd << ", " << tff << ", " << rhotmp - << std::endl; -#endif - - // stop the test if dt is very small - if (dt < 10) { - break; - } - - // stop the test if we have reached very high densities - if (dd > 2e-6) { - break; - } - - // std::cout << "step " << n << " done" << std::endl; - - // scale the number densities - for (int nn = 0; nn < NumSpec; ++nn) { - state.xn[nn] *= dd / dd1; - } - - // input the scaled density in burn state - state.rho *= dd / dd1; - - // do the actual integration - { - state.suppress_failure_output = true; - amrex::AsyncArray state_device(&state, 1); - burn_t* state_ptr = state_device.data(); - - amrex::ParallelFor(1, - [state_ptr, dt] AMREX_GPU_DEVICE(int idx) noexcept { - burner(state_ptr[idx], dt); - }); - - state_device.copyToHost(&state, 1); - amrex::Gpu::streamSynchronize(); - state.suppress_failure_output = false; - } - - enforce_post_burn_constraints(state); - - t += dt; - - state_over_time << std::setw(10) << t; - state_over_time << std::setw(15) << state.rho; - state_over_time << std::setw(12) << state.T; - for (int x = 0; x < NumSpec; ++x) { - state_over_time << std::setw(15) << state.xn[x]; - } - state_over_time << std::endl; - } - state_over_time.close(); - - // output diagnostics to the terminal - std::cout << "------------------------------------" << std::endl; - std::cout << "successful? " << state.success << std::endl; - - constexpr int column_width = 44; - auto print_state_row = [&](const std::string &label, amrex::Real initial_value, - amrex::Real final_value) { - std::ostringstream left_stream; - left_stream << label << " " << std::scientific << std::setprecision(6) - << initial_value; - std::ostringstream right_stream; - right_stream << label << " " << std::scientific << std::setprecision(6) - << final_value; - std::cout << std::left << std::setw(column_width) << left_stream.str() - << std::left << std::setw(column_width) << right_stream.str() - << std::endl; - }; - - std::cout << "------------------------------------" << std::endl; - std::cout << std::left << std::setw(column_width) << "Initial State" - << std::left << std::setw(column_width) << "Final State" - << std::endl; - std::cout << std::string(2 * column_width, '-') << std::endl; - print_state_row("Temperature (K):", state_in.T, state.T); - print_state_row("Density (g/cm^3):", state_in.rho, state.rho); - print_state_row("Eint (erg/g):", state_in.e, state.e); - for (int n = 0; n < NumSpec; ++n) { - const std::string label = "n(" + short_spec_names_cxx[n] + "):"; - print_state_row(label, state_in.xn[n], state.xn[n]); - } - std::cout << std::string(2 * column_width, '-') << std::endl; - std::cout << std::right; - - return state.success; -} AMREX_INLINE auto burn_cell_multi_c() -> int { diff --git a/unit_test/burn_cell_primordial_chem/main.cpp b/unit_test/burn_cell_primordial_chem/main.cpp index d3f71f2cf3..cc95d411fd 100644 --- a/unit_test/burn_cell_primordial_chem/main.cpp +++ b/unit_test/burn_cell_primordial_chem/main.cpp @@ -26,8 +26,6 @@ int main(int argc, char *argv[]) { AMREX_ALWAYS_ASSERT_WITH_MESSAGE(run_prefix == input_run_prefix, "input file is missing or incorrect!"); - std::cout << "\nstarting the single zone burn..." << std::endl; - ParmParse ppa("amr"); init_unit_test(); @@ -39,12 +37,10 @@ int main(int argc, char *argv[]) { // C++ Network, RHS, screening, rates initialization network_init(); - int single_success = burn_cell_c(); - std::cout << "\nstarting the multi-zone burn..." << std::endl; int multi_success = burn_cell_multi_c(); - success = single_success && multi_success; + success = multi_success; } amrex::Finalize(); From ee66b58290a02db5351a41ef6164d57025984def Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Fri, 24 Oct 2025 14:12:29 -0400 Subject: [PATCH 20/50] report relative norms --- .../burn_cell_primordial_chem/burn_cell.H | 40 ++++++++++++++----- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index a76ee3b9ee..401bd93ad2 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -443,14 +443,21 @@ auto burn_cell_multi_c() -> int { std::map mismatch_summary; struct NormAccumulator { - amrex::Real l1 = 0.0_rt; - amrex::Real l2_sq = 0.0_rt; - amrex::Real linf = 0.0_rt; - void record(amrex::Real diff) { + amrex::Real diff_l1 = 0.0_rt; + amrex::Real diff_l2_sq = 0.0_rt; + amrex::Real diff_linf = 0.0_rt; + amrex::Real ref_l1 = 0.0_rt; + amrex::Real ref_l2_sq = 0.0_rt; + amrex::Real ref_linf = 0.0_rt; + void record(amrex::Real diff, amrex::Real ref) { amrex::Real abs_diff = std::abs(diff); - l1 += abs_diff; - l2_sq += diff * diff; - linf = std::max(linf, abs_diff); + amrex::Real abs_ref = std::abs(ref); + diff_l1 += abs_diff; + diff_l2_sq += diff * diff; + diff_linf = std::max(diff_linf, abs_diff); + ref_l1 += abs_ref; + ref_l2_sq += ref * ref; + ref_linf = std::max(ref_linf, abs_ref); } }; @@ -474,7 +481,7 @@ auto burn_cell_multi_c() -> int { if (!track_norm) { return; } - field_norms[field].record(cpu_value - gpu_value); + field_norms[field].record(cpu_value - gpu_value, cpu_value); }; for (int iz = 0; iz < n_zones; ++iz) { @@ -529,11 +536,22 @@ auto burn_cell_multi_c() -> int { if (!field_norms.empty()) { std::cout << "CPU/GPU multi-zone difference norms:" << std::endl; + auto safe_norm_ratio = [](amrex::Real num, amrex::Real denom) { + if (denom > 0.0_rt) { + return num / denom; + } + return (num == 0.0_rt) ? 0.0_rt + : std::numeric_limits::infinity(); + }; auto print_norm = [&](const std::string &label, const NormAccumulator &acc) { - std::cout << " " << label << ": L1 = " << acc.l1 - << ", L2 = " << std::sqrt(acc.l2_sq) - << ", L_inf = " << acc.linf << std::endl; + amrex::Real diff_l2 = std::sqrt(acc.diff_l2_sq); + amrex::Real ref_l2 = std::sqrt(acc.ref_l2_sq); + std::cout << " " << label + << ": L1 = " << safe_norm_ratio(acc.diff_l1, acc.ref_l1) + << ", L2 = " << safe_norm_ratio(diff_l2, ref_l2) + << ", L_inf = " + << safe_norm_ratio(acc.diff_linf, acc.ref_linf) << std::endl; }; auto temp_it = field_norms.find("temperature"); if (temp_it != field_norms.end()) { From 319610ce26a516ca798009604cf550ed0a71cf4a Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Fri, 24 Oct 2025 14:15:51 -0400 Subject: [PATCH 21/50] report timings --- unit_test/burn_cell_primordial_chem/burn_cell.H | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index 401bd93ad2..efa0ebf4e7 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -400,12 +401,18 @@ auto burn_cell_multi_c() -> int { std::vector cpu_states; std::vector cpu_times; MultiZoneSummary cpu_summary; + auto cpu_start = std::chrono::steady_clock::now(); bool cpu_success = run_multi_zone(false, cpu_states, cpu_times, cpu_summary); + auto cpu_end = std::chrono::steady_clock::now(); + std::chrono::duration cpu_wall = cpu_end - cpu_start; std::vector gpu_states; std::vector gpu_times; MultiZoneSummary gpu_summary; + auto gpu_start = std::chrono::steady_clock::now(); bool gpu_success = run_multi_zone(true, gpu_states, gpu_times, gpu_summary); + auto gpu_end = std::chrono::steady_clock::now(); + std::chrono::duration gpu_wall = gpu_end - gpu_start; auto print_summary = [&](const std::string &label, const MultiZoneSummary &summary) { @@ -424,7 +431,11 @@ auto burn_cell_multi_c() -> int { }; print_summary("CPU", cpu_summary); + std::cout << "CPU multi-zone burn walltime (s): " << cpu_wall.count() + << std::endl; print_summary("GPU", gpu_summary); + std::cout << "GPU multi-zone burn walltime (s): " << gpu_wall.count() + << std::endl; auto within_tol = [&](amrex::Real a, amrex::Real b) -> bool { amrex::Real diff = std::abs(a - b); From d0337fd0d8e26f16d7a7c18a3fe5f9ce99de2d10 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Fri, 24 Oct 2025 14:16:32 -0400 Subject: [PATCH 22/50] turn off debug --- unit_test/burn_cell_primordial_chem/GNUmakefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/GNUmakefile b/unit_test/burn_cell_primordial_chem/GNUmakefile index 669e9839c7..c00cb402e2 100644 --- a/unit_test/burn_cell_primordial_chem/GNUmakefile +++ b/unit_test/burn_cell_primordial_chem/GNUmakefile @@ -2,7 +2,7 @@ PRECISION = DOUBLE PROFILE = FALSE # Set DEBUG to TRUE if debugging -DEBUG = TRUE +DEBUG = FALSE DIM = 1 @@ -15,7 +15,7 @@ USE_CUDA = FALSE USE_REACT = TRUE # Set USE_MICROPHYSICS_DEBUG to TRUE if debugging -USE_MICROPHYSICS_DEBUG = TRUE +USE_MICROPHYSICS_DEBUG = FALSE EBASE = main From 01b4c788b87648e3dd28a5fc20307dac27af80cf Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Fri, 24 Oct 2025 14:43:07 -0400 Subject: [PATCH 23/50] add n_zones parameter --- unit_test/burn_cell_primordial_chem/burn_cell.H | 6 ++++-- unit_test/burn_cell_primordial_chem/inputs_primordial_chem | 2 ++ unit_test/burn_cell_primordial_chem/main.cpp | 5 ++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index efa0ebf4e7..7415f7f5fd 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -177,12 +177,14 @@ void enforce_post_burn_constraints(burn_t &state) { AMREX_INLINE -auto burn_cell_multi_c() -> int { +auto burn_cell_multi_c(int n_zones) -> int { - constexpr int n_zones = 128; constexpr amrex::Real rel_tol = 1.0e-10_rt; constexpr amrex::Real abs_tol = 1.0e-12_rt; + AMREX_ALWAYS_ASSERT_WITH_MESSAGE(n_zones > 0, + "n_zones must be positive"); + struct MultiZoneSummary { int failures = 0; int retries = 0; diff --git a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem index 8db5c8d51d..d25657d102 100644 --- a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem +++ b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem @@ -26,6 +26,8 @@ unit_test.primary_species_12 = 1e-40 unit_test.primary_species_13 = 1e-40 unit_test.primary_species_14 = 0.0775e0 +unit_test.n_zones = 32**3 + # integrator runtime parameters # are we using primordial chemistry? then we use number densities integrator.use_number_densities = 1 diff --git a/unit_test/burn_cell_primordial_chem/main.cpp b/unit_test/burn_cell_primordial_chem/main.cpp index cc95d411fd..622d0eabfa 100644 --- a/unit_test/burn_cell_primordial_chem/main.cpp +++ b/unit_test/burn_cell_primordial_chem/main.cpp @@ -23,6 +23,9 @@ int main(int argc, char *argv[]) { std::string const run_prefix = "burn_cell_primordial_chem_"; std::string input_run_prefix; pp.query("run_prefix", input_run_prefix); + int n_zones = 128; + pp.query("n_zones", n_zones); + amrex::Print() << "n_zones = " << n_zones << "\n"; AMREX_ALWAYS_ASSERT_WITH_MESSAGE(run_prefix == input_run_prefix, "input file is missing or incorrect!"); @@ -38,7 +41,7 @@ int main(int argc, char *argv[]) { network_init(); std::cout << "\nstarting the multi-zone burn..." << std::endl; - int multi_success = burn_cell_multi_c(); + int multi_success = burn_cell_multi_c(n_zones); success = multi_success; } From 8d221e68506e56f4bb385e6904c49d274882ff7c Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Fri, 24 Oct 2025 15:01:29 -0400 Subject: [PATCH 24/50] add OMP on CPU --- .../burn_cell_primordial_chem/burn_cell.H | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index 7415f7f5fd..797a68002e 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -341,13 +342,16 @@ auto burn_cell_multi_c(int n_zones) -> int { states_d.end(), states_h.begin()); amrex::Gpu::streamSynchronize(); } else { - for (int iz = 0; iz < n_zones; ++iz) { - amrex::Real dt = dt_h[iz]; - if (dt <= 0.0_rt) { - continue; - } - burn_backend(states_h[iz], dt); - } + burn_t *states_ptr = states_h.data(); + amrex::Real *dt_ptr = dt_h.data(); + amrex::ParallelForOMP( + n_zones, [states_ptr, dt_ptr, &burn_backend](int iz) noexcept { + amrex::Real dt = dt_ptr[iz]; + if (dt <= 0.0_rt) { + return; + } + burn_backend(states_ptr[iz], dt); + }); } for (int iz = 0; iz < n_zones; ++iz) { From 2b50afdbb2ae7df4e000d9704012f2d5e5cb4f2d Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Sat, 17 Jan 2026 15:25:19 -0500 Subject: [PATCH 25/50] avoid warnings on aarch64 --- unit_test/burn_cell_primordial_chem/GNUmakefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/unit_test/burn_cell_primordial_chem/GNUmakefile b/unit_test/burn_cell_primordial_chem/GNUmakefile index c00cb402e2..91a6f24fea 100644 --- a/unit_test/burn_cell_primordial_chem/GNUmakefile +++ b/unit_test/burn_cell_primordial_chem/GNUmakefile @@ -7,6 +7,7 @@ DEBUG = FALSE DIM = 1 COMP = gnu +EXTRACXXFLAGS += -Wno-psabi USE_MPI = FALSE USE_OMP = FALSE @@ -38,3 +39,5 @@ Bpack := ./Make.package Blocs := . include $(MICROPHYSICS_HOME)/unit_test/Make.unit_test + + From fded6dd6118944e9b6a685d631d397ab06dff104 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Sat, 17 Jan 2026 15:26:21 -0500 Subject: [PATCH 26/50] add interactive plot of solution --- .../plot_state_over_time.py | 155 ++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 unit_test/burn_cell_primordial_chem/plot_state_over_time.py diff --git a/unit_test/burn_cell_primordial_chem/plot_state_over_time.py b/unit_test/burn_cell_primordial_chem/plot_state_over_time.py new file mode 100644 index 0000000000..48fa7ee710 --- /dev/null +++ b/unit_test/burn_cell_primordial_chem/plot_state_over_time.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 + +""" +Plot temperature and species fractions versus density from state_over_time.txt using Plotly. +""" + +from __future__ import annotations + +import argparse +from pathlib import Path +from typing import Sequence + +import pandas as pd +import plotly.graph_objects as go +import plotly.io as pio +from plotly.subplots import make_subplots + + +def parse_header(path: Path) -> Sequence[str]: + """Extract column names from the leading comment line.""" + with path.open("r", encoding="ascii") as handle: + for line in handle: + stripped = line.strip() + if not stripped: + continue + if stripped.startswith("#"): + return stripped.lstrip("#").split() + # If no comment header is present, fall back to whitespace split. + return stripped.split() + raise ValueError(f"Could not find a header row in {path}") + + +def load_state_data(path: Path) -> pd.DataFrame: + columns = parse_header(path) + # Skip the header we just parsed and load the remainder. + return pd.read_csv( + path, + delim_whitespace=True, + skiprows=1, + names=columns, + comment="#", + ) + + +def plot_state(path: Path, output: Path | None) -> None: + data = load_state_data(path) + + required_columns = {"Density", "Temperature"} + missing = required_columns - set(data.columns) + if missing: + missing_cols = ", ".join(sorted(missing)) + raise KeyError(f"Missing required column(s): {missing_cols}") + + density = data["Density"] + temperature = data["Temperature"] + species_cols = [ + col for col in data.columns if col not in {"Time", "Density", "Temperature"} + ] + + if not species_cols: + raise ValueError("No species fraction columns found in input data.") + + if (density <= 0).any(): + raise ValueError("Density contains non-positive values; cannot use log scale.") + if (temperature <= 0).any(): + raise ValueError("Temperature contains non-positive values; cannot use log scale.") + + fig = make_subplots( + rows=2, + cols=1, + shared_xaxes=True, + vertical_spacing=0.08, + subplot_titles=( + "Temperature vs. Density", + "Species Fractions vs. Density", + ), + ) + + fig.add_trace( + go.Scatter( + x=density, + y=temperature, + mode="lines", + name="Temperature", + hovertemplate=( + "Density: %{x:.3e}
Temperature: %{y:.3e}Temperature" + ), + ), + row=1, + col=1, + ) + + for column in species_cols: + series = data[column] + positive_mask = series > 0 + if positive_mask.any(): + fig.add_trace( + go.Scatter( + x=density[positive_mask], + y=series[positive_mask], + mode="lines", + name=column, + hovertemplate=( + "Species: " + + column + + "
Density: %{x:.3e}" + + "
Fraction: %{y:.3e}" + ), + ), + row=2, + col=1, + ) + + fig.update_xaxes(type="log", row=1, col=1) + fig.update_xaxes(type="log", title_text="Density", row=2, col=1) + fig.update_yaxes(type="log", title_text="Temperature", row=1, col=1) + fig.update_yaxes(type="log", title_text="Species Fraction", row=2, col=1) + + fig.update_layout( + height=800, + legend_title_text="Species", + hovermode="x unified", + margin=dict(l=70, r=200, t=80, b=60), + ) + + if output: + output.parent.mkdir(parents=True, exist_ok=True) + pio.write_html(fig, file=str(output), include_plotlyjs="cdn", full_html=True) + else: + fig.show() + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Plot temperature and species fractions versus density." + ) + parser.add_argument( + "--input", + type=Path, + default=Path("state_over_time.txt"), + help="Path to the state_over_time.txt file (default: state_over_time.txt).", + ) + parser.add_argument( + "--output", + type=Path, + default=None, + help="Optional output path for saving the Plotly figure as HTML.", + ) + args = parser.parse_args() + + plot_state(args.input, args.output) + + +if __name__ == "__main__": + main() From 8fa855870e12d38317746e2f13e3ae4e722bf82e Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Sat, 17 Jan 2026 15:29:51 -0500 Subject: [PATCH 27/50] only run GPU test if GPU enabled --- .../burn_cell_primordial_chem/burn_cell.H | 123 ++++++++++-------- unit_test/burn_cell_primordial_chem/main.cpp | 8 +- 2 files changed, 76 insertions(+), 55 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index 797a68002e..34d3200d39 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -178,7 +178,7 @@ void enforce_post_burn_constraints(burn_t &state) { AMREX_INLINE -auto burn_cell_multi_c(int n_zones) -> int { +auto burn_cell_multi_c(int n_zones, bool enable_gpu) -> int { constexpr amrex::Real rel_tol = 1.0e-10_rt; constexpr amrex::Real abs_tol = 1.0e-12_rt; @@ -415,10 +415,14 @@ auto burn_cell_multi_c(int n_zones) -> int { std::vector gpu_states; std::vector gpu_times; MultiZoneSummary gpu_summary; - auto gpu_start = std::chrono::steady_clock::now(); - bool gpu_success = run_multi_zone(true, gpu_states, gpu_times, gpu_summary); - auto gpu_end = std::chrono::steady_clock::now(); - std::chrono::duration gpu_wall = gpu_end - gpu_start; + bool gpu_success = true; + std::chrono::duration gpu_wall(0.0); + if (enable_gpu) { + auto gpu_start = std::chrono::steady_clock::now(); + gpu_success = run_multi_zone(true, gpu_states, gpu_times, gpu_summary); + auto gpu_end = std::chrono::steady_clock::now(); + gpu_wall = gpu_end - gpu_start; + } auto print_summary = [&](const std::string &label, const MultiZoneSummary &summary) { @@ -439,9 +443,14 @@ auto burn_cell_multi_c(int n_zones) -> int { print_summary("CPU", cpu_summary); std::cout << "CPU multi-zone burn walltime (s): " << cpu_wall.count() << std::endl; - print_summary("GPU", gpu_summary); - std::cout << "GPU multi-zone burn walltime (s): " << gpu_wall.count() - << std::endl; + if (enable_gpu) { + print_summary("GPU", gpu_summary); + std::cout << "GPU multi-zone burn walltime (s): " << gpu_wall.count() + << std::endl; + } else { + std::cout << "GPU multi-zone burn skipped (no GPU backend)" + << std::endl; + } auto within_tol = [&](amrex::Real a, amrex::Real b) -> bool { amrex::Real diff = std::abs(a - b); @@ -501,53 +510,56 @@ auto burn_cell_multi_c(int n_zones) -> int { field_norms[field].record(cpu_value - gpu_value, cpu_value); }; - for (int iz = 0; iz < n_zones; ++iz) { - bool cpu_zone_success = - cpu_states[iz].success && cpu_states[iz].e > 0.0_rt; - bool gpu_zone_success = - gpu_states[iz].success && gpu_states[iz].e > 0.0_rt; - - if (cpu_zone_success != gpu_zone_success) { - record_mismatch(iz, "success_flag", - cpu_zone_success ? 1.0_rt : 0.0_rt, - gpu_zone_success ? 1.0_rt : 0.0_rt); - record_difference("success_flag", + if (enable_gpu) { + for (int iz = 0; iz < n_zones; ++iz) { + bool cpu_zone_success = + cpu_states[iz].success && cpu_states[iz].e > 0.0_rt; + bool gpu_zone_success = + gpu_states[iz].success && gpu_states[iz].e > 0.0_rt; + + if (cpu_zone_success != gpu_zone_success) { + record_mismatch(iz, "success_flag", cpu_zone_success ? 1.0_rt : 0.0_rt, - gpu_zone_success ? 1.0_rt : 0.0_rt, false); - continue; - } - if (!cpu_zone_success) { - continue; - } + gpu_zone_success ? 1.0_rt : 0.0_rt); + record_difference("success_flag", + cpu_zone_success ? 1.0_rt : 0.0_rt, + gpu_zone_success ? 1.0_rt : 0.0_rt, false); + continue; + } + if (!cpu_zone_success) { + continue; + } - if (!within_tol(cpu_times[iz], gpu_times[iz])) { - record_mismatch(iz, "time", cpu_times[iz], gpu_times[iz]); - } - record_difference("time", cpu_times[iz], gpu_times[iz], false); + if (!within_tol(cpu_times[iz], gpu_times[iz])) { + record_mismatch(iz, "time", cpu_times[iz], gpu_times[iz]); + } + record_difference("time", cpu_times[iz], gpu_times[iz], false); - if (!within_tol(cpu_states[iz].rho, gpu_states[iz].rho)) { - record_mismatch(iz, "rho", cpu_states[iz].rho, gpu_states[iz].rho); - } - record_difference("rho", cpu_states[iz].rho, gpu_states[iz].rho, false); - if (!within_tol(cpu_states[iz].T, gpu_states[iz].T)) { - record_mismatch(iz, "T", cpu_states[iz].T, gpu_states[iz].T); - } - record_difference("temperature", cpu_states[iz].T, gpu_states[iz].T, true); - if (!within_tol(cpu_states[iz].e, gpu_states[iz].e)) { - record_mismatch(iz, "e", cpu_states[iz].e, gpu_states[iz].e); - } - record_difference("e", cpu_states[iz].e, gpu_states[iz].e, false); - for (int n = 0; n < NumSpec; ++n) { - if (!within_tol(cpu_states[iz].xn[n], gpu_states[iz].xn[n])) { - std::string label = - "xn(" + std::string(short_spec_names_cxx[n]) + ")"; - record_mismatch(iz, label, cpu_states[iz].xn[n], - gpu_states[iz].xn[n]); + if (!within_tol(cpu_states[iz].rho, gpu_states[iz].rho)) { + record_mismatch(iz, "rho", cpu_states[iz].rho, gpu_states[iz].rho); + } + record_difference("rho", cpu_states[iz].rho, gpu_states[iz].rho, false); + if (!within_tol(cpu_states[iz].T, gpu_states[iz].T)) { + record_mismatch(iz, "T", cpu_states[iz].T, gpu_states[iz].T); + } + record_difference("temperature", cpu_states[iz].T, gpu_states[iz].T, + true); + if (!within_tol(cpu_states[iz].e, gpu_states[iz].e)) { + record_mismatch(iz, "e", cpu_states[iz].e, gpu_states[iz].e); + } + record_difference("e", cpu_states[iz].e, gpu_states[iz].e, false); + for (int n = 0; n < NumSpec; ++n) { + if (!within_tol(cpu_states[iz].xn[n], gpu_states[iz].xn[n])) { + std::string label = + "xn(" + std::string(short_spec_names_cxx[n]) + ")"; + record_mismatch(iz, label, cpu_states[iz].xn[n], + gpu_states[iz].xn[n]); + } + std::string norm_label = + "number_density(" + std::string(short_spec_names_cxx[n]) + ")"; + record_difference(norm_label, cpu_states[iz].xn[n], + gpu_states[iz].xn[n], true); } - std::string norm_label = - "number_density(" + std::string(short_spec_names_cxx[n]) + ")"; - record_difference(norm_label, cpu_states[iz].xn[n], - gpu_states[iz].xn[n], true); } } @@ -582,15 +594,15 @@ auto burn_cell_multi_c(int n_zones) -> int { print_norm(label, it->second); } } - } else { + } else if (enable_gpu) { std::cout << "No comparable CPU/GPU multi-zone states to compute norms" << std::endl; } - if (mismatch_summary.empty()) { + if (mismatch_summary.empty() && enable_gpu) { std::cout << "CPU/GPU multi-zone states agree within tolerances" << std::endl; - } else { + } else if (enable_gpu) { std::cout << "CPU/GPU mismatches detected:" << std::endl; for (const auto &entry : mismatch_summary) { const auto &field = entry.first; @@ -606,6 +618,9 @@ auto burn_cell_multi_c(int n_zones) -> int { } } + if (!enable_gpu) { + return cpu_success; + } return (cpu_success && gpu_success && mismatch_summary.empty()); } #endif diff --git a/unit_test/burn_cell_primordial_chem/main.cpp b/unit_test/burn_cell_primordial_chem/main.cpp index 622d0eabfa..6647105a90 100644 --- a/unit_test/burn_cell_primordial_chem/main.cpp +++ b/unit_test/burn_cell_primordial_chem/main.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include using namespace amrex; @@ -40,8 +41,13 @@ int main(int argc, char *argv[]) { // C++ Network, RHS, screening, rates initialization network_init(); + bool enable_gpu = false; +#ifdef AMREX_USE_GPU + enable_gpu = true; +#endif + std::cout << "\nstarting the multi-zone burn..." << std::endl; - int multi_success = burn_cell_multi_c(n_zones); + int multi_success = burn_cell_multi_c(n_zones, enable_gpu); success = multi_success; } From 04ca71f6e9d4b5a43d0afd51b891f91b61951445 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Fri, 6 Feb 2026 18:45:17 -0500 Subject: [PATCH 28/50] fix one-zone test --- unit_test/burn_cell_primordial_chem/burn_cell.H | 12 +++++++----- .../burn_cell_primordial_chem/inputs_primordial_chem | 5 +++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index 34d3200d39..0fa4e01924 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -208,11 +208,13 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu) -> int { int retry_total = 0; for (int iz = 0; iz < n_zones; ++iz) { - amrex::Real temp_offset = - 0.5_rt * unit_test_rp::temperature * - (static_cast(iz) / - static_cast(n_zones - 1) - - 0.5_rt); + amrex::Real temp_offset = 0.0_rt; + if (n_zones > 1) { + temp_offset = 0.5_rt * unit_test_rp::temperature * + (static_cast(iz) / + static_cast(n_zones - 1) - + 0.5_rt); + } init_burn_state(states_h[iz], 1.0_rt, temp_offset, false); dd_h[iz] = states_h[iz].rho; dt_h[iz] = 0.0_rt; diff --git a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem index d25657d102..3cdc3b2371 100644 --- a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem +++ b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem @@ -26,7 +26,8 @@ unit_test.primary_species_12 = 1e-40 unit_test.primary_species_13 = 1e-40 unit_test.primary_species_14 = 0.0775e0 -unit_test.n_zones = 32**3 +unit_test.n_zones = 1 +#unit_test.n_zones = 32**3 # integrator runtime parameters # are we using primordial chemistry? then we use number densities @@ -37,7 +38,7 @@ integrator.subtract_internal_energy = 0 integrator.do_species_clip = 0 # minimum positive value of number densities integrator.SMALL_X_SAFE = 1e-100 -integrator.burner_verbose = 0 +integrator.burner_verbose = 1 # do you want to use the jacobian calculated in a previous step? integrator.use_jacobian_caching = 0 # integration will fail if the number density > X_reject_buffer*atol From 96ab99b1a289087b08823e9b0ec503450f013e6e Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Fri, 6 Feb 2026 18:51:12 -0500 Subject: [PATCH 29/50] update inputs --- .../inputs_primordial_chem | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem index 3cdc3b2371..fda7a0a837 100644 --- a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem +++ b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem @@ -1,15 +1,23 @@ +unit_test.n_zones = 8 +#unit_test.n_zones = 32**3 +integrator.burner_verbose = 0 + +## unit_test runtime parameters unit_test.run_prefix = "burn_cell_primordial_chem_" -# unit_test runtime parameters unit_test.small_temp = 1.e1 unit_test.small_dens = 1.e-60 unit_test.tff_reduc = 1.e-1 + # number of integration steps unit_test.nsteps = 1000 + # max total time unit_test.tmax = 7.e20 + # initial temperature unit_test.temperature = 1e2 + # initial number densities unit_test.primary_species_1 = 1e-4 unit_test.primary_species_2 = 1e-4 @@ -26,29 +34,34 @@ unit_test.primary_species_12 = 1e-40 unit_test.primary_species_13 = 1e-40 unit_test.primary_species_14 = 0.0775e0 -unit_test.n_zones = 1 -#unit_test.n_zones = 32**3 +## integrator runtime parameters -# integrator runtime parameters # are we using primordial chemistry? then we use number densities integrator.use_number_densities = 1 + # we do not want to subtract the internal energy integrator.subtract_internal_energy = 0 + # we do not want to clip species between 0 and 1 integrator.do_species_clip = 0 + # minimum positive value of number densities integrator.SMALL_X_SAFE = 1e-100 -integrator.burner_verbose = 1 + # do you want to use the jacobian calculated in a previous step? integrator.use_jacobian_caching = 0 + # integration will fail if the number density > X_reject_buffer*atol integrator.X_reject_buffer = 1e100 + # Set which jacobian to use # 1 = analytic jacobian # 2 = numerical jacobian integrator.jacobian = 2 + # do you want to normalize abundances within VODE? (you don't!) integrator.renormalize_abundances = 0 + # tolerances integrator.rtol_spec = 1.0e-4 integrator.atol_spec = 1.0e-4 @@ -60,4 +73,3 @@ network.redshift = 30.0 # these params help debug the code #amrex.throw_exception = 1 #amrex.signal_handling = 0 - From 2e216112f458f76849eec7aea5501953bd1b9f3d Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Fri, 6 Feb 2026 19:02:59 -0500 Subject: [PATCH 30/50] update inputs --- integration/VODE/vode_dvnlsd.H | 2 +- .../inputs_primordial_chem | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/integration/VODE/vode_dvnlsd.H b/integration/VODE/vode_dvnlsd.H index f080958a90..a505bece89 100644 --- a/integration/VODE/vode_dvnlsd.H +++ b/integration/VODE/vode_dvnlsd.H @@ -148,7 +148,7 @@ amrex::Real dvnlsd (int& NFLAG, BurnT& state, DvodeT& vstate) // sometime VODE goes way off tangent. If these mass fractions // are really bad, then let's just bail now - if (integrator_rp::do_corrector_validation && !integrator_rp::use_number_densities) { + if (integrator_rp::do_corrector_validation) { #ifdef SDC const amrex::Real rho_current = state.rho_orig + vstate.tn * state.ydot_a[SRHO]; const amrex::Real thresh = species_failure_tolerance * rho_current; diff --git a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem index fda7a0a837..8ef42ff89a 100644 --- a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem +++ b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem @@ -1,16 +1,16 @@ -unit_test.n_zones = 8 +unit_test.n_zones = 1 #unit_test.n_zones = 32**3 -integrator.burner_verbose = 0 +integrator.burner_verbose = 1 ## unit_test runtime parameters unit_test.run_prefix = "burn_cell_primordial_chem_" -unit_test.small_temp = 1.e1 -unit_test.small_dens = 1.e-60 -unit_test.tff_reduc = 1.e-1 +unit_test.small_temp = 10. +unit_test.small_dens = 1.0e-60 +unit_test.tff_reduc = 0.025 # number of integration steps -unit_test.nsteps = 1000 +unit_test.nsteps = 1e4 # max total time unit_test.tmax = 7.e20 @@ -45,6 +45,10 @@ integrator.subtract_internal_energy = 0 # we do not want to clip species between 0 and 1 integrator.do_species_clip = 0 +# in the corrector loop, do we check if the predicted state is +# valid (X > 0) before calling the RHS? +integrator.do_corrector_validation = 1 + # minimum positive value of number densities integrator.SMALL_X_SAFE = 1e-100 From 2683c430c4e991d504cabe309db4b82e400bad81 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Fri, 6 Feb 2026 19:04:50 -0500 Subject: [PATCH 31/50] update inputs --- unit_test/burn_cell_primordial_chem/inputs_primordial_chem | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem index 8ef42ff89a..7ab0e97bb4 100644 --- a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem +++ b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem @@ -7,7 +7,7 @@ unit_test.run_prefix = "burn_cell_primordial_chem_" unit_test.small_temp = 10. unit_test.small_dens = 1.0e-60 -unit_test.tff_reduc = 0.025 +unit_test.tff_reduc = 0.03 # number of integration steps unit_test.nsteps = 1e4 From 78b179e73b2b31f0d9b16f6f8373a9984bbb8d79 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Fri, 6 Feb 2026 19:45:46 -0500 Subject: [PATCH 32/50] compute richardson error estimate --- .../burn_cell_primordial_chem/_parameters | 7 + .../burn_cell_primordial_chem/burn_cell.H | 513 ++++++++++++++++-- .../inputs_primordial_chem | 8 +- 3 files changed, 480 insertions(+), 48 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/_parameters b/unit_test/burn_cell_primordial_chem/_parameters index a81e1ff435..8970ecc2fb 100644 --- a/unit_test/burn_cell_primordial_chem/_parameters +++ b/unit_test/burn_cell_primordial_chem/_parameters @@ -17,6 +17,13 @@ tfirst real 0.0 # number of steps for the single zone burn nsteps int 1000 +# estimate truncation error with three tolerance levels and Richardson extrapolation +richardson_enable bool 1 +richardson_tol_factor real 10.0 +richardson_print_per_zone bool 0 +# non-trace threshold as a fraction of the most abundant species in each zone +richardson_trace_species_cutoff real 0.0 + # initial temperature temperature real 1e2 diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index 0fa4e01924..2ee3c04861 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -2,6 +2,7 @@ #define BURN_CELL_H #include +#include #include #include #include @@ -406,52 +407,390 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu) -> int { return (summary.failures == 0); }; - std::vector cpu_states; - std::vector cpu_times; - MultiZoneSummary cpu_summary; - auto cpu_start = std::chrono::steady_clock::now(); - bool cpu_success = run_multi_zone(false, cpu_states, cpu_times, cpu_summary); - auto cpu_end = std::chrono::steady_clock::now(); - std::chrono::duration cpu_wall = cpu_end - cpu_start; - - std::vector gpu_states; - std::vector gpu_times; - MultiZoneSummary gpu_summary; - bool gpu_success = true; - std::chrono::duration gpu_wall(0.0); - if (enable_gpu) { - auto gpu_start = std::chrono::steady_clock::now(); - gpu_success = run_multi_zone(true, gpu_states, gpu_times, gpu_summary); - auto gpu_end = std::chrono::steady_clock::now(); - gpu_wall = gpu_end - gpu_start; - } - auto print_summary = [&](const std::string &label, const MultiZoneSummary &summary) { - std::cout << label << " multi-zone burn complete; zones failed = " + const std::string prefix = label + " multi-zone"; + std::cout << prefix << " summary:" << std::endl; + std::cout << " burn complete : zones failed = " << summary.failures << std::endl; if (summary.any_success) { - std::cout << label << " multi-zone temperature range: [" - << summary.t_min << ", " << summary.t_max << "]" - << std::endl; - std::cout << label << " multi-zone retries applied: " + std::cout << " temperature range : [" + << summary.t_min << ", " << summary.t_max << "]" << std::endl; + std::cout << " retries applied : " << summary.retries << std::endl; } else { - std::cout << label << " multi-zone burn had no successful zones" + std::cout << " status : no successful zones" << std::endl; } }; - print_summary("CPU", cpu_summary); - std::cout << "CPU multi-zone burn walltime (s): " << cpu_wall.count() - << std::endl; - if (enable_gpu) { - print_summary("GPU", gpu_summary); - std::cout << "GPU multi-zone burn walltime (s): " << gpu_wall.count() + struct BackendRunResult { + std::vector states; + std::vector times; + MultiZoneSummary summary; + bool success = false; + std::chrono::duration wall{0.0}; + }; + + struct TolLevelResult { + amrex::Real scale = 1.0_rt; + BackendRunResult cpu; + BackendRunResult gpu; + }; + + const amrex::Real base_rtol_spec = integrator_rp::rtol_spec; + const amrex::Real base_atol_spec = integrator_rp::atol_spec; + const amrex::Real base_rtol_enuc = integrator_rp::rtol_enuc; + const amrex::Real base_atol_enuc = integrator_rp::atol_enuc; + const amrex::Real base_retry_rtol_spec = integrator_rp::retry_rtol_spec; + const amrex::Real base_retry_atol_spec = integrator_rp::retry_atol_spec; + const amrex::Real base_retry_rtol_enuc = integrator_rp::retry_rtol_enuc; + const amrex::Real base_retry_atol_enuc = integrator_rp::retry_atol_enuc; + + const amrex::Real tol_factor = + amrex::max(unit_test_rp::richardson_tol_factor, 1.0_rt); + const bool do_richardson = + unit_test_rp::richardson_enable && (tol_factor > 1.0_rt); + const int n_tol_levels = do_richardson ? 3 : 1; + + auto apply_tolerance_scale = [&](amrex::Real scale) { + integrator_rp::rtol_spec = base_rtol_spec / scale; + integrator_rp::atol_spec = base_atol_spec / scale; + integrator_rp::rtol_enuc = base_rtol_enuc / scale; + integrator_rp::atol_enuc = base_atol_enuc / scale; + + if (base_retry_rtol_spec > 0.0_rt) { + integrator_rp::retry_rtol_spec = base_retry_rtol_spec / scale; + } + if (base_retry_atol_spec > 0.0_rt) { + integrator_rp::retry_atol_spec = base_retry_atol_spec / scale; + } + if (base_retry_rtol_enuc > 0.0_rt) { + integrator_rp::retry_rtol_enuc = base_retry_rtol_enuc / scale; + } + if (base_retry_atol_enuc > 0.0_rt) { + integrator_rp::retry_atol_enuc = base_retry_atol_enuc / scale; + } + }; + + auto restore_base_tolerances = [&]() { + integrator_rp::rtol_spec = base_rtol_spec; + integrator_rp::atol_spec = base_atol_spec; + integrator_rp::rtol_enuc = base_rtol_enuc; + integrator_rp::atol_enuc = base_atol_enuc; + integrator_rp::retry_rtol_spec = base_retry_rtol_spec; + integrator_rp::retry_atol_spec = base_retry_atol_spec; + integrator_rp::retry_rtol_enuc = base_retry_rtol_enuc; + integrator_rp::retry_atol_enuc = base_retry_atol_enuc; + }; + + std::vector level_results(n_tol_levels); + bool cpu_success = true; + bool gpu_success = true; + + for (int ilev = 0; ilev < n_tol_levels; ++ilev) { + amrex::Real scale = std::pow(tol_factor, static_cast(ilev)); + level_results[ilev].scale = scale; + apply_tolerance_scale(scale); + + auto cpu_start = std::chrono::steady_clock::now(); + level_results[ilev].cpu.success = + run_multi_zone(false, level_results[ilev].cpu.states, + level_results[ilev].cpu.times, + level_results[ilev].cpu.summary); + auto cpu_end = std::chrono::steady_clock::now(); + level_results[ilev].cpu.wall = cpu_end - cpu_start; + cpu_success = cpu_success && level_results[ilev].cpu.success; + + std::ostringstream level_label; + if (do_richardson) { + level_label << " (tol level " << (ilev + 1) << ", scale=" << scale + << ")"; + } + const std::string cpu_label = "CPU" + level_label.str(); + print_summary(cpu_label, level_results[ilev].cpu.summary); + std::cout << " burn walltime (s) : " + << level_results[ilev].cpu.wall.count() << std::endl; + + if (enable_gpu) { + auto gpu_start = std::chrono::steady_clock::now(); + level_results[ilev].gpu.success = + run_multi_zone(true, level_results[ilev].gpu.states, + level_results[ilev].gpu.times, + level_results[ilev].gpu.summary); + auto gpu_end = std::chrono::steady_clock::now(); + level_results[ilev].gpu.wall = gpu_end - gpu_start; + gpu_success = gpu_success && level_results[ilev].gpu.success; + + const std::string gpu_label = "GPU" + level_label.str(); + print_summary(gpu_label, level_results[ilev].gpu.summary); + std::cout << " burn walltime (s) : " + << level_results[ilev].gpu.wall.count() << std::endl; + } else if (ilev == 0) { + std::cout << "GPU multi-zone burn skipped (no GPU backend)" + << std::endl; + } + } + + restore_base_tolerances(); + + const auto &cpu_states = level_results.front().cpu.states; + const auto &cpu_times = level_results.front().cpu.times; + const auto &gpu_states = level_results.front().gpu.states; + const auto &gpu_times = level_results.front().gpu.times; + + struct RichardsonEstimate { + bool valid = false; + amrex::Real p = 0.0_rt; + amrex::Real err = 0.0_rt; + amrex::Real delta01 = 0.0_rt; + }; + + auto estimate_richardson = [&](amrex::Real y0, amrex::Real y1, + amrex::Real y2) -> RichardsonEstimate { + RichardsonEstimate est; + const amrex::Real d01 = y0 - y1; + const amrex::Real d12 = y1 - y2; + est.delta01 = d01; + + constexpr amrex::Real tiny = 1.0e-300_rt; + const amrex::Real abs_d01 = std::abs(d01); + const amrex::Real abs_d12 = std::abs(d12); + + if (abs_d01 <= tiny && abs_d12 <= tiny) { + est.valid = true; + return est; + } + + if (abs_d01 <= tiny || abs_d12 <= tiny) { + est.err = d01; + return est; + } + + amrex::Real ratio = abs_d01 / abs_d12; + if (!(ratio > 0.0_rt) || !std::isfinite(ratio)) { + est.err = d01; + return est; + } + + est.p = std::log(ratio) / std::log(tol_factor); + if (!std::isfinite(est.p)) { + est.err = d01; + return est; + } + + amrex::Real denom = 1.0_rt - std::pow(tol_factor, -est.p); + if (std::abs(denom) <= tiny || !std::isfinite(denom)) { + est.err = d01; + return est; + } + + est.err = d01 / denom; + est.valid = true; + return est; + }; + + auto report_richardson = [&](const std::string &label, + const std::vector &l0, + const std::vector &l1, + const std::vector &l2) { + if (!do_richardson) { + return; + } + if (l0.size() != l1.size() || l0.size() != l2.size()) { + std::cout << label + << " Richardson estimate skipped (inconsistent level sizes)" + << std::endl; + return; + } + + int valid_zones = 0; + int valid_temp_estimates = 0; + amrex::Real max_abs_temp_err = 0.0_rt; + amrex::Real max_abs_energy_err = 0.0_rt; + amrex::Real max_abs_species_err = 0.0_rt; + amrex::Real max_rel_temp_err = 0.0_rt; + amrex::Real max_rel_energy_err = 0.0_rt; + amrex::Real max_rel_species_err = 0.0_rt; + int species_rel_count = 0; + std::vector species_rel_hit_count(NumSpec, 0); + + const amrex::Real trace_rel_cutoff = + amrex::max(unit_test_rp::richardson_trace_species_cutoff, 0.0_rt); + + auto rel_error = [](amrex::Real err, amrex::Real ref) { + constexpr amrex::Real tiny = 1.0e-300_rt; + return std::abs(err) / amrex::max(std::abs(ref), tiny); + }; + + std::cout << label << " Richardson truncation estimate:" << std::endl; + std::cout << " levels with scales : [1, " << tol_factor << ", " + << (tol_factor * tol_factor) << "]" << std::endl; + + for (int iz = 0; iz < n_zones; ++iz) { + const bool ok0 = l0[iz].success && l0[iz].e > 0.0_rt; + const bool ok1 = l1[iz].success && l1[iz].e > 0.0_rt; + const bool ok2 = l2[iz].success && l2[iz].e > 0.0_rt; + if (!(ok0 && ok1 && ok2)) { + continue; + } + + ++valid_zones; + + const RichardsonEstimate t_est = + estimate_richardson(l0[iz].T, l1[iz].T, l2[iz].T); + const RichardsonEstimate e_est = + estimate_richardson(l0[iz].e, l1[iz].e, l2[iz].e); + + if (t_est.valid) { + ++valid_temp_estimates; + } + max_abs_temp_err = std::max(max_abs_temp_err, std::abs(t_est.err)); + max_abs_energy_err = std::max(max_abs_energy_err, std::abs(e_est.err)); + max_rel_temp_err = std::max(max_rel_temp_err, rel_error(t_est.err, l2[iz].T)); + max_rel_energy_err = + std::max(max_rel_energy_err, rel_error(e_est.err, l2[iz].e)); + + amrex::Real zone_species_linf = 0.0_rt; + amrex::Real zone_rel_species_linf = 0.0_rt; + std::ostringstream zone_species_list; + bool zone_first_species = true; + amrex::Real zone_max_xn = 0.0_rt; + for (int n = 0; n < NumSpec; ++n) { + zone_max_xn = std::max(zone_max_xn, std::abs(l2[iz].xn[n])); + } + const amrex::Real zone_trace_cutoff = trace_rel_cutoff * zone_max_xn; + for (int n = 0; n < NumSpec; ++n) { + const RichardsonEstimate xn_est = + estimate_richardson(l0[iz].xn[n], l1[iz].xn[n], l2[iz].xn[n]); + zone_species_linf = std::max(zone_species_linf, std::abs(xn_est.err)); + if (std::abs(l2[iz].xn[n]) >= zone_trace_cutoff) { + zone_rel_species_linf = std::max( + zone_rel_species_linf, rel_error(xn_est.err, l2[iz].xn[n])); + ++species_rel_count; + ++species_rel_hit_count[n]; + if (!zone_first_species) { + zone_species_list << ", "; + } + zone_species_list << short_spec_names_cxx[n]; + zone_first_species = false; + } + } + max_abs_species_err = std::max(max_abs_species_err, zone_species_linf); + max_rel_species_err = + std::max(max_rel_species_err, zone_rel_species_linf); + + if (unit_test_rp::richardson_print_per_zone) { + std::cout << label << " zone " << iz << ": |E_T|=" << std::abs(t_est.err) + << ", rel(E_T)=" << rel_error(t_est.err, l2[iz].T) + << ", p_T=" << t_est.p << ", |E_e|=" << std::abs(e_est.err) + << ", rel(E_e)=" << rel_error(e_est.err, l2[iz].e) + << ", p_e=" << e_est.p << ", |E_xn|_inf=" << zone_species_linf + << ", rel(E_xn)_inf(non-trace)=" << zone_rel_species_linf + << ", non-trace species: [" + << (zone_first_species ? "none" : zone_species_list.str()) + << "]" + << std::endl; + } + } + + std::cout << label << " Richardson summary:" << std::endl; + std::cout << " valid zones : " << valid_zones << "/" + << n_zones << std::endl; + std::cout << " valid temperature estimates: " << valid_temp_estimates + << std::endl; + std::cout << " max |E_T| : " << max_abs_temp_err + << std::endl; + std::cout << " max |E_e| : " << max_abs_energy_err + << std::endl; + std::cout << " max |E_xn|_inf : " << max_abs_species_err + << std::endl; + std::cout << " max rel(E_T) : " << max_rel_temp_err + << std::endl; + std::cout << " max rel(E_e) : " << max_rel_energy_err + << std::endl; + std::cout << " max rel(E_xn)_inf(non-trace): " << max_rel_species_err + << std::endl; + std::cout << " trace rel cutoff : " << trace_rel_cutoff + << std::endl; + std::cout << " species count (relative) : " << species_rel_count + << std::endl; + + std::ostringstream considered_species; + bool first_species = true; + for (int n = 0; n < NumSpec; ++n) { + if (species_rel_hit_count[n] > 0) { + if (!first_species) { + considered_species << ", "; + } + considered_species << short_spec_names_cxx[n]; + first_species = false; + } + } + std::cout << label << " non-trace species used in relative metric:" << std::endl; - } else { - std::cout << "GPU multi-zone burn skipped (no GPU backend)" + std::cout << " species : [" + << (first_species ? "none" : considered_species.str()) << "]" << std::endl; + }; + + if (do_richardson && n_tol_levels == 3) { + report_richardson("CPU", level_results[0].cpu.states, + level_results[1].cpu.states, level_results[2].cpu.states); + } + + const bool use_richardson_gpu_failure = + enable_gpu && do_richardson && (n_tol_levels == 3); + + struct ZoneRichardsonThresholds { + bool valid = false; + amrex::Real temp_err = 0.0_rt; + amrex::Real energy_err = 0.0_rt; + std::array species_err{}; + std::array species_non_trace{}; + }; + + std::vector cpu_zone_thresholds; + if (use_richardson_gpu_failure) { + cpu_zone_thresholds.resize(n_zones); + const auto &cpu_l0 = level_results[0].cpu.states; + const auto &cpu_l1 = level_results[1].cpu.states; + const auto &cpu_l2 = level_results[2].cpu.states; + const amrex::Real trace_rel_cutoff = + amrex::max(unit_test_rp::richardson_trace_species_cutoff, 0.0_rt); + + for (int iz = 0; iz < n_zones; ++iz) { + auto &thr = cpu_zone_thresholds[iz]; + const bool ok0 = cpu_l0[iz].success && cpu_l0[iz].e > 0.0_rt; + const bool ok1 = cpu_l1[iz].success && cpu_l1[iz].e > 0.0_rt; + const bool ok2 = cpu_l2[iz].success && cpu_l2[iz].e > 0.0_rt; + if (!(ok0 && ok1 && ok2)) { + continue; + } + + thr.valid = true; + thr.temp_err = + std::abs(estimate_richardson(cpu_l0[iz].T, cpu_l1[iz].T, cpu_l2[iz].T) + .err); + thr.energy_err = + std::abs(estimate_richardson(cpu_l0[iz].e, cpu_l1[iz].e, cpu_l2[iz].e) + .err); + + amrex::Real zone_max_xn = 0.0_rt; + for (int n = 0; n < NumSpec; ++n) { + zone_max_xn = std::max(zone_max_xn, std::abs(cpu_l2[iz].xn[n])); + } + const amrex::Real zone_trace_cutoff = trace_rel_cutoff * zone_max_xn; + + for (int n = 0; n < NumSpec; ++n) { + thr.species_err[n] = std::abs(estimate_richardson( + cpu_l0[iz].xn[n], cpu_l1[iz].xn[n], + cpu_l2[iz].xn[n]) + .err); + thr.species_non_trace[n] = + (std::abs(cpu_l2[iz].xn[n]) >= zone_trace_cutoff) ? 1 : 0; + } + } } auto within_tol = [&](amrex::Real a, amrex::Real b) -> bool { @@ -468,7 +807,17 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu) -> int { amrex::Real gpu_value_at_max = 0.0_rt; }; + struct GpuFailureSummary { + int zones_failed = 0; + int temp_failures = 0; + int energy_failures = 0; + int species_failures = 0; + int missing_richardson = 0; + }; + std::map mismatch_summary; + bool gpu_failure_detected = false; + GpuFailureSummary gpu_failure_summary; struct NormAccumulator { amrex::Real diff_l1 = 0.0_rt; @@ -526,6 +875,10 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu) -> int { record_difference("success_flag", cpu_zone_success ? 1.0_rt : 0.0_rt, gpu_zone_success ? 1.0_rt : 0.0_rt, false); + if (use_richardson_gpu_failure) { + gpu_failure_detected = true; + ++gpu_failure_summary.zones_failed; + } continue; } if (!cpu_zone_success) { @@ -541,27 +894,75 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu) -> int { record_mismatch(iz, "rho", cpu_states[iz].rho, gpu_states[iz].rho); } record_difference("rho", cpu_states[iz].rho, gpu_states[iz].rho, false); - if (!within_tol(cpu_states[iz].T, gpu_states[iz].T)) { - record_mismatch(iz, "T", cpu_states[iz].T, gpu_states[iz].T); + + bool zone_failed_by_richardson = false; + if (use_richardson_gpu_failure) { + const auto &thr = cpu_zone_thresholds[iz]; + if (!thr.valid) { + record_mismatch(iz, "richardson_unavailable", 1.0_rt, 0.0_rt); + zone_failed_by_richardson = true; + ++gpu_failure_summary.missing_richardson; + } else { + if (std::abs(cpu_states[iz].T - gpu_states[iz].T) > + 2.0_rt * thr.temp_err) { + record_mismatch(iz, "T_richardson_fail", cpu_states[iz].T, + gpu_states[iz].T); + zone_failed_by_richardson = true; + ++gpu_failure_summary.temp_failures; + } + if (std::abs(cpu_states[iz].e - gpu_states[iz].e) > + 2.0_rt * thr.energy_err) { + record_mismatch(iz, "e_richardson_fail", cpu_states[iz].e, + gpu_states[iz].e); + zone_failed_by_richardson = true; + ++gpu_failure_summary.energy_failures; + } + for (int n = 0; n < NumSpec; ++n) { + if (thr.species_non_trace[n] == 0) { + continue; + } + if (std::abs(cpu_states[iz].xn[n] - gpu_states[iz].xn[n]) > + 2.0_rt * thr.species_err[n]) { + std::string label = "xn_richardson_fail(" + + std::string(short_spec_names_cxx[n]) + ")"; + record_mismatch(iz, label, cpu_states[iz].xn[n], + gpu_states[iz].xn[n]); + zone_failed_by_richardson = true; + ++gpu_failure_summary.species_failures; + } + } + } + } else { + if (!within_tol(cpu_states[iz].T, gpu_states[iz].T)) { + record_mismatch(iz, "T", cpu_states[iz].T, gpu_states[iz].T); + } + if (!within_tol(cpu_states[iz].e, gpu_states[iz].e)) { + record_mismatch(iz, "e", cpu_states[iz].e, gpu_states[iz].e); + } + for (int n = 0; n < NumSpec; ++n) { + if (!within_tol(cpu_states[iz].xn[n], gpu_states[iz].xn[n])) { + std::string label = + "xn(" + std::string(short_spec_names_cxx[n]) + ")"; + record_mismatch(iz, label, cpu_states[iz].xn[n], + gpu_states[iz].xn[n]); + } + } } + record_difference("temperature", cpu_states[iz].T, gpu_states[iz].T, true); - if (!within_tol(cpu_states[iz].e, gpu_states[iz].e)) { - record_mismatch(iz, "e", cpu_states[iz].e, gpu_states[iz].e); - } record_difference("e", cpu_states[iz].e, gpu_states[iz].e, false); for (int n = 0; n < NumSpec; ++n) { - if (!within_tol(cpu_states[iz].xn[n], gpu_states[iz].xn[n])) { - std::string label = - "xn(" + std::string(short_spec_names_cxx[n]) + ")"; - record_mismatch(iz, label, cpu_states[iz].xn[n], - gpu_states[iz].xn[n]); - } std::string norm_label = "number_density(" + std::string(short_spec_names_cxx[n]) + ")"; record_difference(norm_label, cpu_states[iz].xn[n], gpu_states[iz].xn[n], true); } + + if (use_richardson_gpu_failure && zone_failed_by_richardson) { + gpu_failure_detected = true; + ++gpu_failure_summary.zones_failed; + } } } @@ -602,8 +1003,14 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu) -> int { } if (mismatch_summary.empty() && enable_gpu) { - std::cout << "CPU/GPU multi-zone states agree within tolerances" - << std::endl; + if (use_richardson_gpu_failure) { + std::cout << "CPU/GPU multi-zone states pass Richardson-based GPU " + "failure criterion" + << std::endl; + } else { + std::cout << "CPU/GPU multi-zone states agree within tolerances" + << std::endl; + } } else if (enable_gpu) { std::cout << "CPU/GPU mismatches detected:" << std::endl; for (const auto &entry : mismatch_summary) { @@ -618,11 +1025,23 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu) -> int { } std::cout << std::endl; } + if (use_richardson_gpu_failure) { + std::cout << "GPU failure summary (|CPU-GPU| > 2*|Richardson error|): " + << "zones_failed=" << gpu_failure_summary.zones_failed + << ", temp_failures=" << gpu_failure_summary.temp_failures + << ", energy_failures=" << gpu_failure_summary.energy_failures + << ", species_failures=" << gpu_failure_summary.species_failures + << ", missing_richardson=" + << gpu_failure_summary.missing_richardson << std::endl; + } } if (!enable_gpu) { return cpu_success; } + if (use_richardson_gpu_failure) { + return (cpu_success && gpu_success && !gpu_failure_detected); + } return (cpu_success && gpu_success && mismatch_summary.empty()); } #endif diff --git a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem index 7ab0e97bb4..5f903e7160 100644 --- a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem +++ b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem @@ -1,6 +1,6 @@ unit_test.n_zones = 1 #unit_test.n_zones = 32**3 -integrator.burner_verbose = 1 +integrator.burner_verbose = 0 ## unit_test runtime parameters unit_test.run_prefix = "burn_cell_primordial_chem_" @@ -11,6 +11,12 @@ unit_test.tff_reduc = 0.03 # number of integration steps unit_test.nsteps = 1e4 +unit_test.richardson_enable = 1 +unit_test.richardson_tol_factor = 10.0 +# set to 1 to print per-zone Richardson estimates +unit_test.richardson_print_per_zone = 0 +# non-trace threshold as fraction of most abundant species per zone +unit_test.richardson_trace_species_cutoff = 1e-15 # max total time unit_test.tmax = 7.e20 From ff4a0b7288109149e2d0c73e154720c9a9d55eb0 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Fri, 6 Feb 2026 19:48:17 -0500 Subject: [PATCH 33/50] increase max steps for richardson estimate --- unit_test/burn_cell_primordial_chem/inputs_primordial_chem | 3 +++ 1 file changed, 3 insertions(+) diff --git a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem index 5f903e7160..69bbab0532 100644 --- a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem +++ b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem @@ -76,6 +76,9 @@ integrator.renormalize_abundances = 0 integrator.rtol_spec = 1.0e-4 integrator.atol_spec = 1.0e-4 +# max substeps +integrator.ode_max_steps = 3000000 + #assumed redshift for Pop III star formation network.redshift = 30.0 From 33c4fcac62f5f3bc846b28f11088fb570cd5b66e Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Fri, 6 Feb 2026 19:50:37 -0500 Subject: [PATCH 34/50] add more info to printout --- .../burn_cell_primordial_chem/burn_cell.H | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index 2ee3c04861..a5fc91b760 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -484,6 +484,12 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu) -> int { integrator_rp::retry_atol_enuc = base_retry_atol_enuc; }; + auto format_tol = [](amrex::Real tol) { + std::ostringstream out; + out << std::scientific << std::setprecision(10) << tol; + return out.str(); + }; + std::vector level_results(n_tol_levels); bool cpu_success = true; bool gpu_success = true; @@ -493,6 +499,29 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu) -> int { level_results[ilev].scale = scale; apply_tolerance_scale(scale); + if (do_richardson) { + std::cout << "Richardson tolerances (level " << (ilev + 1) + << ", scale=" << scale << "):" << std::endl; + std::cout << " rtol_spec / atol_spec : " + << format_tol(integrator_rp::rtol_spec) << " / " + << format_tol(integrator_rp::atol_spec) << std::endl; + std::cout << " rtol_enuc / atol_enuc : " + << format_tol(integrator_rp::rtol_enuc) << " / " + << format_tol(integrator_rp::atol_enuc) << std::endl; + if (integrator_rp::retry_rtol_spec > 0.0_rt && + integrator_rp::retry_atol_spec > 0.0_rt) { + std::cout << " retry_rtol_spec / retry_atol_spec : " + << format_tol(integrator_rp::retry_rtol_spec) << " / " + << format_tol(integrator_rp::retry_atol_spec) << std::endl; + } + if (integrator_rp::retry_rtol_enuc > 0.0_rt && + integrator_rp::retry_atol_enuc > 0.0_rt) { + std::cout << " retry_rtol_enuc / retry_atol_enuc : " + << format_tol(integrator_rp::retry_rtol_enuc) << " / " + << format_tol(integrator_rp::retry_atol_enuc) << std::endl; + } + } + auto cpu_start = std::chrono::steady_clock::now(); level_results[ilev].cpu.success = run_multi_zone(false, level_results[ilev].cpu.states, From 05c18eb35e3f7433fd6b23a7c2e0e0d1b2ca767d Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Sat, 7 Feb 2026 13:18:27 -0500 Subject: [PATCH 35/50] fix const bug in numerical_jacobian.H --- integration/utils/numerical_jacobian.H | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration/utils/numerical_jacobian.H b/integration/utils/numerical_jacobian.H index 5affd37179..6c4d131753 100644 --- a/integration/utils/numerical_jacobian.H +++ b/integration/utils/numerical_jacobian.H @@ -61,7 +61,7 @@ void numerical_jac(const BurnT& state, const jac_info_t& jac_info, JacNetArray2D // default -- plus convert the dY/dt into dX/dt - actual_rhs(state, ydotm); + actual_rhs(state_delp, ydotm); for (int q = 1; q <= NumSpec; q++) { ydotm(q) *= aion[q-1]; From f676fbd0e17723e4621d485f3daf440b354d72f5 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Sat, 7 Feb 2026 13:20:58 -0500 Subject: [PATCH 36/50] makefile fixes --- .../burn_cell_primordial_chem/GNUmakefile | 6 +- .../burn_cell_primordial_chem/burn_cell.H | 83 +++++++++++++++++++ .../post_link_install_name.sh | 51 ++++++++++++ 3 files changed, 138 insertions(+), 2 deletions(-) create mode 100755 unit_test/burn_cell_primordial_chem/post_link_install_name.sh diff --git a/unit_test/burn_cell_primordial_chem/GNUmakefile b/unit_test/burn_cell_primordial_chem/GNUmakefile index 91a6f24fea..0f575b4244 100644 --- a/unit_test/burn_cell_primordial_chem/GNUmakefile +++ b/unit_test/burn_cell_primordial_chem/GNUmakefile @@ -31,7 +31,7 @@ NETWORK_DIR := primordial_chem CONDUCTIVITY_DIR := stellar -INTEGRATOR_DIR = VODE +INTEGRATOR_DIR = BackwardEuler EXTERN_SEARCH += . @@ -40,4 +40,6 @@ Blocs := . include $(MICROPHYSICS_HOME)/unit_test/Make.unit_test - +ifeq ($(shell uname),Darwin) +AMREX_LINKER := ./post_link_install_name.sh $(AMREX_LINKER) +endif diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index a5fc91b760..3f54c89b18 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -768,6 +768,89 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu) -> int { level_results[1].cpu.states, level_results[2].cpu.states); } + struct ZoneTruncationEstimate { + bool valid = false; + amrex::Real temp_err = 0.0_rt; + amrex::Real energy_err = 0.0_rt; + std::array species_err{}; + }; + + std::vector cpu_zone_truncation; + if (do_richardson && n_tol_levels == 3) { + cpu_zone_truncation.resize(n_zones); + const auto &cpu_l0 = level_results[0].cpu.states; + const auto &cpu_l1 = level_results[1].cpu.states; + const auto &cpu_l2 = level_results[2].cpu.states; + + for (int iz = 0; iz < n_zones; ++iz) { + auto &est = cpu_zone_truncation[iz]; + const bool ok0 = cpu_l0[iz].success && cpu_l0[iz].e > 0.0_rt; + const bool ok1 = cpu_l1[iz].success && cpu_l1[iz].e > 0.0_rt; + const bool ok2 = cpu_l2[iz].success && cpu_l2[iz].e > 0.0_rt; + if (!(ok0 && ok1 && ok2)) { + continue; + } + + est.valid = true; + est.temp_err = + estimate_richardson(cpu_l0[iz].T, cpu_l1[iz].T, cpu_l2[iz].T).err; + est.energy_err = + estimate_richardson(cpu_l0[iz].e, cpu_l1[iz].e, cpu_l2[iz].e).err; + for (int n = 0; n < NumSpec; ++n) { + est.species_err[n] = + estimate_richardson(cpu_l0[iz].xn[n], cpu_l1[iz].xn[n], + cpu_l2[iz].xn[n]) + .err; + } + } + } + + { + const std::string output_file = "burn_cell_final_state.txt"; + std::ofstream ofs(output_file); + if (!ofs) { + std::cout << "WARNING: unable to open " << output_file + << " for writing final state output" << std::endl; + } else { + ofs << std::scientific << std::setprecision(16); + ofs << "# burn_cell_primordial_chem final state (CPU, tolerance level 1)\n"; + ofs << "# columns: zone time rho T e success"; + for (int n = 0; n < NumSpec; ++n) { + ofs << " xn_" << short_spec_names_cxx[n]; + } + ofs << " truncation_valid truncation_err_T truncation_err_e"; + for (int n = 0; n < NumSpec; ++n) { + ofs << " truncation_err_xn_" << short_spec_names_cxx[n]; + } + ofs << "\n"; + + for (int iz = 0; iz < n_zones; ++iz) { + ofs << iz << " " << cpu_times[iz] << " " << cpu_states[iz].rho << " " + << cpu_states[iz].T << " " << cpu_states[iz].e << " " + << (cpu_states[iz].success ? 1 : 0); + for (int n = 0; n < NumSpec; ++n) { + ofs << " " << cpu_states[iz].xn[n]; + } + + if (!cpu_zone_truncation.empty() && cpu_zone_truncation[iz].valid) { + ofs << " 1 " << cpu_zone_truncation[iz].temp_err << " " + << cpu_zone_truncation[iz].energy_err; + for (int n = 0; n < NumSpec; ++n) { + ofs << " " << cpu_zone_truncation[iz].species_err[n]; + } + } else { + ofs << " 0 0.0 0.0"; + for (int n = 0; n < NumSpec; ++n) { + ofs << " 0.0"; + } + } + ofs << "\n"; + } + std::cout << "Wrote final state and truncation estimates to " + << output_file << std::endl; + } + } + const bool use_richardson_gpu_failure = enable_gpu && do_richardson && (n_tol_levels == 3); diff --git a/unit_test/burn_cell_primordial_chem/post_link_install_name.sh b/unit_test/burn_cell_primordial_chem/post_link_install_name.sh new file mode 100755 index 0000000000..39a7b74e0f --- /dev/null +++ b/unit_test/burn_cell_primordial_chem/post_link_install_name.sh @@ -0,0 +1,51 @@ +#!/bin/sh +# +# Wrapper around the AMReX linker command that patches stale Zerobrew GCC +# install_names after linking on macOS. + +set -eu + +real_linker="$1" +shift + +output="" +prev="" +for arg in "$@"; do + if [ "$prev" = "-o" ]; then + output="$arg" + break + fi + prev="$arg" +done + +"$real_linker" "$@" + +if [ "$(uname)" != "Darwin" ]; then + exit 0 +fi + +if [ -z "$output" ] || [ ! -f "$output" ]; then + exit 0 +fi + +if ! command -v otool >/dev/null 2>&1; then + exit 0 +fi + +if ! command -v install_name_tool >/dev/null 2>&1; then + exit 0 +fi + +target_root="/opt/zerobrew/prefix/lib/gcc/current" + +for lib in libgfortran.5.dylib libquadmath.0.dylib libstdc++.6.dylib; do + new_path="${target_root}/${lib}" + if [ ! -e "$new_path" ]; then + continue + fi + + old_path=$(otool -L "$output" | awk -v lib="$lib" 'index($1, lib) {print $1; exit}') + if [ -n "${old_path}" ] && [ "${old_path}" != "${new_path}" ]; then + install_name_tool -change "$old_path" "$new_path" "$output" + fi +done From 88562222e8fcc744519b51303e097d3a4e97c2f5 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Sat, 7 Feb 2026 14:03:05 -0500 Subject: [PATCH 37/50] fix numerical jacobian for BE --- integration/BackwardEuler/be_integrator.H | 42 ++++++++++++++++++++--- integration/utils/numerical_jacobian.H | 26 +++++++++----- 2 files changed, 54 insertions(+), 14 deletions(-) diff --git a/integration/BackwardEuler/be_integrator.H b/integration/BackwardEuler/be_integrator.H index a204f404a5..2778ba28e8 100644 --- a/integration/BackwardEuler/be_integrator.H +++ b/integration/BackwardEuler/be_integrator.H @@ -2,6 +2,7 @@ #define BE_INTEGRATOR_H #include +#include #include #include @@ -11,7 +12,6 @@ #endif #include #include -#include #ifdef STRANG #include #endif @@ -31,6 +31,7 @@ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int single_step (BurnT& state, BeT& be, const amrex::Real dt) { constexpr int int_neqs = integrator_neqs(); + constexpr amrex::Real uround = std::numeric_limits::epsilon(); int ierr = IERR_SUCCESS; bool converged = false; @@ -88,10 +89,41 @@ int single_step (BurnT& state, BeT& be, const amrex::Real dt) if (be.jacobian_type == 1) { jac(be.t, state, be, be.jac); } else { - jac_info_t jac_info; - jac_info.h = dt; - numerical_jac(state, jac_info, be.jac); - be.n_rhs += (NumSpec+1); + // Build a numerical Jacobian by differencing the wrapped RHS. + // This keeps the Jacobian consistent with whatever variables + // the integrator is actually evolving (X or number densities). + amrex::Real fac = 0.0_rt; + for (int i = 1; i <= int_neqs; ++i) { + const amrex::Real wt = (i <= NumSpec) ? + (integrator_rp::rtol_spec * std::abs(be.y(i)) + integrator_rp::atol_spec) : + (integrator_rp::rtol_enuc * std::abs(be.y(i)) + integrator_rp::atol_enuc); + fac += (ydot(i) / wt) * (ydot(i) / wt); + } + fac = std::sqrt(fac / int_neqs); + + amrex::Real r0 = 1000.0_rt * std::abs(dt) * uround * int_neqs * fac; + if (r0 == 0.0_rt) { + r0 = 1.0_rt; + } + + constexpr bool in_jacobian = true; + amrex::Array1D ydot_pert; + for (int j = 1; j <= int_neqs; ++j) { + const amrex::Real yj = be.y(j); + const amrex::Real wt = (j <= NumSpec) ? + (integrator_rp::rtol_spec * std::abs(yj) + integrator_rp::atol_spec) : + (integrator_rp::rtol_enuc * std::abs(yj) + integrator_rp::atol_enuc); + const amrex::Real r = amrex::max(std::sqrt(uround) * std::abs(yj), r0 * wt); + + be.y(j) += r; + rhs(be.t, state, be, ydot_pert, in_jacobian); + for (int i = 1; i <= int_neqs; ++i) { + be.jac(i, j) = (ydot_pert(i) - ydot(i)) / r; + } + be.y(j) = yj; + } + + be.n_rhs += int_neqs; } be.n_jac++; diff --git a/integration/utils/numerical_jacobian.H b/integration/utils/numerical_jacobian.H index 6c4d131753..c025acb48a 100644 --- a/integration/utils/numerical_jacobian.H +++ b/integration/utils/numerical_jacobian.H @@ -59,12 +59,16 @@ void numerical_jac(const BurnT& state, const jac_info_t& jac_info, JacNetArray2D BurnT state_delp = state; - // default -- plus convert the dY/dt into dX/dt + // default state RHS actual_rhs(state_delp, ydotm); - for (int q = 1; q <= NumSpec; q++) { - ydotm(q) *= aion[q-1]; + // We integrate X, not Y, except for primordial chemistry where + // we integrate number densities directly. + if (! integrator_rp::use_number_densities) { + for (int q = 1; q <= NumSpec; q++) { + ydotm(q) *= aion[q-1]; + } } // start by computing |f|, which is the norm of the RHS / weight @@ -112,10 +116,12 @@ void numerical_jac(const BurnT& state, const jac_info_t& jac_info, JacNetArray2D actual_rhs(state_delp, ydotp); - // We integrate X, so convert from the Y we got back from the RHS - - for (int q = 1; q <= NumSpec; q++) { - ydotp(q) *= aion[q-1]; + // We integrate X, not Y, except for primordial chemistry where + // we integrate number densities directly. + if (! integrator_rp::use_number_densities) { + for (int q = 1; q <= NumSpec; q++) { + ydotp(q) *= aion[q-1]; + } } // now fill in all of the rows for this column X_n @@ -150,8 +156,10 @@ void numerical_jac(const BurnT& state, const jac_info_t& jac_info, JacNetArray2D actual_rhs(state_delp, ydotp); - for (int q = 1; q <= NumSpec; q++) { - ydotp(q) *= aion[q-1]; + if (! integrator_rp::use_number_densities) { + for (int q = 1; q <= NumSpec; q++) { + ydotp(q) *= aion[q-1]; + } } // first fill just the last column with dy/dT From f3dc2e7b50f3cf395e92bf31179025655cb97b95 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Sat, 7 Feb 2026 14:03:30 -0500 Subject: [PATCH 38/50] add stiffness ratio output --- .../burn_cell_primordial_chem/burn_cell.H | 182 ++++++++++++++++++ 1 file changed, 182 insertions(+) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index 3f54c89b18..2011f67e76 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,8 @@ #include #include #include +#include +#include #include #include @@ -27,6 +30,165 @@ amrex::Real grav_constant = 6.674e-8; namespace { +using Complex = std::complex; +using ComplexMatrix = std::vector>; + +AMREX_INLINE +auto identity_complex_matrix(int n) -> ComplexMatrix { + ComplexMatrix I(n, std::vector(n, Complex(0.0_rt, 0.0_rt))); + for (int i = 0; i < n; ++i) { + I[i][i] = Complex(1.0_rt, 0.0_rt); + } + return I; +} + +AMREX_INLINE +auto matmul(const ComplexMatrix &A, const ComplexMatrix &B) -> ComplexMatrix { + const int n = static_cast(A.size()); + ComplexMatrix C(n, std::vector(n, Complex(0.0_rt, 0.0_rt))); + for (int i = 0; i < n; ++i) { + for (int k = 0; k < n; ++k) { + const Complex aik = A[i][k]; + if (std::abs(aik) == 0.0_rt) { + continue; + } + for (int j = 0; j < n; ++j) { + C[i][j] += aik * B[k][j]; + } + } + } + return C; +} + +AMREX_INLINE +void qr_decompose_mgs(const ComplexMatrix &A, ComplexMatrix &Q, + ComplexMatrix &R) { + const int n = static_cast(A.size()); + Q.assign(n, std::vector(n, Complex(0.0_rt, 0.0_rt))); + R.assign(n, std::vector(n, Complex(0.0_rt, 0.0_rt))); + + std::vector v(n); + + for (int j = 0; j < n; ++j) { + for (int i = 0; i < n; ++i) { + v[i] = A[i][j]; + } + + for (int i = 0; i < j; ++i) { + Complex rij(0.0_rt, 0.0_rt); + for (int k = 0; k < n; ++k) { + rij += std::conj(Q[k][i]) * v[k]; + } + R[i][j] = rij; + for (int k = 0; k < n; ++k) { + v[k] -= rij * Q[k][i]; + } + } + + amrex::Real vnorm2 = 0.0_rt; + for (int k = 0; k < n; ++k) { + vnorm2 += std::norm(v[k]); + } + const amrex::Real vnorm = std::sqrt(vnorm2); + R[j][j] = Complex(vnorm, 0.0_rt); + + if (vnorm > 0.0_rt) { + for (int k = 0; k < n; ++k) { + Q[k][j] = v[k] / vnorm; + } + } else { + Q[j][j] = Complex(1.0_rt, 0.0_rt); + } + } +} + +AMREX_INLINE +auto jacobian_eigenvalues_qr(const RArray2D &jac) -> std::vector { + constexpr int n = INT_NEQS; + ComplexMatrix A(n, std::vector(n, Complex(0.0_rt, 0.0_rt))); + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + A[i][j] = Complex(static_cast(jac(i + 1, j + 1)), 0.0_rt); + } + } + + ComplexMatrix Q; + ComplexMatrix R; + const auto I = identity_complex_matrix(n); + + constexpr int max_iter = 256; + constexpr amrex::Real offdiag_tol = 1.0e-12_rt; + + for (int iter = 0; iter < max_iter; ++iter) { + amrex::Real offdiag_norm = 0.0_rt; + for (int i = 1; i < n; ++i) { + for (int j = 0; j < i; ++j) { + offdiag_norm += std::norm(A[i][j]); + } + } + offdiag_norm = std::sqrt(offdiag_norm); + if (offdiag_norm < offdiag_tol) { + break; + } + + const Complex mu = A[n - 1][n - 1]; + ComplexMatrix shifted = A; + for (int i = 0; i < n; ++i) { + shifted[i][i] -= mu * I[i][i]; + } + + qr_decompose_mgs(shifted, Q, R); + A = matmul(R, Q); + for (int i = 0; i < n; ++i) { + A[i][i] += mu * I[i][i]; + } + } + + std::vector eigvals(n, Complex(0.0_rt, 0.0_rt)); + for (int i = 0; i < n; ++i) { + eigvals[i] = A[i][i]; + } + return eigvals; +} + +struct JacobianStiffness { + amrex::Real max_abs_real = 0.0_rt; + amrex::Real min_abs_real = std::numeric_limits::infinity(); + amrex::Real ratio = std::numeric_limits::infinity(); + bool valid = false; +}; + +AMREX_INLINE +auto compute_jacobian_stiffness(const burn_t &state_in) -> JacobianStiffness { + burn_t state = state_in; + RArray2D jac; + actual_jac(state, jac); + + const auto eigvals = jacobian_eigenvalues_qr(jac); + constexpr amrex::Real tiny = 1.0e-300_rt; + + JacobianStiffness result; + for (const auto &lam : eigvals) { + const amrex::Real ar = std::abs(lam.real()); + if (!std::isfinite(ar)) { + continue; + } + result.max_abs_real = std::max(result.max_abs_real, ar); + if (ar > tiny) { + result.min_abs_real = std::min(result.min_abs_real, ar); + } + } + + if (result.max_abs_real > tiny && + std::isfinite(result.min_abs_real) && + result.min_abs_real > tiny) { + result.ratio = result.max_abs_real / result.min_abs_real; + result.valid = true; + } + + return result; +} + AMREX_INLINE void init_burn_state(burn_t &state, amrex::Real density_scale, amrex::Real temp_offset, bool print_initial) { @@ -192,6 +354,9 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu) -> int { int retries = 0; amrex::Real t_min = std::numeric_limits::max(); amrex::Real t_max = -std::numeric_limits::max(); + amrex::Real stiffness_ratio_min = std::numeric_limits::max(); + amrex::Real stiffness_ratio_max = 0.0_rt; + int stiffness_samples = 0; bool any_success = false; }; @@ -381,6 +546,14 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu) -> int { state.suppress_failure_output = false; enforce_post_burn_constraints(state); + const auto stiff = compute_jacobian_stiffness(state); + if (stiff.valid) { + summary.stiffness_ratio_min = + std::min(summary.stiffness_ratio_min, stiff.ratio); + summary.stiffness_ratio_max = + std::max(summary.stiffness_ratio_max, stiff.ratio); + ++summary.stiffness_samples; + } time_h[iz] += dt_h[iz]; } } @@ -418,6 +591,15 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu) -> int { << summary.t_min << ", " << summary.t_max << "]" << std::endl; std::cout << " retries applied : " << summary.retries << std::endl; + if (summary.stiffness_samples > 0) { + std::cout << " stiffness ratio range : [" + << std::scientific << summary.stiffness_ratio_min << ", " + << summary.stiffness_ratio_max << "]" + << std::defaultfloat << " (samples=" + << summary.stiffness_samples << ")" << std::endl; + } else { + std::cout << " stiffness ratio range : unavailable" << std::endl; + } } else { std::cout << " status : no successful zones" << std::endl; From ebfa0b776852b1a8f85f581761c0e36b67ffe339 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Sat, 7 Feb 2026 14:22:08 -0500 Subject: [PATCH 39/50] report diff in normalized error units --- .../burn_cell_primordial_chem/GNUmakefile | 2 +- unit_test/burn_cell_primordial_chem/README.md | 26 ++ .../burn_cell_primordial_chem/burn_cell.H | 255 ++++++++++++++++-- .../burn_cell_final_state.txt | 3 + .../inputs_primordial_chem | 2 +- unit_test/burn_cell_primordial_chem/main.cpp | 43 ++- 6 files changed, 305 insertions(+), 26 deletions(-) create mode 100644 unit_test/burn_cell_primordial_chem/burn_cell_final_state.txt diff --git a/unit_test/burn_cell_primordial_chem/GNUmakefile b/unit_test/burn_cell_primordial_chem/GNUmakefile index 0f575b4244..bb3ab88f60 100644 --- a/unit_test/burn_cell_primordial_chem/GNUmakefile +++ b/unit_test/burn_cell_primordial_chem/GNUmakefile @@ -31,7 +31,7 @@ NETWORK_DIR := primordial_chem CONDUCTIVITY_DIR := stellar -INTEGRATOR_DIR = BackwardEuler +INTEGRATOR_DIR = VODE EXTERN_SEARCH += . diff --git a/unit_test/burn_cell_primordial_chem/README.md b/unit_test/burn_cell_primordial_chem/README.md index ed6a65afff..467a545109 100644 --- a/unit_test/burn_cell_primordial_chem/README.md +++ b/unit_test/burn_cell_primordial_chem/README.md @@ -21,3 +21,29 @@ # continuous integration The code is built with the `primordial_chem` network and run with `inputs_primordial_chem`. + +# reference and compare workflow + +Use this two-step workflow to split CPU reference generation from later +comparison runs. + +1. Generate/update the saved CPU reference solution: + + ```bash + ./main1d.gnu.ex inputs_primordial_chem + ``` + + This runs the normal Richardson-enabled path and writes + `burn_cell_final_state.txt`. + +2. Compare a future run against the saved reference: + + ```bash + ./main1d.gnu.ex inputs_primordial_chem --compare-final-state burn_cell_final_state.txt + ``` + + In compare mode, the code does not run Richardson. It performs one batch + run on the active backend (GPU when enabled, otherwise CPU), compares each + zone against the saved reference, and reports per-zone `PASS`/`FAIL`. + For `T`, `e`, and each species number density, the comparison threshold is + `2 * |saved truncation error|` from the reference file. diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index 2011f67e76..25d8aec224 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -341,7 +341,9 @@ void enforce_post_burn_constraints(burn_t &state) { AMREX_INLINE -auto burn_cell_multi_c(int n_zones, bool enable_gpu) -> int { +auto burn_cell_multi_c(int n_zones, bool enable_gpu, + const std::string &compare_final_state_file = "") + -> int { constexpr amrex::Real rel_tol = 1.0e-10_rt; constexpr amrex::Real abs_tol = 1.0e-12_rt; @@ -631,9 +633,14 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu) -> int { const amrex::Real tol_factor = amrex::max(unit_test_rp::richardson_tol_factor, 1.0_rt); + const bool compare_against_saved = !compare_final_state_file.empty(); const bool do_richardson = - unit_test_rp::richardson_enable && (tol_factor > 1.0_rt); + !compare_against_saved && unit_test_rp::richardson_enable && + (tol_factor > 1.0_rt); const int n_tol_levels = do_richardson ? 3 : 1; + const bool compare_backend_is_gpu = compare_against_saved && enable_gpu; + const bool run_gpu_backend = + compare_against_saved ? compare_backend_is_gpu : enable_gpu; auto apply_tolerance_scale = [&](amrex::Real scale) { integrator_rp::rtol_spec = base_rtol_spec / scale; @@ -704,26 +711,31 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu) -> int { } } - auto cpu_start = std::chrono::steady_clock::now(); - level_results[ilev].cpu.success = - run_multi_zone(false, level_results[ilev].cpu.states, - level_results[ilev].cpu.times, - level_results[ilev].cpu.summary); - auto cpu_end = std::chrono::steady_clock::now(); - level_results[ilev].cpu.wall = cpu_end - cpu_start; - cpu_success = cpu_success && level_results[ilev].cpu.success; - std::ostringstream level_label; if (do_richardson) { level_label << " (tol level " << (ilev + 1) << ", scale=" << scale << ")"; } - const std::string cpu_label = "CPU" + level_label.str(); - print_summary(cpu_label, level_results[ilev].cpu.summary); - std::cout << " burn walltime (s) : " - << level_results[ilev].cpu.wall.count() << std::endl; + if (!compare_against_saved || !compare_backend_is_gpu) { + auto cpu_start = std::chrono::steady_clock::now(); + level_results[ilev].cpu.success = + run_multi_zone(false, level_results[ilev].cpu.states, + level_results[ilev].cpu.times, + level_results[ilev].cpu.summary); + auto cpu_end = std::chrono::steady_clock::now(); + level_results[ilev].cpu.wall = cpu_end - cpu_start; + cpu_success = cpu_success && level_results[ilev].cpu.success; + + const std::string cpu_label = "CPU" + level_label.str(); + print_summary(cpu_label, level_results[ilev].cpu.summary); + std::cout << " burn walltime (s) : " + << level_results[ilev].cpu.wall.count() << std::endl; + } else if (ilev == 0) { + std::cout << "CPU multi-zone burn skipped (saved-state compare mode)" + << std::endl; + } - if (enable_gpu) { + if (run_gpu_backend) { auto gpu_start = std::chrono::steady_clock::now(); level_results[ilev].gpu.success = run_multi_zone(true, level_results[ilev].gpu.states, @@ -749,6 +761,11 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu) -> int { const auto &cpu_times = level_results.front().cpu.times; const auto &gpu_states = level_results.front().gpu.states; const auto &gpu_times = level_results.front().gpu.times; + const auto &compare_states = compare_backend_is_gpu ? gpu_states : cpu_states; + const auto &compare_times = compare_backend_is_gpu ? gpu_times : cpu_times; + const bool compare_backend_success = + compare_backend_is_gpu ? level_results.front().gpu.success + : level_results.front().cpu.success; struct RichardsonEstimate { bool valid = false; @@ -801,6 +818,206 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu) -> int { return est; }; + auto within_tol = [&](amrex::Real a, amrex::Real b) -> bool { + amrex::Real diff = std::abs(a - b); + amrex::Real scale = std::max(std::abs(a), std::abs(b)); + return diff <= (abs_tol + rel_tol * scale); + }; + + if (compare_against_saved) { + struct SavedZoneData { + bool present = false; + int zone = -1; + amrex::Real time = 0.0_rt; + amrex::Real rho = 0.0_rt; + amrex::Real T = 0.0_rt; + amrex::Real e = 0.0_rt; + int success = 0; + std::array xn{}; + int trunc_valid = 0; + amrex::Real trunc_err_T = 0.0_rt; + amrex::Real trunc_err_e = 0.0_rt; + std::array trunc_err_xn{}; + }; + + std::ifstream ifs(compare_final_state_file); + if (!ifs) { + std::cout << "ERROR: unable to open saved final-state file " + << compare_final_state_file << std::endl; + return false; + } + + std::vector saved(n_zones); + std::string line; + int parsed_rows = 0; + while (std::getline(ifs, line)) { + if (line.empty() || line[0] == '#') { + continue; + } + + std::istringstream iss(line); + SavedZoneData row; + if (!(iss >> row.zone >> row.time >> row.rho >> row.T >> row.e >> + row.success)) { + std::cout << "ERROR: malformed row in saved-state file: " << line + << std::endl; + return false; + } + for (int n = 0; n < NumSpec; ++n) { + if (!(iss >> row.xn[n])) { + std::cout << "ERROR: missing species values in saved-state file row: " + << line << std::endl; + return false; + } + } + if (!(iss >> row.trunc_valid >> row.trunc_err_T >> row.trunc_err_e)) { + std::cout << "ERROR: missing truncation fields in saved-state file row: " + << line << std::endl; + return false; + } + for (int n = 0; n < NumSpec; ++n) { + if (!(iss >> row.trunc_err_xn[n])) { + std::cout << "ERROR: missing species truncation fields in saved-state " + "file row: " + << line << std::endl; + return false; + } + } + + if (row.zone < 0 || row.zone >= n_zones) { + std::cout << "ERROR: zone index " << row.zone + << " is out of bounds for n_zones = " << n_zones + << std::endl; + return false; + } + row.present = true; + saved[row.zone] = row; + ++parsed_rows; + } + + std::cout << "Loaded " << parsed_rows << " zone rows from " + << compare_final_state_file << std::endl; + + int missing_rows = 0; + int zones_failed = 0; + int zones_passed = 0; + amrex::Real max_saved_err_units = 0.0_rt; + int max_saved_err_zone = -1; + std::string max_saved_err_field = "none"; + + for (int iz = 0; iz < n_zones; ++iz) { + if (!saved[iz].present) { + ++missing_rows; + ++zones_failed; + std::cout << "zone " << iz << " : FAIL (missing from saved file)" + << std::endl; + continue; + } + + const auto &ref = saved[iz]; + std::vector mismatches; + + const bool cur_success = + compare_states[iz].success && compare_states[iz].e > 0.0_rt; + const bool ref_success = (ref.success != 0); + if (cur_success != ref_success) { + mismatches.push_back("success"); + } + + if (!within_tol(compare_times[iz], ref.time)) { + mismatches.push_back("time"); + } + if (!within_tol(compare_states[iz].rho, ref.rho)) { + mismatches.push_back("rho"); + } + + auto within_saved_err = [&](amrex::Real cur, amrex::Real reference, + amrex::Real saved_err) { + const amrex::Real threshold = 2.0_rt * std::abs(saved_err); + return std::abs(cur - reference) <= threshold; + }; + auto update_saved_err_units = [&](const std::string &field, + amrex::Real cur, + amrex::Real reference, + amrex::Real saved_err) { + const amrex::Real diff = std::abs(cur - reference); + amrex::Real units = 0.0_rt; + if (std::abs(saved_err) > 0.0_rt) { + units = diff / std::abs(saved_err); + } else if (diff > 0.0_rt) { + units = std::numeric_limits::infinity(); + } + if (units > max_saved_err_units) { + max_saved_err_units = units; + max_saved_err_zone = iz; + max_saved_err_field = field; + } + }; + + if (ref.trunc_valid != 0) { + update_saved_err_units("T", compare_states[iz].T, ref.T, + ref.trunc_err_T); + if (!within_saved_err(compare_states[iz].T, ref.T, ref.trunc_err_T)) { + mismatches.push_back("T"); + } + update_saved_err_units("e", compare_states[iz].e, ref.e, + ref.trunc_err_e); + if (!within_saved_err(compare_states[iz].e, ref.e, ref.trunc_err_e)) { + mismatches.push_back("e"); + } + for (int n = 0; n < NumSpec; ++n) { + const std::string species_field = + "xn(" + std::string(short_spec_names_cxx[n]) + ")"; + update_saved_err_units(species_field, compare_states[iz].xn[n], + ref.xn[n], ref.trunc_err_xn[n]); + if (!within_saved_err(compare_states[iz].xn[n], ref.xn[n], + ref.trunc_err_xn[n])) { + mismatches.push_back(species_field); + } + } + } else { + if (!within_tol(compare_states[iz].T, ref.T)) { + mismatches.push_back("T"); + } + if (!within_tol(compare_states[iz].e, ref.e)) { + mismatches.push_back("e"); + } + for (int n = 0; n < NumSpec; ++n) { + if (!within_tol(compare_states[iz].xn[n], ref.xn[n])) { + mismatches.push_back("xn(" + std::string(short_spec_names_cxx[n]) + + ")"); + } + } + } + + if (mismatches.empty()) { + ++zones_passed; + std::cout << "zone " << iz << " : PASS" << std::endl; + } else { + ++zones_failed; + std::cout << "zone " << iz << " : FAIL ["; + for (int i = 0; i < static_cast(mismatches.size()); ++i) { + if (i > 0) { + std::cout << ", "; + } + std::cout << mismatches[i]; + } + std::cout << "]" << std::endl; + } + } + + std::cout << "Saved-state comparison summary: passed=" << zones_passed + << ", failed=" << zones_failed + << ", missing_rows=" << missing_rows << std::endl; + if (max_saved_err_zone >= 0) { + std::cout << "Largest saved-state difference in truncation-error units: " + << std::fixed << std::setprecision(3) << max_saved_err_units + << std::defaultfloat << " (zone " << max_saved_err_zone + << ", field " << max_saved_err_field << ")" << std::endl; + } + return (compare_backend_success && zones_failed == 0); + } + auto report_richardson = [&](const std::string &label, const std::vector &l0, const std::vector &l1, @@ -1087,12 +1304,6 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu) -> int { } } - auto within_tol = [&](amrex::Real a, amrex::Real b) -> bool { - amrex::Real diff = std::abs(a - b); - amrex::Real scale = std::max(std::abs(a), std::abs(b)); - return diff <= (abs_tol + rel_tol * scale); - }; - struct MismatchStats { int count = 0; amrex::Real max_abs_diff = 0.0_rt; diff --git a/unit_test/burn_cell_primordial_chem/burn_cell_final_state.txt b/unit_test/burn_cell_primordial_chem/burn_cell_final_state.txt new file mode 100644 index 0000000000..c92688e922 --- /dev/null +++ b/unit_test/burn_cell_primordial_chem/burn_cell_final_state.txt @@ -0,0 +1,3 @@ +# burn_cell_primordial_chem final state (CPU, tolerance level 1) +# columns: zone time rho T e success xn_E xn_Hp xn_H xn_Hm xn_Dp xn_D xn_H2p xn_Dm xn_H2 xn_HDp xn_HD xn_HEpp xn_HEp xn_HE truncation_valid truncation_err_T truncation_err_e truncation_err_xn_E truncation_err_xn_Hp truncation_err_xn_H truncation_err_xn_Hm truncation_err_xn_Dp truncation_err_xn_D truncation_err_xn_H2p truncation_err_xn_Dm truncation_err_xn_H2 truncation_err_xn_HDp truncation_err_xn_HD truncation_err_xn_HEpp truncation_err_xn_HEp truncation_err_xn_HE +0 2.9012387103716860e+15 1.9961120914752843e-06 3.0220039253542591e+03 2.7057252655131058e+11 1 2.1893650918390576e+04 2.1876795602568498e+04 1.6413848966501424e+17 2.0318005106263142e+00 1.0126163773370691e-13 4.8526995588950339e-01 1.8887116332697953e+01 5.6097856365484663e-18 3.7322447569746246e+17 8.4238862248256666e-21 5.0548695829416816e+00 4.6969874683631905e-60 6.4625573688153329e-12 7.0563330696716992e+16 1 1.9382470267505686e-01 2.1908306639547002e+07 6.2134551579068166e+01 6.2090238616582923e+01 8.6552519123699500e+13 5.7594400197570356e-03 8.3625042756144710e-13 4.0429234333147477e+00 5.0072163679372422e-02 4.6310926407628473e-17 -4.3279217491960219e+13 6.9550109718833353e-20 4.2214669845038465e+01 3.6058714783733737e-61 3.1929064523884894e-14 1.7938150956989748e+09 diff --git a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem index 69bbab0532..e47c409fd8 100644 --- a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem +++ b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem @@ -67,7 +67,7 @@ integrator.X_reject_buffer = 1e100 # Set which jacobian to use # 1 = analytic jacobian # 2 = numerical jacobian -integrator.jacobian = 2 +integrator.jacobian = 1 # do you want to normalize abundances within VODE? (you don't!) integrator.renormalize_abundances = 0 diff --git a/unit_test/burn_cell_primordial_chem/main.cpp b/unit_test/burn_cell_primordial_chem/main.cpp index 6647105a90..b86109f450 100644 --- a/unit_test/burn_cell_primordial_chem/main.cpp +++ b/unit_test/burn_cell_primordial_chem/main.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -15,7 +16,40 @@ using namespace amrex; int main(int argc, char *argv[]) { - amrex::Initialize(argc, argv); + std::string compare_final_state_file; + std::vector amrex_argv; + amrex_argv.reserve(static_cast(argc)); + amrex_argv.push_back(argv[0]); + + for (int i = 1; i < argc; ++i) { + std::string arg(argv[i]); + if (arg == "--compare-final-state") { + if (i + 1 >= argc) { + std::cerr << "ERROR: --compare-final-state requires a file path" + << std::endl; + return 1; + } + compare_final_state_file = argv[++i]; + continue; + } + + constexpr const char *prefix = "--compare-final-state="; + if (arg.rfind(prefix, 0) == 0) { + compare_final_state_file = arg.substr(std::strlen(prefix)); + if (compare_final_state_file.empty()) { + std::cerr << "ERROR: --compare-final-state requires a non-empty file path" + << std::endl; + return 1; + } + continue; + } + + amrex_argv.push_back(argv[i]); + } + + int amrex_argc = static_cast(amrex_argv.size()); + char **amrex_argv_ptr = amrex_argv.data(); + amrex::Initialize(amrex_argc, amrex_argv_ptr); int success = 0; { @@ -47,7 +81,12 @@ int main(int argc, char *argv[]) { #endif std::cout << "\nstarting the multi-zone burn..." << std::endl; - int multi_success = burn_cell_multi_c(n_zones, enable_gpu); + if (!compare_final_state_file.empty()) { + std::cout << "saved-state comparison file: " << compare_final_state_file + << std::endl; + } + int multi_success = + burn_cell_multi_c(n_zones, enable_gpu, compare_final_state_file); success = multi_success; } From ad6bbbecfab07f3b1806b18f655557df3e7ed2f9 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Sat, 7 Feb 2026 14:30:09 -0500 Subject: [PATCH 40/50] workaround --- integration/utils/jacobian_utilities.H | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/integration/utils/jacobian_utilities.H b/integration/utils/jacobian_utilities.H index 291fc939fd..52cfb299e2 100644 --- a/integration/utils/jacobian_utilities.H +++ b/integration/utils/jacobian_utilities.H @@ -2,7 +2,8 @@ #define JACOBIAN_UTILITIES_H #ifndef AMREX_USE_GPU -#include +#include +#include #endif #include @@ -31,20 +32,20 @@ print_jacobian (const T& jac) { constexpr int int_neqs = integrator_neqs(); - std::cout << std::format("{:9} ", "*"); + std::cout << std::setw(9) << "*" << " "; for (int i = 1; i <= NumSpec; ++i) { - std::cout << std::format("{:9} ", short_spec_names_cxx[i-1]); + std::cout << std::setw(9) << short_spec_names_cxx[i-1] << " "; } - std::cout << std::format("{:9} ", "e") << std::endl; + std::cout << std::setw(9) << "e" << std::endl; for (int j = 1; j <= int_neqs; ++j) { if (j < int_neqs) { - std::cout << std::format("{:5} ", short_spec_names_cxx[j-1]); + std::cout << std::setw(5) << short_spec_names_cxx[j-1] << " "; } else { - std::cout << std::format("{:5} ", "e"); + std::cout << std::setw(5) << "e" << " "; } for (int i = 1; i <= int_neqs; ++i) { - std::cout << std::format("{:9.3g} ", jac.get(i, j)); + std::cout << std::setw(9) << std::setprecision(3) << jac.get(i, j) << " "; } std::cout << std::endl; } From ec86dd34bb836c5e34a42a4813078abcd235c233 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Sat, 7 Feb 2026 14:41:24 -0500 Subject: [PATCH 41/50] rewrite to avoid nvcc limitation --- .../burn_cell_primordial_chem/burn_cell.H | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index 25d8aec224..db5174659f 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -33,6 +33,23 @@ namespace { using Complex = std::complex; using ComplexMatrix = std::vector>; +struct BurnZoneKernel { + burn_t *states_ptr; + const amrex::Real *dt_ptr; + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + void operator()(int iz) const noexcept { + amrex::Real dt = dt_ptr[iz]; + if (dt <= 0.0_rt) { + return; + } + burn_t &state = states_ptr[iz]; + state.time = 0.0_rt; + state.suppress_failure_output = true; + burner(state, dt); + } +}; + AMREX_INLINE auto identity_complex_matrix(int n) -> ComplexMatrix { ComplexMatrix I(n, std::vector(n, Complex(0.0_rt, 0.0_rt))); @@ -513,15 +530,8 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, amrex::Gpu::streamSynchronize(); } else { burn_t *states_ptr = states_h.data(); - amrex::Real *dt_ptr = dt_h.data(); - amrex::ParallelForOMP( - n_zones, [states_ptr, dt_ptr, &burn_backend](int iz) noexcept { - amrex::Real dt = dt_ptr[iz]; - if (dt <= 0.0_rt) { - return; - } - burn_backend(states_ptr[iz], dt); - }); + const amrex::Real *dt_ptr = dt_h.data(); + amrex::ParallelForOMP(n_zones, BurnZoneKernel{states_ptr, dt_ptr}); } for (int iz = 0; iz < n_zones; ++iz) { From 6a965896ddb1cce3507a18941f1dca93ca5752ec Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Sat, 7 Feb 2026 14:45:57 -0500 Subject: [PATCH 42/50] add -latomic on aarch64 --- unit_test/burn_cell_primordial_chem/GNUmakefile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/unit_test/burn_cell_primordial_chem/GNUmakefile b/unit_test/burn_cell_primordial_chem/GNUmakefile index bb3ab88f60..9d944ad669 100644 --- a/unit_test/burn_cell_primordial_chem/GNUmakefile +++ b/unit_test/burn_cell_primordial_chem/GNUmakefile @@ -40,6 +40,14 @@ Blocs := . include $(MICROPHYSICS_HOME)/unit_test/Make.unit_test +ifeq ($(USE_CUDA),TRUE) +ifeq ($(shell uname -s),Linux) +ifeq ($(shell uname -m),aarch64) +override XTRALIBS += -latomic +endif +endif +endif + ifeq ($(shell uname),Darwin) AMREX_LINKER := ./post_link_install_name.sh $(AMREX_LINKER) endif From 61e921db758b971f618031f48a83acbe1cf36db9 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Sat, 7 Feb 2026 14:50:54 -0500 Subject: [PATCH 43/50] try to fix atomics on aarch64 CUDA --- unit_test/burn_cell_primordial_chem/GNUmakefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/unit_test/burn_cell_primordial_chem/GNUmakefile b/unit_test/burn_cell_primordial_chem/GNUmakefile index 9d944ad669..3995a61a79 100644 --- a/unit_test/burn_cell_primordial_chem/GNUmakefile +++ b/unit_test/burn_cell_primordial_chem/GNUmakefile @@ -43,6 +43,9 @@ include $(MICROPHYSICS_HOME)/unit_test/Make.unit_test ifeq ($(USE_CUDA),TRUE) ifeq ($(shell uname -s),Linux) ifeq ($(shell uname -m),aarch64) +# NVTX3 can trigger GCC outlined atomics on AArch64; make link+codegen robust. +EXTRACXXFLAGS += -mno-outline-atomics +NVCC_FLAGS += -Xcompiler -mno-outline-atomics override XTRALIBS += -latomic endif endif From 561cb6b5dd13e65c9185141c14ab2813238144c6 Mon Sep 17 00:00:00 2001 From: Benjamin Wibking Date: Sat, 7 Feb 2026 13:57:23 -0600 Subject: [PATCH 44/50] add batch script --- .../burn_cell_primordial_chem/dtai-1gpu.submit | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 unit_test/burn_cell_primordial_chem/dtai-1gpu.submit diff --git a/unit_test/burn_cell_primordial_chem/dtai-1gpu.submit b/unit_test/burn_cell_primordial_chem/dtai-1gpu.submit new file mode 100644 index 0000000000..24719650b0 --- /dev/null +++ b/unit_test/burn_cell_primordial_chem/dtai-1gpu.submit @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --mem=64G +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --ntasks-per-socket=1 +#SBATCH --cpus-per-task=64 +#SBATCH --partition=ghx4 +#SBATCH --time=00:10:00 +#SBATCH --job-name=quokka-1gpu +#SBATCH --account=cvz-dtai-gh +#SBATCH --gpus-per-node=1 +#SBATCH --gpu-bind=verbose,closest + +srun ./main1d.gnu.CUDA.ex inputs_primordial_chem --compare-final-state burn_cell_final_state.txt From 83e40d674aab8a7ffc97ae22ed0239cad33f2ebc Mon Sep 17 00:00:00 2001 From: Benjamin Wibking Date: Sat, 7 Feb 2026 14:06:16 -0600 Subject: [PATCH 45/50] production test settings --- .../burn_cell_final_state.txt | 3 --- .../burn_cell_primordial_chem/dtai-cpu.submit | 14 ++++++++++++++ .../inputs_primordial_chem | 4 ++-- 3 files changed, 16 insertions(+), 5 deletions(-) delete mode 100644 unit_test/burn_cell_primordial_chem/burn_cell_final_state.txt create mode 100644 unit_test/burn_cell_primordial_chem/dtai-cpu.submit diff --git a/unit_test/burn_cell_primordial_chem/burn_cell_final_state.txt b/unit_test/burn_cell_primordial_chem/burn_cell_final_state.txt deleted file mode 100644 index c92688e922..0000000000 --- a/unit_test/burn_cell_primordial_chem/burn_cell_final_state.txt +++ /dev/null @@ -1,3 +0,0 @@ -# burn_cell_primordial_chem final state (CPU, tolerance level 1) -# columns: zone time rho T e success xn_E xn_Hp xn_H xn_Hm xn_Dp xn_D xn_H2p xn_Dm xn_H2 xn_HDp xn_HD xn_HEpp xn_HEp xn_HE truncation_valid truncation_err_T truncation_err_e truncation_err_xn_E truncation_err_xn_Hp truncation_err_xn_H truncation_err_xn_Hm truncation_err_xn_Dp truncation_err_xn_D truncation_err_xn_H2p truncation_err_xn_Dm truncation_err_xn_H2 truncation_err_xn_HDp truncation_err_xn_HD truncation_err_xn_HEpp truncation_err_xn_HEp truncation_err_xn_HE -0 2.9012387103716860e+15 1.9961120914752843e-06 3.0220039253542591e+03 2.7057252655131058e+11 1 2.1893650918390576e+04 2.1876795602568498e+04 1.6413848966501424e+17 2.0318005106263142e+00 1.0126163773370691e-13 4.8526995588950339e-01 1.8887116332697953e+01 5.6097856365484663e-18 3.7322447569746246e+17 8.4238862248256666e-21 5.0548695829416816e+00 4.6969874683631905e-60 6.4625573688153329e-12 7.0563330696716992e+16 1 1.9382470267505686e-01 2.1908306639547002e+07 6.2134551579068166e+01 6.2090238616582923e+01 8.6552519123699500e+13 5.7594400197570356e-03 8.3625042756144710e-13 4.0429234333147477e+00 5.0072163679372422e-02 4.6310926407628473e-17 -4.3279217491960219e+13 6.9550109718833353e-20 4.2214669845038465e+01 3.6058714783733737e-61 3.1929064523884894e-14 1.7938150956989748e+09 diff --git a/unit_test/burn_cell_primordial_chem/dtai-cpu.submit b/unit_test/burn_cell_primordial_chem/dtai-cpu.submit new file mode 100644 index 0000000000..7707eb4ca6 --- /dev/null +++ b/unit_test/burn_cell_primordial_chem/dtai-cpu.submit @@ -0,0 +1,14 @@ +#!/bin/bash +#SBATCH --mem=64G +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --ntasks-per-socket=1 +#SBATCH --cpus-per-task=64 +#SBATCH --partition=ghx4 +#SBATCH --time=01:00:00 +#SBATCH --job-name=quokka-1gpu +#SBATCH --account=cvz-dtai-gh +#SBATCH --gpus-per-node=1 +#SBATCH --gpu-bind=verbose,closest + +srun ./main1d.gnu.arm-grace.OMP.ex inputs_primordial_chem diff --git a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem index e47c409fd8..fde88be45b 100644 --- a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem +++ b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem @@ -1,5 +1,5 @@ -unit_test.n_zones = 1 -#unit_test.n_zones = 32**3 +#unit_test.n_zones = 1 +unit_test.n_zones = 16**3 integrator.burner_verbose = 0 ## unit_test runtime parameters From 875e816b9cabaa17881e546f8751917d1fb55591 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Mon, 9 Feb 2026 12:36:57 -0500 Subject: [PATCH 46/50] show progress bar --- .../burn_cell_primordial_chem/_parameters | 2 + .../burn_cell_primordial_chem/burn_cell.H | 51 +++++++++++++++++-- .../inputs_primordial_chem | 3 ++ 3 files changed, 51 insertions(+), 5 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/_parameters b/unit_test/burn_cell_primordial_chem/_parameters index 8970ecc2fb..b1e5fab15e 100644 --- a/unit_test/burn_cell_primordial_chem/_parameters +++ b/unit_test/burn_cell_primordial_chem/_parameters @@ -16,6 +16,8 @@ tfirst real 0.0 # number of steps for the single zone burn nsteps int 1000 +# print multi-zone burn progress every N timesteps (<= 0 disables) +burn_progress_interval int 100 # estimate truncation error with three tolerance levels and Richardson extrapolation richardson_enable bool 1 diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index db5174659f..02b40fce7b 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -462,6 +462,45 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, return status; }; + auto precompute_total_steps = [&]() -> int { + auto dd_pc = dd_h; + auto active_pc = active_h; + int steps = 0; + + for (int step = 0; step < unit_test_rp::nsteps; ++step) { + bool any_active = false; + + for (int iz = 0; iz < n_zones; ++iz) { + if (active_pc[iz] == 0) { + continue; + } + + amrex::Real dd1 = dd_pc[iz]; + amrex::Real tff = + std::sqrt(M_PI * 3.0_rt / (32.0_rt * dd1 * grav_constant)); + amrex::Real dt = unit_test_rp::tff_reduc * tff; + amrex::Real dd_new = dd1 + dt * (dd1 / tff); + + if (dt < 10.0_rt || dd_new > 2.0e-6_rt) { + active_pc[iz] = 0; + continue; + } + + dd_pc[iz] = dd_new; + any_active = true; + } + + if (!any_active) { + break; + } + ++steps; + } + + return steps; + }; + + const int total_steps = precompute_total_steps(); + for (int step = 0; step < unit_test_rp::nsteps; ++step) { bool any_active = false; @@ -472,12 +511,8 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, } amrex::Real dd1 = dd_h[iz]; - amrex::Real rhotmp = 0.0_rt; - for (int nn = 0; nn < NumSpec; ++nn) { - rhotmp += states_h[iz].xn[nn] * spmasses[nn]; - } amrex::Real tff = - std::sqrt(M_PI * 3.0_rt / (32.0_rt * rhotmp * grav_constant)); + std::sqrt(M_PI * 3.0_rt / (32.0_rt * dd1 * grav_constant)); amrex::Real dt = unit_test_rp::tff_reduc * tff; amrex::Real dd_new = dd1 + dt * (dd1 / tff); @@ -505,6 +540,12 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, break; } + if (unit_test_rp::burn_progress_interval > 0 && + (step + 1) % unit_test_rp::burn_progress_interval == 0) { + std::cout << "Burn progress: timestep " << (step + 1) << " / " + << total_steps << std::endl; + } + if (use_gpu) { amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, states_h.begin(), states_h.end(), states_d.begin()); diff --git a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem index e47c409fd8..f4569278ac 100644 --- a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem +++ b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem @@ -1,5 +1,6 @@ unit_test.n_zones = 1 #unit_test.n_zones = 32**3 + integrator.burner_verbose = 0 ## unit_test runtime parameters @@ -11,6 +12,8 @@ unit_test.tff_reduc = 0.03 # number of integration steps unit_test.nsteps = 1e4 +unit_test.burn_progress_interval = 10 + unit_test.richardson_enable = 1 unit_test.richardson_tol_factor = 10.0 # set to 1 to print per-zone Richardson estimates From cbfd1a4d0e9e011e76de17ed39be17add6d5697b Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Mon, 9 Feb 2026 12:55:24 -0500 Subject: [PATCH 47/50] fix CPU OMP scaling --- .../burn_cell_primordial_chem/_parameters | 3 + .../burn_cell_primordial_chem/burn_cell.H | 90 ++++++++++++++----- .../inputs_primordial_chem | 2 + 3 files changed, 73 insertions(+), 22 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/_parameters b/unit_test/burn_cell_primordial_chem/_parameters index b1e5fab15e..0d0d88c74e 100644 --- a/unit_test/burn_cell_primordial_chem/_parameters +++ b/unit_test/burn_cell_primordial_chem/_parameters @@ -26,6 +26,9 @@ richardson_print_per_zone bool 0 # non-trace threshold as a fraction of the most abundant species in each zone richardson_trace_species_cutoff real 0.0 +# compute per-zone Jacobian stiffness diagnostics (expensive on CPU) +compute_stiffness_diagnostics bool 0 + # initial temperature temperature real 1e2 diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index 02b40fce7b..6bb57913e7 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -371,6 +371,7 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, struct MultiZoneSummary { int failures = 0; int retries = 0; + bool stiffness_enabled = false; amrex::Real t_min = std::numeric_limits::max(); amrex::Real t_max = -std::numeric_limits::max(); amrex::Real stiffness_ratio_min = std::numeric_limits::max(); @@ -390,6 +391,7 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, std::vector active_h(n_zones, 1); summary = MultiZoneSummary{}; + summary.stiffness_enabled = unit_test_rp::compute_stiffness_diagnostics; int retry_total = 0; for (int iz = 0; iz < n_zones; ++iz) { @@ -468,8 +470,10 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, int steps = 0; for (int step = 0; step < unit_test_rp::nsteps; ++step) { - bool any_active = false; - + int any_active_int = 0; +#ifdef AMREX_USE_OMP +#pragma omp parallel for schedule(dynamic, 64) reduction(max : any_active_int) +#endif for (int iz = 0; iz < n_zones; ++iz) { if (active_pc[iz] == 0) { continue; @@ -487,9 +491,10 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, } dd_pc[iz] = dd_new; - any_active = true; + any_active_int = 1; } + const bool any_active = (any_active_int != 0); if (!any_active) { break; } @@ -502,8 +507,10 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, const int total_steps = precompute_total_steps(); for (int step = 0; step < unit_test_rp::nsteps; ++step) { - bool any_active = false; - + int any_active_int = 0; +#ifdef AMREX_USE_OMP +#pragma omp parallel for schedule(dynamic, 64) reduction(max : any_active_int) +#endif for (int iz = 0; iz < n_zones; ++iz) { if (active_h[iz] == 0) { dt_h[iz] = 0.0_rt; @@ -533,9 +540,10 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, dd_h[iz] = dd_new; dt_h[iz] = dt; - any_active = true; + any_active_int = 1; } + const bool any_active = (any_active_int != 0); if (!any_active) { break; } @@ -572,9 +580,32 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, } else { burn_t *states_ptr = states_h.data(); const amrex::Real *dt_ptr = dt_h.data(); - amrex::ParallelForOMP(n_zones, BurnZoneKernel{states_ptr, dt_ptr}); +#ifdef AMREX_USE_OMP +#pragma omp parallel for schedule(dynamic, 32) +#endif + for (int iz = 0; iz < n_zones; ++iz) { + amrex::Real dt = dt_ptr[iz]; + if (dt <= 0.0_rt) { + continue; + } + burn_t &state = states_ptr[iz]; + state.time = 0.0_rt; + state.suppress_failure_output = true; + burner(state, dt); + } } + int retry_total_step = 0; + amrex::Real stiffness_ratio_min_step = + std::numeric_limits::max(); + amrex::Real stiffness_ratio_max_step = 0.0_rt; + int stiffness_samples_step = 0; + +#ifdef AMREX_USE_OMP +#pragma omp parallel for schedule(dynamic, 32) \ + reduction(+ : retry_total_step, stiffness_samples_step) \ + reduction(min : stiffness_ratio_min_step) reduction(max : stiffness_ratio_max_step) +#endif for (int iz = 0; iz < n_zones; ++iz) { if (dt_h[iz] <= 0.0_rt) { continue; @@ -585,7 +616,7 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, if (!state.success || state.e <= 0.0_rt) { burn_t retry_state = states_before[iz]; retry_state.suppress_failure_output = false; - ++retry_total; + ++retry_total_step; if (integrate_with_retries(retry_state, dt_h[iz], 0)) { state = retry_state; } else { @@ -599,16 +630,27 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, state.suppress_failure_output = false; enforce_post_burn_constraints(state); - const auto stiff = compute_jacobian_stiffness(state); - if (stiff.valid) { - summary.stiffness_ratio_min = - std::min(summary.stiffness_ratio_min, stiff.ratio); - summary.stiffness_ratio_max = - std::max(summary.stiffness_ratio_max, stiff.ratio); - ++summary.stiffness_samples; + if (summary.stiffness_enabled) { + const auto stiff = compute_jacobian_stiffness(state); + if (stiff.valid) { + stiffness_ratio_min_step = + std::min(stiffness_ratio_min_step, stiff.ratio); + stiffness_ratio_max_step = + std::max(stiffness_ratio_max_step, stiff.ratio); + ++stiffness_samples_step; + } } time_h[iz] += dt_h[iz]; } + + retry_total += retry_total_step; + if (summary.stiffness_enabled && stiffness_samples_step > 0) { + summary.stiffness_ratio_min = + std::min(summary.stiffness_ratio_min, stiffness_ratio_min_step); + summary.stiffness_ratio_max = + std::max(summary.stiffness_ratio_max, stiffness_ratio_max_step); + summary.stiffness_samples += stiffness_samples_step; + } } summary.failures = 0; @@ -644,14 +686,18 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, << summary.t_min << ", " << summary.t_max << "]" << std::endl; std::cout << " retries applied : " << summary.retries << std::endl; - if (summary.stiffness_samples > 0) { - std::cout << " stiffness ratio range : [" - << std::scientific << summary.stiffness_ratio_min << ", " - << summary.stiffness_ratio_max << "]" - << std::defaultfloat << " (samples=" - << summary.stiffness_samples << ")" << std::endl; + if (summary.stiffness_enabled) { + if (summary.stiffness_samples > 0) { + std::cout << " stiffness ratio range : [" + << std::scientific << summary.stiffness_ratio_min << ", " + << summary.stiffness_ratio_max << "]" + << std::defaultfloat << " (samples=" + << summary.stiffness_samples << ")" << std::endl; + } else { + std::cout << " stiffness ratio range : unavailable" << std::endl; + } } else { - std::cout << " stiffness ratio range : unavailable" << std::endl; + std::cout << " stiffness diagnostics : disabled" << std::endl; } } else { std::cout << " status : no successful zones" diff --git a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem index 387bdb9876..694144ec93 100644 --- a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem +++ b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem @@ -19,6 +19,8 @@ unit_test.richardson_tol_factor = 10.0 unit_test.richardson_print_per_zone = 0 # non-trace threshold as fraction of most abundant species per zone unit_test.richardson_trace_species_cutoff = 1e-15 +# set to 1 to compute per-zone Jacobian stiffness diagnostics (expensive) +unit_test.compute_stiffness_diagnostics = 0 # max total time unit_test.tmax = 7.e20 From 388f91a613de6828dbca41bc8fc5dc9b202c4ab8 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Mon, 9 Feb 2026 13:52:25 -0500 Subject: [PATCH 48/50] use numerical jacobian --- .../burn_cell_primordial_chem/burn_cell.H | 56 +++++++++++-------- .../inputs_primordial_chem | 6 +- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index 6bb57913e7..e69719bae7 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -735,6 +735,11 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, !compare_against_saved && unit_test_rp::richardson_enable && (tol_factor > 1.0_rt); const int n_tol_levels = do_richardson ? 3 : 1; + const std::array richardson_scales = { + 1.0_rt, 1.0_rt / tol_factor, tol_factor}; + constexpr int richardson_base_level = 0; + constexpr int richardson_loose_level = 1; + constexpr int richardson_tight_level = 2; const bool compare_backend_is_gpu = compare_against_saved && enable_gpu; const bool run_gpu_backend = compare_against_saved ? compare_backend_is_gpu : enable_gpu; @@ -781,7 +786,7 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, bool gpu_success = true; for (int ilev = 0; ilev < n_tol_levels; ++ilev) { - amrex::Real scale = std::pow(tol_factor, static_cast(ilev)); + amrex::Real scale = do_richardson ? richardson_scales[ilev] : 1.0_rt; level_results[ilev].scale = scale; apply_tolerance_scale(scale); @@ -871,11 +876,11 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, amrex::Real delta01 = 0.0_rt; }; - auto estimate_richardson = [&](amrex::Real y0, amrex::Real y1, - amrex::Real y2) -> RichardsonEstimate { + auto estimate_richardson = [&](amrex::Real y_loose, amrex::Real y_base, + amrex::Real y_tight) -> RichardsonEstimate { RichardsonEstimate est; - const amrex::Real d01 = y0 - y1; - const amrex::Real d12 = y1 - y2; + const amrex::Real d01 = y_loose - y_base; + const amrex::Real d12 = y_base - y_tight; est.delta01 = d01; constexpr amrex::Real tiny = 1.0e-300_rt; @@ -904,7 +909,7 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, return est; } - amrex::Real denom = 1.0_rt - std::pow(tol_factor, -est.p); + amrex::Real denom = std::pow(tol_factor, est.p) - 1.0_rt; if (std::abs(denom) <= tiny || !std::isfinite(denom)) { est.err = d01; return est; @@ -1149,8 +1154,11 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, }; std::cout << label << " Richardson truncation estimate:" << std::endl; - std::cout << " levels with scales : [1, " << tol_factor << ", " - << (tol_factor * tol_factor) << "]" << std::endl; + std::cout << " levels with scales : [" + << richardson_scales[richardson_loose_level] << ", " + << richardson_scales[richardson_base_level] << ", " + << richardson_scales[richardson_tight_level] << "]" + << std::endl; for (int iz = 0; iz < n_zones; ++iz) { const bool ok0 = l0[iz].success && l0[iz].e > 0.0_rt; @@ -1172,9 +1180,10 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, } max_abs_temp_err = std::max(max_abs_temp_err, std::abs(t_est.err)); max_abs_energy_err = std::max(max_abs_energy_err, std::abs(e_est.err)); - max_rel_temp_err = std::max(max_rel_temp_err, rel_error(t_est.err, l2[iz].T)); + max_rel_temp_err = + std::max(max_rel_temp_err, rel_error(t_est.err, l1[iz].T)); max_rel_energy_err = - std::max(max_rel_energy_err, rel_error(e_est.err, l2[iz].e)); + std::max(max_rel_energy_err, rel_error(e_est.err, l1[iz].e)); amrex::Real zone_species_linf = 0.0_rt; amrex::Real zone_rel_species_linf = 0.0_rt; @@ -1182,16 +1191,16 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, bool zone_first_species = true; amrex::Real zone_max_xn = 0.0_rt; for (int n = 0; n < NumSpec; ++n) { - zone_max_xn = std::max(zone_max_xn, std::abs(l2[iz].xn[n])); + zone_max_xn = std::max(zone_max_xn, std::abs(l1[iz].xn[n])); } const amrex::Real zone_trace_cutoff = trace_rel_cutoff * zone_max_xn; for (int n = 0; n < NumSpec; ++n) { const RichardsonEstimate xn_est = estimate_richardson(l0[iz].xn[n], l1[iz].xn[n], l2[iz].xn[n]); zone_species_linf = std::max(zone_species_linf, std::abs(xn_est.err)); - if (std::abs(l2[iz].xn[n]) >= zone_trace_cutoff) { + if (std::abs(l1[iz].xn[n]) >= zone_trace_cutoff) { zone_rel_species_linf = std::max( - zone_rel_species_linf, rel_error(xn_est.err, l2[iz].xn[n])); + zone_rel_species_linf, rel_error(xn_est.err, l1[iz].xn[n])); ++species_rel_count; ++species_rel_hit_count[n]; if (!zone_first_species) { @@ -1260,8 +1269,9 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, }; if (do_richardson && n_tol_levels == 3) { - report_richardson("CPU", level_results[0].cpu.states, - level_results[1].cpu.states, level_results[2].cpu.states); + report_richardson("CPU", level_results[richardson_loose_level].cpu.states, + level_results[richardson_base_level].cpu.states, + level_results[richardson_tight_level].cpu.states); } struct ZoneTruncationEstimate { @@ -1274,9 +1284,9 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, std::vector cpu_zone_truncation; if (do_richardson && n_tol_levels == 3) { cpu_zone_truncation.resize(n_zones); - const auto &cpu_l0 = level_results[0].cpu.states; - const auto &cpu_l1 = level_results[1].cpu.states; - const auto &cpu_l2 = level_results[2].cpu.states; + const auto &cpu_l0 = level_results[richardson_loose_level].cpu.states; + const auto &cpu_l1 = level_results[richardson_base_level].cpu.states; + const auto &cpu_l2 = level_results[richardson_tight_level].cpu.states; for (int iz = 0; iz < n_zones; ++iz) { auto &est = cpu_zone_truncation[iz]; @@ -1361,9 +1371,9 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, std::vector cpu_zone_thresholds; if (use_richardson_gpu_failure) { cpu_zone_thresholds.resize(n_zones); - const auto &cpu_l0 = level_results[0].cpu.states; - const auto &cpu_l1 = level_results[1].cpu.states; - const auto &cpu_l2 = level_results[2].cpu.states; + const auto &cpu_l0 = level_results[richardson_loose_level].cpu.states; + const auto &cpu_l1 = level_results[richardson_base_level].cpu.states; + const auto &cpu_l2 = level_results[richardson_tight_level].cpu.states; const amrex::Real trace_rel_cutoff = amrex::max(unit_test_rp::richardson_trace_species_cutoff, 0.0_rt); @@ -1386,7 +1396,7 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, amrex::Real zone_max_xn = 0.0_rt; for (int n = 0; n < NumSpec; ++n) { - zone_max_xn = std::max(zone_max_xn, std::abs(cpu_l2[iz].xn[n])); + zone_max_xn = std::max(zone_max_xn, std::abs(cpu_l1[iz].xn[n])); } const amrex::Real zone_trace_cutoff = trace_rel_cutoff * zone_max_xn; @@ -1396,7 +1406,7 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, cpu_l2[iz].xn[n]) .err); thr.species_non_trace[n] = - (std::abs(cpu_l2[iz].xn[n]) >= zone_trace_cutoff) ? 1 : 0; + (std::abs(cpu_l1[iz].xn[n]) >= zone_trace_cutoff) ? 1 : 0; } } } diff --git a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem index 694144ec93..8726853055 100644 --- a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem +++ b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem @@ -1,5 +1,5 @@ -#unit_test.n_zones = 1 -unit_test.n_zones = 16**3 +unit_test.n_zones = 1 +#unit_test.n_zones = 16**3 integrator.burner_verbose = 0 ## unit_test runtime parameters @@ -71,7 +71,7 @@ integrator.X_reject_buffer = 1e100 # Set which jacobian to use # 1 = analytic jacobian # 2 = numerical jacobian -integrator.jacobian = 1 +integrator.jacobian = 2 # do you want to normalize abundances within VODE? (you don't!) integrator.renormalize_abundances = 0 From 269e110a85377dcf4b2857792214a9a26992f614 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Mon, 9 Feb 2026 13:52:37 -0500 Subject: [PATCH 49/50] fix linking on macos --- unit_test/burn_cell_primordial_chem/GNUmakefile | 2 +- unit_test/burn_cell_primordial_chem/post_link_install_name.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/GNUmakefile b/unit_test/burn_cell_primordial_chem/GNUmakefile index 3995a61a79..3bb548690b 100644 --- a/unit_test/burn_cell_primordial_chem/GNUmakefile +++ b/unit_test/burn_cell_primordial_chem/GNUmakefile @@ -10,7 +10,7 @@ COMP = gnu EXTRACXXFLAGS += -Wno-psabi USE_MPI = FALSE -USE_OMP = FALSE +USE_OMP = TRUE #set USE_CUDA to TRUE to compile and run on GPUs USE_CUDA = FALSE USE_REACT = TRUE diff --git a/unit_test/burn_cell_primordial_chem/post_link_install_name.sh b/unit_test/burn_cell_primordial_chem/post_link_install_name.sh index 39a7b74e0f..982ebe4978 100755 --- a/unit_test/burn_cell_primordial_chem/post_link_install_name.sh +++ b/unit_test/burn_cell_primordial_chem/post_link_install_name.sh @@ -38,7 +38,7 @@ fi target_root="/opt/zerobrew/prefix/lib/gcc/current" -for lib in libgfortran.5.dylib libquadmath.0.dylib libstdc++.6.dylib; do +for lib in libgfortran.5.dylib libquadmath.0.dylib libstdc++.6.dylib libgomp.1.dylib; do new_path="${target_root}/${lib}" if [ ! -e "$new_path" ]; then continue From f9d293d38df90e1c0b2d32cfe4d517e750466022 Mon Sep 17 00:00:00 2001 From: Ben Wibking Date: Mon, 9 Feb 2026 14:43:20 -0500 Subject: [PATCH 50/50] use less aggressive tolerance spacing --- .../burn_cell_primordial_chem/burn_cell.H | 18 +++++++++--------- .../inputs_primordial_chem | 3 +-- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/unit_test/burn_cell_primordial_chem/burn_cell.H b/unit_test/burn_cell_primordial_chem/burn_cell.H index e69719bae7..bf8c56d65b 100644 --- a/unit_test/burn_cell_primordial_chem/burn_cell.H +++ b/unit_test/burn_cell_primordial_chem/burn_cell.H @@ -728,18 +728,18 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, const amrex::Real base_retry_rtol_enuc = integrator_rp::retry_rtol_enuc; const amrex::Real base_retry_atol_enuc = integrator_rp::retry_atol_enuc; - const amrex::Real tol_factor = - amrex::max(unit_test_rp::richardson_tol_factor, 1.0_rt); const bool compare_against_saved = !compare_final_state_file.empty(); const bool do_richardson = - !compare_against_saved && unit_test_rp::richardson_enable && - (tol_factor > 1.0_rt); + !compare_against_saved && unit_test_rp::richardson_enable; const int n_tol_levels = do_richardson ? 3 : 1; const std::array richardson_scales = { - 1.0_rt, 1.0_rt / tol_factor, tol_factor}; - constexpr int richardson_base_level = 0; - constexpr int richardson_loose_level = 1; + 0.5_rt, 1.0_rt, 2.0_rt}; + constexpr int richardson_loose_level = 0; + constexpr int richardson_base_level = 1; constexpr int richardson_tight_level = 2; + const amrex::Real richardson_ratio = + richardson_scales[richardson_tight_level] / + richardson_scales[richardson_base_level]; const bool compare_backend_is_gpu = compare_against_saved && enable_gpu; const bool run_gpu_backend = compare_against_saved ? compare_backend_is_gpu : enable_gpu; @@ -903,13 +903,13 @@ auto burn_cell_multi_c(int n_zones, bool enable_gpu, return est; } - est.p = std::log(ratio) / std::log(tol_factor); + est.p = std::log(ratio) / std::log(richardson_ratio); if (!std::isfinite(est.p)) { est.err = d01; return est; } - amrex::Real denom = std::pow(tol_factor, est.p) - 1.0_rt; + amrex::Real denom = std::pow(richardson_ratio, est.p) - 1.0_rt; if (std::abs(denom) <= tiny || !std::isfinite(denom)) { est.err = d01; return est; diff --git a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem index 8726853055..0e693eed17 100644 --- a/unit_test/burn_cell_primordial_chem/inputs_primordial_chem +++ b/unit_test/burn_cell_primordial_chem/inputs_primordial_chem @@ -1,5 +1,4 @@ -unit_test.n_zones = 1 -#unit_test.n_zones = 16**3 +unit_test.n_zones = 16**3 integrator.burner_verbose = 0 ## unit_test runtime parameters