From 5c212372be561c12838b57af53dcfd086263af33 Mon Sep 17 00:00:00 2001 From: Tobias Hienzsch Date: Sun, 29 Sep 2024 00:25:50 +0200 Subject: [PATCH] [cpp] Add Half, DoubleHalf, DoubleFloat and DoubleDouble to sim2d --- models/Modes2D/Modes2D.py | 2 +- src/cpp/main.cpp | 13 +++-- src/cpp/pffdtd/double.hpp | 12 +--- src/cpp/pffdtd/engine_cpu_2d.cpp | 16 ++++-- src/cpp/pffdtd/engine_sycl_2d.cpp | 91 ++++++++++++++++++++++++++----- src/cpp/pffdtd/precision.hpp | 5 ++ src/python/pffdtd/sim2d/setup.py | 1 - 7 files changed, 105 insertions(+), 35 deletions(-) diff --git a/models/Modes2D/Modes2D.py b/models/Modes2D/Modes2D.py index 1c861c6..1b6ed21 100644 --- a/models/Modes2D/Modes2D.py +++ b/models/Modes2D/Modes2D.py @@ -19,7 +19,7 @@ def model(*, Lx=None, Ly=None, Nx=None, Ny=None, dx=None, X=None, Y=None, in_mas room=(3.65, 6), Tc=20, rh=50, - fmax=1000.0, + fmax=20_000.0, ppw=10.5, duration=8.0, refl_coeff=0.99, diff --git a/src/cpp/main.cpp b/src/cpp/main.cpp index 2e71257..7dac46a 100644 --- a/src/cpp/main.cpp +++ b/src/cpp/main.cpp @@ -35,9 +35,14 @@ namespace { [[nodiscard]] auto precisionOptions() { - return std::map{ - {"32", pffdtd::Precision::Float}, - {"64", pffdtd::Precision::Double}, + using pffdtd::Precision; + return std::map{ + { "16", Precision::Half}, + { "32", Precision::Float}, + { "64", Precision::Double}, + {"16x2", Precision::DoubleHalf}, + {"32x2", Precision::DoubleFloat}, + {"64x2", Precision::DoubleDouble}, }; } @@ -139,7 +144,7 @@ auto main(int argc, char** argv) -> int { sim2d->add_option("-s,--sim_dir", args.sim2d.simDir)->required()->check(CLI::ExistingDirectory); sim2d->add_option("-e,--engine", args.sim2d.engine)->transform(toLower); sim2d->add_option("-o,--out", args.sim2d.out); - sim2d->add_option("-p,--precision", args.sim3d.precision) + sim2d->add_option("-p,--precision", args.sim2d.precision) ->required() ->transform(CLI::CheckedTransformer(precisionOptions(), CLI::ignore_case)); diff --git a/src/cpp/pffdtd/double.hpp b/src/cpp/pffdtd/double.hpp index d5949cc..69c70ae 100644 --- a/src/cpp/pffdtd/double.hpp +++ b/src/cpp/pffdtd/double.hpp @@ -109,21 +109,16 @@ struct Double { [[nodiscard]] static constexpr auto twoProduct(Real x, Real y) noexcept -> Double { auto r = x * y; -#if defined(SYCL_LANGUAGE_VERSION) - auto e = sycl::fma(x, y, -r); - return Double{r, e}; -#else - #if defined(__APPLE__) +#if defined(__APPLE__) or defined(__clang__) if constexpr (std::same_as) { auto e = static_cast<_Float16>(std::fma(float{x}, float{y}, float{-r})); return Double{r, e}; } else - #endif +#endif { auto e = std::fma(x, y, -r); return Double{r, e}; } -#endif } Real _high{static_cast(0)}; @@ -131,6 +126,3 @@ struct Double { }; } // namespace pffdtd - -template -struct std::numeric_limits> : std::numeric_limits {}; diff --git a/src/cpp/pffdtd/engine_cpu_2d.cpp b/src/cpp/pffdtd/engine_cpu_2d.cpp index 47a9f1d..bf0caec 100644 --- a/src/cpp/pffdtd/engine_cpu_2d.cpp +++ b/src/cpp/pffdtd/engine_cpu_2d.cpp @@ -151,12 +151,16 @@ auto run(Simulation2D const& sim) { auto EngineCPU2D::operator()(Simulation2D const& sim, Precision precision) const -> stdex::mdarray> { - if (precision == Precision::Float) { - return run(sim); - } else if (precision == Precision::Double) { - return run(sim); - } else { - raisef("invalid precision {}", static_cast(precision)); + switch (precision) { + case Precision::Float: return run(sim); + case Precision::Double: return run(sim); + case Precision::DoubleFloat: return run>(sim); + case Precision::DoubleDouble: return run>(sim); +#if defined(__APPLE__) or defined(__clang__) + case Precision::Half: return run<_Float16>(sim); + case Precision::DoubleHalf: return run>(sim); +#endif + default: raisef("invalid precision {}", static_cast(precision)); } } diff --git a/src/cpp/pffdtd/engine_sycl_2d.cpp b/src/cpp/pffdtd/engine_sycl_2d.cpp index f0c4784..78a3b5d 100644 --- a/src/cpp/pffdtd/engine_sycl_2d.cpp +++ b/src/cpp/pffdtd/engine_sycl_2d.cpp @@ -18,8 +18,66 @@ namespace pffdtd { namespace { +template +struct AirUpdate; +template +struct BoundaryRigid; +template +struct BoundaryLoss; +template +struct CopyInput; +template +struct CopyOutput; + +template +static constexpr auto min_exponent = std::numeric_limits::min_exponent; + +template +static constexpr auto min_exponent> = min_exponent; + +template<> +static constexpr auto min_exponent<_Float16> = -13; + +template +static constexpr auto max_exponent = std::numeric_limits::max_exponent; + +template +static constexpr auto max_exponent> = max_exponent; + +template<> +static constexpr auto max_exponent<_Float16> = 16; + [[nodiscard]] constexpr auto to_ixy(auto x, auto y, auto /*Nx*/, auto Ny) { return x * Ny + y; } +template +auto scaleInput(std::vector const& buf) { + static constexpr auto min_exp = static_cast(min_exponent); + static constexpr auto max_exp = static_cast(max_exponent); + fmt::println("min_exp = {}, min_exp = {}", min_exp, max_exp); + + auto const aexp = 0.5; + auto const pow2 = static_cast(std::round(aexp * max_exp + (1 - aexp) * min_exp)); + auto const norm1 = std::pow(2.0, pow2); + + auto const max_in = *std::ranges::max_element(buf, [](auto l, auto r) { return std::fabs(l) < std::fabs(r); }); + auto const inv_infac = norm1 / max_in; + auto const infac = 1.0 / inv_infac; + + std::printf( + "max_in = %.16e, pow2 = %d, norm1 = %.16e, inv_infac = %.16e, infac = " + "%.16e\n", + max_in, + pow2, + norm1, + inv_infac, + infac + ); + + auto buf_out = buf; + std::ranges::transform(buf_out, buf_out.begin(), [=](auto v) { return v * inv_infac; }); + return std::pair{buf_out, infac}; +} + template auto run(Simulation2D const& sim) { summary(sim); @@ -37,6 +95,8 @@ auto run(Simulation2D const& sim) { auto device = queue.get_device(); summary(device); + auto const [in_sigs_scaled, infac] = scaleInput(sim.in_sigs); + auto u0_buf = sycl::buffer(sycl::range<1>(Npts)); auto u1_buf = sycl::buffer(sycl::range<1>(Npts)); auto u2_buf = sycl::buffer(sycl::range<1>(Npts)); @@ -45,7 +105,7 @@ auto run(Simulation2D const& sim) { auto in_mask_buf = sycl::buffer{sim.in_mask}; auto bn_ixy_buf = sycl::buffer{sim.bn_ixy}; auto adj_bn_buf = sycl::buffer{sim.adj_bn}; - auto in_sigs_buf = sycl::buffer{sim.in_sigs}; + auto in_sigs_buf = sycl::buffer{in_sigs_scaled}; auto in_ixy_buf = sycl::buffer{sim.in_ixy}; auto out_ixy_buf = sycl::buffer{sim.out_ixy}; @@ -62,7 +122,7 @@ auto run(Simulation2D const& sim) { auto u2 = sycl::accessor{u2_buf, cgh, sycl::read_only}; auto in_mask = sycl::accessor{in_mask_buf, cgh, sycl::read_only}; - cgh.parallel_for(sycl::range<2>(Nx - 2, Ny - 2), [=](sycl::id<2> id) { + cgh.parallel_for>(sycl::range<2>(Nx - 2, Ny - 2), [=](sycl::id<2> id) { auto const x = static_cast(id[0] + 1); auto const y = static_cast(id[1] + 1); auto const idx = to_ixy(x, y, 0, Ny); @@ -88,7 +148,7 @@ auto run(Simulation2D const& sim) { auto bn_ixy = sycl::accessor{bn_ixy_buf, cgh, sycl::read_only}; auto adj_bn = sycl::accessor{adj_bn_buf, cgh, sycl::read_only}; - cgh.parallel_for(Nb, [=](sycl::id<1> id) { + cgh.parallel_for>(Nb, [=](sycl::id<1> id) { auto const ib = bn_ixy[id]; auto const K = static_cast(adj_bn[id]); @@ -111,7 +171,7 @@ auto run(Simulation2D const& sim) { auto bn_ixy = sycl::accessor{bn_ixy_buf, cgh, sycl::read_only}; auto adj_bn = sycl::accessor{adj_bn_buf, cgh, sycl::read_only}; - cgh.parallel_for(Nb, [=](sycl::id<1> id) { + cgh.parallel_for>(Nb, [=](sycl::id<1> id) { auto const ib = bn_ixy[id]; auto const K = adj_bn[id]; auto const current = u0[ib]; @@ -127,7 +187,7 @@ auto run(Simulation2D const& sim) { auto in_ixy = sycl::accessor{in_ixy_buf, cgh, sycl::read_only}; auto in_sigs = sycl::accessor{in_sigs_buf, cgh, sycl::read_only}; - cgh.parallel_for(Ns, [=](sycl::id<1> id) { + cgh.parallel_for>(Ns, [=](sycl::id<1> id) { auto src_ixy = to_ixy(id[0], n, Ns, Nt); u0[in_ixy[id[0]]] += static_cast(in_sigs[src_ixy]); }); @@ -138,7 +198,7 @@ auto run(Simulation2D const& sim) { auto out = sycl::accessor{out_buf, cgh, sycl::write_only}; auto out_ixy = sycl::accessor{out_ixy_buf, cgh, sycl::read_only}; - cgh.parallel_for(Nr, [=](sycl::id<1> id) { + cgh.parallel_for>(Nr, [=](sycl::id<1> id) { auto const r = id[0]; out[r][n] = u0[out_ixy[r]]; }); @@ -181,7 +241,7 @@ auto run(Simulation2D const& sim) { auto host = sycl::host_accessor{out_buf, sycl::read_only}; for (auto it{0UL}; it < static_cast(Nt); ++it) { for (auto ir{0UL}; ir < Nr; ++ir) { - outputs(ir, it) = static_cast(host[ir][it]); + outputs(ir, it) = static_cast(host[ir][it]) * infac; } } @@ -194,12 +254,17 @@ auto run(Simulation2D const& sim) { auto EngineSYCL2D::operator()(Simulation2D const& sim, Precision precision) const -> stdex::mdarray> { - if (precision == Precision::Float) { - return run(sim); - } else if (precision == Precision::Double) { - return run(sim); - } else { - raisef("invalid precision {}", static_cast(precision)); + switch (precision) { + case Precision::Float: return run(sim); + case Precision::Double: return run(sim); + case Precision::DoubleFloat: return run>(sim); + case Precision::DoubleDouble: return run>(sim); +#if defined(__APPLE__) or defined(__clang__) + case Precision::Half: return run<_Float16>(sim); + case Precision::DoubleHalf: return run>(sim); +#endif + + default: raisef("invalid precision {}", static_cast(precision)); } } } // namespace pffdtd diff --git a/src/cpp/pffdtd/precision.hpp b/src/cpp/pffdtd/precision.hpp index a024632..9cd10e0 100644 --- a/src/cpp/pffdtd/precision.hpp +++ b/src/cpp/pffdtd/precision.hpp @@ -6,8 +6,13 @@ namespace pffdtd { enum struct Precision { + Half, Float, Double, + + DoubleHalf, + DoubleFloat, + DoubleDouble, }; } // namespace pffdtd diff --git a/src/python/pffdtd/sim2d/setup.py b/src/python/pffdtd/sim2d/setup.py index e2974ca..8ff9f6b 100644 --- a/src/python/pffdtd/sim2d/setup.py +++ b/src/python/pffdtd/sim2d/setup.py @@ -109,7 +109,6 @@ def sim_setup_2d( b = 2/constants.Ts*np.array([1.0, -1.0]) a = np.array([1.0, 1.0]) in_sigs_f = lfilter(b, a, in_sigs, axis=-1) - in_sigs_f /= np.max(np.abs(in_sigs_f)) sps30 = dt*30 target_sps = 0.115