Skip to content

Commit

Permalink
[cpp] Add Half, DoubleHalf, DoubleFloat and DoubleDouble to sim2d
Browse files Browse the repository at this point in the history
  • Loading branch information
tobiashienzsch committed Sep 28, 2024
1 parent a6ef269 commit 5c21237
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 35 deletions.
2 changes: 1 addition & 1 deletion models/Modes2D/Modes2D.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def model(*, Lx=None, Ly=None, Nx=None, Ny=None, dx=None, X=None, Y=None, in_mas
room=(3.65, 6),
Tc=20,
rh=50,
fmax=1000.0,
fmax=20_000.0,
ppw=10.5,
duration=8.0,
refl_coeff=0.99,
Expand Down
13 changes: 9 additions & 4 deletions src/cpp/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,14 @@
namespace {

[[nodiscard]] auto precisionOptions() {
return std::map<std::string, pffdtd::Precision>{
{"32", pffdtd::Precision::Float},
{"64", pffdtd::Precision::Double},
using pffdtd::Precision;
return std::map<std::string, Precision>{
{ "16", Precision::Half},
{ "32", Precision::Float},
{ "64", Precision::Double},
{"16x2", Precision::DoubleHalf},
{"32x2", Precision::DoubleFloat},
{"64x2", Precision::DoubleDouble},
};
}

Expand Down Expand Up @@ -139,7 +144,7 @@ auto main(int argc, char** argv) -> int {
sim2d->add_option("-s,--sim_dir", args.sim2d.simDir)->required()->check(CLI::ExistingDirectory);
sim2d->add_option("-e,--engine", args.sim2d.engine)->transform(toLower);
sim2d->add_option("-o,--out", args.sim2d.out);
sim2d->add_option("-p,--precision", args.sim3d.precision)
sim2d->add_option("-p,--precision", args.sim2d.precision)
->required()
->transform(CLI::CheckedTransformer(precisionOptions(), CLI::ignore_case));

Expand Down
12 changes: 2 additions & 10 deletions src/cpp/pffdtd/double.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,28 +109,20 @@ struct Double {
[[nodiscard]] static constexpr auto twoProduct(Real x, Real y) noexcept -> Double {
auto r = x * y;

#if defined(SYCL_LANGUAGE_VERSION)
auto e = sycl::fma(x, y, -r);
return Double{r, e};
#else
#if defined(__APPLE__)
#if defined(__APPLE__) or defined(__clang__)
if constexpr (std::same_as<Real, _Float16>) {
auto e = static_cast<_Float16>(std::fma(float{x}, float{y}, float{-r}));
return Double{r, e};
} else
#endif
#endif
{
auto e = std::fma(x, y, -r);
return Double{r, e};
}
#endif
}

Real _high{static_cast<Real>(0)};
Real _low{static_cast<Real>(0)};
};

} // namespace pffdtd

template<typename Real>
struct std::numeric_limits<pffdtd::Double<Real>> : std::numeric_limits<Real> {};
16 changes: 10 additions & 6 deletions src/cpp/pffdtd/engine_cpu_2d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,12 +151,16 @@ auto run(Simulation2D const& sim) {

auto EngineCPU2D::operator()(Simulation2D const& sim, Precision precision) const
-> stdex::mdarray<double, stdex::dextents<size_t, 2>> {
if (precision == Precision::Float) {
return run<float>(sim);
} else if (precision == Precision::Double) {
return run<double>(sim);
} else {
raisef<std::invalid_argument>("invalid precision {}", static_cast<int>(precision));
switch (precision) {
case Precision::Float: return run<float>(sim);
case Precision::Double: return run<double>(sim);
case Precision::DoubleFloat: return run<Double<float>>(sim);
case Precision::DoubleDouble: return run<Double<double>>(sim);
#if defined(__APPLE__) or defined(__clang__)
case Precision::Half: return run<_Float16>(sim);
case Precision::DoubleHalf: return run<Double<_Float16>>(sim);
#endif
default: raisef<std::invalid_argument>("invalid precision {}", static_cast<int>(precision));
}
}

Expand Down
91 changes: 78 additions & 13 deletions src/cpp/pffdtd/engine_sycl_2d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,66 @@ namespace pffdtd {

namespace {

template<typename Real>
struct AirUpdate;
template<typename Real>
struct BoundaryRigid;
template<typename Real>
struct BoundaryLoss;
template<typename Real>
struct CopyInput;
template<typename Real>
struct CopyOutput;

template<typename Real>
static constexpr auto min_exponent = std::numeric_limits<Real>::min_exponent;

template<typename Real>
static constexpr auto min_exponent<Double<Real>> = min_exponent<Real>;

template<>
static constexpr auto min_exponent<_Float16> = -13;

template<typename Real>
static constexpr auto max_exponent = std::numeric_limits<Real>::max_exponent;

template<typename Real>
static constexpr auto max_exponent<Double<Real>> = max_exponent<Real>;

template<>
static constexpr auto max_exponent<_Float16> = 16;

[[nodiscard]] constexpr auto to_ixy(auto x, auto y, auto /*Nx*/, auto Ny) { return x * Ny + y; }

template<typename Real>
auto scaleInput(std::vector<double> const& buf) {
static constexpr auto min_exp = static_cast<double>(min_exponent<Real>);
static constexpr auto max_exp = static_cast<double>(max_exponent<Real>);
fmt::println("min_exp = {}, min_exp = {}", min_exp, max_exp);

auto const aexp = 0.5;
auto const pow2 = static_cast<int>(std::round(aexp * max_exp + (1 - aexp) * min_exp));
auto const norm1 = std::pow(2.0, pow2);

auto const max_in = *std::ranges::max_element(buf, [](auto l, auto r) { return std::fabs(l) < std::fabs(r); });
auto const inv_infac = norm1 / max_in;
auto const infac = 1.0 / inv_infac;

std::printf(
"max_in = %.16e, pow2 = %d, norm1 = %.16e, inv_infac = %.16e, infac = "
"%.16e\n",
max_in,
pow2,
norm1,
inv_infac,
infac
);

auto buf_out = buf;
std::ranges::transform(buf_out, buf_out.begin(), [=](auto v) { return v * inv_infac; });
return std::pair{buf_out, infac};
}

template<typename Real>
auto run(Simulation2D const& sim) {
summary(sim);
Expand All @@ -37,6 +95,8 @@ auto run(Simulation2D const& sim) {
auto device = queue.get_device();
summary(device);

auto const [in_sigs_scaled, infac] = scaleInput<Real>(sim.in_sigs);

auto u0_buf = sycl::buffer<Real, 1>(sycl::range<1>(Npts));
auto u1_buf = sycl::buffer<Real, 1>(sycl::range<1>(Npts));
auto u2_buf = sycl::buffer<Real, 1>(sycl::range<1>(Npts));
Expand All @@ -45,7 +105,7 @@ auto run(Simulation2D const& sim) {
auto in_mask_buf = sycl::buffer{sim.in_mask};
auto bn_ixy_buf = sycl::buffer{sim.bn_ixy};
auto adj_bn_buf = sycl::buffer{sim.adj_bn};
auto in_sigs_buf = sycl::buffer{sim.in_sigs};
auto in_sigs_buf = sycl::buffer{in_sigs_scaled};
auto in_ixy_buf = sycl::buffer{sim.in_ixy};
auto out_ixy_buf = sycl::buffer{sim.out_ixy};

Expand All @@ -62,7 +122,7 @@ auto run(Simulation2D const& sim) {
auto u2 = sycl::accessor{u2_buf, cgh, sycl::read_only};
auto in_mask = sycl::accessor{in_mask_buf, cgh, sycl::read_only};

cgh.parallel_for<struct AirUpdate>(sycl::range<2>(Nx - 2, Ny - 2), [=](sycl::id<2> id) {
cgh.parallel_for<AirUpdate<Real>>(sycl::range<2>(Nx - 2, Ny - 2), [=](sycl::id<2> id) {
auto const x = static_cast<int64_t>(id[0] + 1);
auto const y = static_cast<int64_t>(id[1] + 1);
auto const idx = to_ixy(x, y, 0, Ny);
Expand All @@ -88,7 +148,7 @@ auto run(Simulation2D const& sim) {
auto bn_ixy = sycl::accessor{bn_ixy_buf, cgh, sycl::read_only};
auto adj_bn = sycl::accessor{adj_bn_buf, cgh, sycl::read_only};

cgh.parallel_for<struct BoundaryRigid>(Nb, [=](sycl::id<1> id) {
cgh.parallel_for<BoundaryRigid<Real>>(Nb, [=](sycl::id<1> id) {
auto const ib = bn_ixy[id];
auto const K = static_cast<Real>(adj_bn[id]);

Expand All @@ -111,7 +171,7 @@ auto run(Simulation2D const& sim) {
auto bn_ixy = sycl::accessor{bn_ixy_buf, cgh, sycl::read_only};
auto adj_bn = sycl::accessor{adj_bn_buf, cgh, sycl::read_only};

cgh.parallel_for<struct BoundaryLoss>(Nb, [=](sycl::id<1> id) {
cgh.parallel_for<BoundaryLoss<Real>>(Nb, [=](sycl::id<1> id) {
auto const ib = bn_ixy[id];
auto const K = adj_bn[id];
auto const current = u0[ib];
Expand All @@ -127,7 +187,7 @@ auto run(Simulation2D const& sim) {
auto in_ixy = sycl::accessor{in_ixy_buf, cgh, sycl::read_only};
auto in_sigs = sycl::accessor{in_sigs_buf, cgh, sycl::read_only};

cgh.parallel_for<struct CopyInput>(Ns, [=](sycl::id<1> id) {
cgh.parallel_for<CopyInput<Real>>(Ns, [=](sycl::id<1> id) {
auto src_ixy = to_ixy(id[0], n, Ns, Nt);
u0[in_ixy[id[0]]] += static_cast<Real>(in_sigs[src_ixy]);
});
Expand All @@ -138,7 +198,7 @@ auto run(Simulation2D const& sim) {
auto out = sycl::accessor{out_buf, cgh, sycl::write_only};
auto out_ixy = sycl::accessor{out_ixy_buf, cgh, sycl::read_only};

cgh.parallel_for<struct CopyOutput>(Nr, [=](sycl::id<1> id) {
cgh.parallel_for<CopyOutput<Real>>(Nr, [=](sycl::id<1> id) {
auto const r = id[0];
out[r][n] = u0[out_ixy[r]];
});
Expand Down Expand Up @@ -181,7 +241,7 @@ auto run(Simulation2D const& sim) {
auto host = sycl::host_accessor{out_buf, sycl::read_only};
for (auto it{0UL}; it < static_cast<size_t>(Nt); ++it) {
for (auto ir{0UL}; ir < Nr; ++ir) {
outputs(ir, it) = static_cast<double>(host[ir][it]);
outputs(ir, it) = static_cast<double>(host[ir][it]) * infac;
}
}

Expand All @@ -194,12 +254,17 @@ auto run(Simulation2D const& sim) {

auto EngineSYCL2D::operator()(Simulation2D const& sim, Precision precision) const
-> stdex::mdarray<double, stdex::dextents<size_t, 2>> {
if (precision == Precision::Float) {
return run<float>(sim);
} else if (precision == Precision::Double) {
return run<double>(sim);
} else {
raisef<std::invalid_argument>("invalid precision {}", static_cast<int>(precision));
switch (precision) {
case Precision::Float: return run<float>(sim);
case Precision::Double: return run<double>(sim);
case Precision::DoubleFloat: return run<Double<float>>(sim);
case Precision::DoubleDouble: return run<Double<double>>(sim);
#if defined(__APPLE__) or defined(__clang__)
case Precision::Half: return run<_Float16>(sim);
case Precision::DoubleHalf: return run<Double<_Float16>>(sim);
#endif

default: raisef<std::invalid_argument>("invalid precision {}", static_cast<int>(precision));
}
}
} // namespace pffdtd
5 changes: 5 additions & 0 deletions src/cpp/pffdtd/precision.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,13 @@
namespace pffdtd {

enum struct Precision {
Half,
Float,
Double,

DoubleHalf,
DoubleFloat,
DoubleDouble,
};

} // namespace pffdtd
1 change: 0 additions & 1 deletion src/python/pffdtd/sim2d/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ def sim_setup_2d(
b = 2/constants.Ts*np.array([1.0, -1.0])
a = np.array([1.0, 1.0])
in_sigs_f = lfilter(b, a, in_sigs, axis=-1)
in_sigs_f /= np.max(np.abs(in_sigs_f))

sps30 = dt*30
target_sps = 0.115
Expand Down

0 comments on commit 5c21237

Please sign in to comment.