Skip to content

Commit

Permalink
[cpp] Make precision a runtime parameter for sim3d
Browse files Browse the repository at this point in the history
  • Loading branch information
tobiashienzsch committed Sep 29, 2024
1 parent 5c21237 commit 487b725
Show file tree
Hide file tree
Showing 15 changed files with 294 additions and 218 deletions.
2 changes: 1 addition & 1 deletion run_2d.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ cd "$model_dir"
python "$sim_setup"

# Run sim
DPCPP_CPU_PLACES=cores DPCPP_CPU_CU_AFFINITY=spread DPCPP_CPU_NUM_CUS=$jobs OMP_NUM_THREADS=$jobs "$engine_exe" sim2d -p 32 -s "$sim_dir" -e sycl
DPCPP_CPU_PLACES=cores DPCPP_CPU_CU_AFFINITY=spread DPCPP_CPU_NUM_CUS=$jobs OMP_NUM_THREADS=$jobs "$engine_exe" sim2d -p 64 -s "$sim_dir" -e sycl
# pffdtd sim2d run --sim_dir "$sim_dir" --video

# Post-process
Expand Down
4 changes: 2 additions & 2 deletions run_3d.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ set -e

root_dir="$(cd "$(dirname "$0")" && pwd)"
pffdtd_engine="$root_dir/build/src/cpp/pffdtd-engine"
# pffdtd_engine="$root_dir/cmake-build-cuda/src/cpp/pffdtd-engine"
pffdtd_engine="$root_dir/cmake-build-cuda/src/cpp/pffdtd-engine"

sim_name="Modes"
sim_setup="${sim_name}.py"
Expand All @@ -28,7 +28,7 @@ cd "$model_dir"
pffdtd sim3d setup "$sim_setup"

# Run sim
$pffdtd_engine sim3d -e cpu -p "32" -s "$sim_dir"
$pffdtd_engine sim3d -e cuda -p "64" -s "$sim_dir"
# pffdtd sim3d engine --sim_dir="$sim_dir" --plot --draw_backend="mayavi" --json_model="${model_dir}/model.json"

# Post-process
Expand Down
20 changes: 5 additions & 15 deletions src/cpp/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,18 +69,15 @@ namespace {
return engines;
}

template<typename Real>
[[nodiscard]] auto getEngines3D() {
using namespace pffdtd;
auto engines = std::map<std::string, std::function<void(Simulation3D<Real> const&)>>{};
auto engines = std::map<std::string, std::function<void(Simulation3D const&)>>{};
engines["cpu"] = EngineCPU3D{};
#if defined(PFFDTD_HAS_CUDA)
engines["cuda"] = EngineCUDA3D{};
#endif
#if defined(PFFDTD_HAS_METAL)
if constexpr (std::same_as<Real, float>) {
engines["metal"] = EngineMETAL3D{};
}
engines["metal"] = EngineMETAL3D{};
#endif
#if defined(PFFDTD_HAS_SYCL)
engines["sycl"] = EngineSYCL3D{};
Expand Down Expand Up @@ -111,18 +108,17 @@ struct Arguments {
Sim3D sim3d;
};

template<typename Real>
auto run3D(Arguments::Sim3D const& args) {
using namespace pffdtd;
fmt::println("Running: {} on {} with precision {}", args.simDir, args.engine, toString(args.precision));

auto const engines = getEngines3D<Real>();
auto const engines = getEngines3D();
auto const& engine = engines.at(args.engine);

auto const simDir = std::filesystem::path{args.simDir};
auto const start = getTime();

auto sim = loadSimulation3D<Real>(simDir);
auto sim = loadSimulation3D(simDir, args.precision);
scaleInput(sim);
engine(sim);
rescaleOutput(sim);
Expand Down Expand Up @@ -177,13 +173,7 @@ auto main(int argc, char** argv) -> int {
}

if (*sim3d) {
if (args.sim3d.precision == pffdtd::Precision::Float) {
run3D<float>(args.sim3d);
} else if (args.sim3d.precision == pffdtd::Precision::Double) {
run3D<double>(args.sim3d);
} else {
pffdtd::raisef<std::invalid_argument>("invalid precision '{}'", toString(args.sim3d.precision));
}
run3D(args.sim3d);
}

if (*test) {
Expand Down
36 changes: 24 additions & 12 deletions src/cpp/pffdtd/engine_cpu_3d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

#include "engine_cpu_3d.hpp"

#include "pffdtd/double.hpp"
#include "pffdtd/exception.hpp"
#include "pffdtd/progress.hpp"
#include "pffdtd/time.hpp"
#include "pffdtd/utility.hpp"
Expand Down Expand Up @@ -79,7 +81,11 @@ auto process_bnl_fd(
}

template<typename Real>
auto run(Simulation3D<Real> const& sim) -> void {
auto run(Simulation3D const& sim) -> void {
auto const ssaf_bnl_real = convertTo<Real>(sim.ssaf_bnl);
auto const mat_beta_real = convertTo<Real>(sim.mat_beta);
auto const mat_quads_real = convertTo<Real>(sim.mat_quads);

// keep local ints, scalars
int64_t const Ns = sim.Ns;
int64_t const Nr = sim.Nr;
Expand All @@ -105,9 +111,9 @@ auto run(Simulation3D<Real> const& sim) -> void {
int8_t const* mat_bnl = sim.mat_bnl.data();
int8_t const* Q_bna = sim.Q_bna.data();
double const* in_sigs = sim.in_sigs.data();
Real const* ssaf_bnl = sim.ssaf_bnl.data();
Real const* mat_beta = sim.mat_beta.data();
MatQuad<Real> const* mat_quads = sim.mat_quads.data();
Real const* ssaf_bnl = ssaf_bnl_real.data();
Real const* mat_beta = mat_beta_real.data();
MatQuad<Real> const* mat_quads = mat_quads_real.data();
double* u_out = sim.u_out.get();

// allocate memory
Expand All @@ -130,11 +136,11 @@ auto run(Simulation3D<Real> const& sim) -> void {
auto* gh1 = gh1_buf.data();

// sim coefficients
auto const lo2 = sim.lo2;
auto const sl2 = sim.sl2;
auto const l = sim.l;
auto const a1 = sim.a1;
auto const a2 = sim.a2;
auto const lo2 = static_cast<Real>(sim.lo2);
auto const sl2 = static_cast<Real>(sim.sl2);
auto const l = static_cast<Real>(sim.l);
auto const a1 = static_cast<Real>(sim.a1);
auto const a2 = static_cast<Real>(sim.a2);

// can control outside with OMP_NUM_THREADS env variable
int const numWorkers = omp_get_max_threads();
Expand Down Expand Up @@ -373,8 +379,14 @@ auto run(Simulation3D<Real> const& sim) -> void {

} // namespace

auto EngineCPU3D::operator()(Simulation3D<float> const& sim) const -> void { run(sim); }

auto EngineCPU3D::operator()(Simulation3D<double> const& sim) const -> void { run(sim); }
/// Runs the CPU engine on `sim`, instantiating the templated `run` with the
/// arithmetic type selected by the simulation's runtime `precision` field.
///
/// Handles Float, Double, DoubleFloat and DoubleDouble. Any other value is
/// reported through `raisef<std::invalid_argument>`.
auto EngineCPU3D::operator()(Simulation3D const& sim) const -> void {
  auto const selected = sim.precision;

  if (selected == Precision::Float) {
    return run<float>(sim);
  }
  if (selected == Precision::Double) {
    return run<double>(sim);
  }
  if (selected == Precision::DoubleFloat) {
    return run<Double<float>>(sim);
  }
  if (selected == Precision::DoubleDouble) {
    return run<Double<double>>(sim);
  }

  // No kernel is instantiated for the requested precision.
  raisef<std::invalid_argument>("invalid precision {}", static_cast<int>(selected));
}

} // namespace pffdtd
4 changes: 2 additions & 2 deletions src/cpp/pffdtd/engine_cpu_3d.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

#pragma once

#include "pffdtd/precision.hpp"
#include "pffdtd/simulation_3d.hpp"

namespace pffdtd {

struct EngineCPU3D {
auto operator()(Simulation3D<float> const& sim) const -> void;
auto operator()(Simulation3D<double> const& sim) const -> void;
auto operator()(Simulation3D const& sim) const -> void;
};

} // namespace pffdtd
37 changes: 22 additions & 15 deletions src/cpp/pffdtd/engine_cuda_3d.cu
Original file line number Diff line number Diff line change
Expand Up @@ -543,8 +543,7 @@ auto print_gpu_details(int i) -> uint64_t {
}

// input indices need to be sorted for multi-device allocation
template<typename Real>
void checkSorted(Simulation3D<Real> const& sim) {
void checkSorted(Simulation3D const& sim) {
int64_t const* bn_ixyz = sim.bn_ixyz.data();
int64_t const* bnl_ixyz = sim.bnl_ixyz.data();
int64_t const* bna_ixyz = sim.bna_ixyz.data();
Expand Down Expand Up @@ -574,7 +573,7 @@ void checkSorted(Simulation3D<Real> const& sim) {

// counts for splitting data across GPUs
template<typename Real>
void splitData(Simulation3D<Real> const& sim, std::span<HostData<Real>> ghds) {
void splitData(Simulation3D const& sim, std::span<HostData<Real>> ghds) {
auto const Nx = sim.Nx;
auto const Ny = sim.Ny;
auto const Nz = sim.Nz;
Expand Down Expand Up @@ -724,7 +723,7 @@ void splitData(Simulation3D<Real> const& sim, std::span<HostData<Real>> ghds) {
}

template<typename Real>
auto run(Simulation3D<Real> const& sim) -> void { // NOLINT(readability-function-cognitive-complexity)
auto run(Simulation3D const& sim) -> void { // NOLINT(readability-function-cognitive-complexity)
// if you want to test synchronous, env variable for that
auto const* s = std::getenv("CUDA_LAUNCH_BLOCKING"); // NOLINT(concurrency-mt-unsafe)
if (s != nullptr) {
Expand All @@ -745,6 +744,10 @@ auto run(Simulation3D<Real> const& sim) -> void { // NOLINT(readability-function
auto ghds = std::vector<HostData<Real>>(static_cast<size_t>(ngpus));
auto gds = std::vector<DeviceData<Real>>(static_cast<size_t>(ngpus));

auto const ssaf_bnl_real = convertTo<Real>(sim.ssaf_bnl);
auto const mat_beta_real = convertTo<Real>(sim.mat_beta);
auto const mat_quads_real = convertTo<Real>(sim.mat_quads);

if (ngpus > 1) {
checkSorted(sim); // needs to be sorted for multi-GPU
}
Expand All @@ -756,11 +759,11 @@ auto run(Simulation3D<Real> const& sim) -> void { // NOLINT(readability-function
gds[gid].totalmembytes = print_gpu_details(gid);
}

Real lo2 = sim.lo2;
Real a1 = sim.a1;
Real a2 = sim.a2;
Real l = sim.l;
Real sl2 = sim.sl2;
auto lo2 = static_cast<Real>(sim.lo2);
auto a1 = static_cast<Real>(sim.a1);
auto a2 = static_cast<Real>(sim.a2);
auto l = static_cast<Real>(sim.l);
auto sl2 = static_cast<Real>(sim.sl2);

// timing stuff
auto elapsed = std::chrono::nanoseconds{0};
Expand Down Expand Up @@ -829,7 +832,7 @@ auto run(Simulation3D<Real> const& sim) -> void { // NOLINT(readability-function

// aliased pointers (to memory already allocated)
host.in_sigs = sim.in_sigs.data() + Ns_read * sim.Nt;
host.ssaf_bnl = sim.ssaf_bnl.data() + Nbl_read;
host.ssaf_bnl = ssaf_bnl_real.data() + Nbl_read;
host.adj_bn = sim.adj_bn.data() + Nb_read;
host.mat_bnl = sim.mat_bnl.data() + Nbl_read;
host.K_bn = sim.K_bn.data() + Nb_read;
Expand Down Expand Up @@ -942,12 +945,12 @@ auto run(Simulation3D<Real> const& sim) -> void { // NOLINT(readability-function
gpuErrchk(cudaMemcpy(gpu.mat_bnl, host.mat_bnl, (size_t)host.Nbl * sizeof(int8_t), cudaMemcpyHostToDevice));

gpuErrchk(cudaMalloc(&(gpu.mat_beta), (size_t)sim.Nm * sizeof(Real)));
gpuErrchk(cudaMemcpy(gpu.mat_beta, sim.mat_beta.data(), (size_t)sim.Nm * sizeof(Real), cudaMemcpyHostToDevice));
gpuErrchk(cudaMemcpy(gpu.mat_beta, mat_beta_real.data(), (size_t)sim.Nm * sizeof(Real), cudaMemcpyHostToDevice));

gpuErrchk(cudaMalloc(&(gpu.mat_quads), (size_t)sim.Nm * MMb * sizeof(MatQuad<Real>)));
gpuErrchk(cudaMemcpy(
gpu.mat_quads,
sim.mat_quads.data(),
mat_quads_real.data(),
(size_t)sim.Nm * MMb * sizeof(MatQuad<Real>),
cudaMemcpyHostToDevice
));
Expand Down Expand Up @@ -1407,8 +1410,12 @@ auto run(Simulation3D<Real> const& sim) -> void { // NOLINT(readability-function

} // namespace

auto EngineCUDA3D::operator()(Simulation3D<float> const& sim) const -> void { run(sim); }

auto EngineCUDA3D::operator()(Simulation3D<double> const& sim) const -> void { run(sim); }
/// Runs the CUDA engine on `sim`, instantiating the templated `run` with the
/// arithmetic type selected by the simulation's runtime `precision` field.
///
/// Only Float and Double are dispatched here.
/// @throws std::invalid_argument for any other precision value.
auto EngineCUDA3D::operator()(Simulation3D const& sim) const -> void {
  auto const selected = sim.precision;

  if (selected == Precision::Float) {
    return run<float>(sim);
  }
  if (selected == Precision::Double) {
    return run<double>(sim);
  }

  // No kernel is instantiated for the requested precision.
  auto const code = static_cast<int>(selected);
  throw std::invalid_argument("invalid precision " + std::to_string(code));
}

} // namespace pffdtd
3 changes: 1 addition & 2 deletions src/cpp/pffdtd/engine_cuda_3d.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
namespace pffdtd {

struct EngineCUDA3D {
auto operator()(Simulation3D<float> const& sim) const -> void;
auto operator()(Simulation3D<double> const& sim) const -> void;
auto operator()(Simulation3D const& sim) const -> void;
};

} // namespace pffdtd
2 changes: 1 addition & 1 deletion src/cpp/pffdtd/engine_metal_2d.mm
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

auto toFloat(std::vector<double> const& buf) {
auto buf32 = std::vector<float>(buf.size());
std::ranges::transform(buf32, buf32.begin(), [](auto v) { return static_cast<float>(v); });
std::ranges::transform(buf, buf32.begin(), [](auto v) { return static_cast<float>(v); });
return buf32;
}

Expand Down
2 changes: 1 addition & 1 deletion src/cpp/pffdtd/engine_metal_3d.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
namespace pffdtd {

struct EngineMETAL3D {
auto operator()(Simulation3D<float> const& sim) const -> void;
auto operator()(Simulation3D const& sim) const -> void;
};

} // namespace pffdtd
7 changes: 5 additions & 2 deletions src/cpp/pffdtd/engine_metal_3d.mm
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
namespace {

template<typename Real>
auto run(Simulation3D<Real> const& sim) {
auto run(Simulation3D const& sim) {
@autoreleasepool {

// Device
Expand Down Expand Up @@ -286,6 +286,9 @@ auto run(Simulation3D<Real> const& sim) {

} // namespace

auto EngineMETAL3D::operator()(Simulation3D<float> const& sim) const -> void { run(sim); }
/// Runs the Metal engine on `sim`.
///
/// This entry point only instantiates the single-precision kernel
/// (`run<float>`), so the simulation is asserted to have been loaded with
/// `Precision::Float`.
auto EngineMETAL3D::operator()(Simulation3D const& sim) const -> void {
  // The precision is a field of the simulation; there is no free-standing
  // `precision` in this scope, so the check must go through `sim`.
  PFFDTD_ASSERT(sim.precision == Precision::Float);
  run<float>(sim);
}

} // namespace pffdtd
34 changes: 27 additions & 7 deletions src/cpp/pffdtd/engine_sycl_3d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
#include "engine_sycl_3d.hpp"

#include "pffdtd/assert.hpp"
#include "pffdtd/double.hpp"
#include "pffdtd/exception.hpp"
#include "pffdtd/precision.hpp"
#include "pffdtd/progress.hpp"
#include "pffdtd/sycl.hpp"
#include "pffdtd/time.hpp"
Expand Down Expand Up @@ -39,7 +42,7 @@ template<typename T>
struct ReadOutput;

template<typename Real>
auto run(Simulation3D<Real> const& sim) -> void {
auto run(Simulation3D const& sim) -> void {
PFFDTD_ASSERT(sim.grid == Grid::CART);

auto queue = sycl::queue{sycl::property::queue::enable_profiling{}};
Expand All @@ -64,6 +67,10 @@ auto run(Simulation3D<Real> const& sim) -> void {
auto const a1 = static_cast<Real>(sim.a1);
auto const a2 = static_cast<Real>(sim.a2);

auto const ssaf_bnl_real = convertTo<Real>(sim.ssaf_bnl);
auto const mat_beta_real = convertTo<Real>(sim.mat_beta);
auto const mat_quads_real = convertTo<Real>(sim.mat_quads);

auto Q_bna_buf = sycl::buffer{sim.Q_bna};
auto bn_mask_buf = sycl::buffer{sim.bn_mask};
auto adj_bn_buf = sycl::buffer{sim.adj_bn};
Expand All @@ -73,11 +80,11 @@ auto run(Simulation3D<Real> const& sim) -> void {
auto in_ixyz_buf = sycl::buffer{sim.in_ixyz};
auto out_ixyz_buf = sycl::buffer{sim.out_ixyz};
auto in_sigs_buf = sycl::buffer{sim.in_sigs};
auto mat_beta_buf = sycl::buffer{sim.mat_beta};
auto mat_beta_buf = sycl::buffer{mat_beta_real};
auto mat_bnl_buf = sycl::buffer{sim.mat_bnl};
auto mat_quads_buf = sycl::buffer{sim.mat_quads};
auto mat_quads_buf = sycl::buffer{mat_quads_real};
auto Mb_buf = sycl::buffer{sim.Mb};
auto ssaf_bnl_buf = sycl::buffer{sim.ssaf_bnl};
auto ssaf_bnl_buf = sycl::buffer{ssaf_bnl_real};

auto u0_buf = sycl::buffer<Real>(static_cast<size_t>(Npts));
auto u1_buf = sycl::buffer<Real>(static_cast<size_t>(Npts));
Expand Down Expand Up @@ -343,14 +350,27 @@ auto run(Simulation3D<Real> const& sim) -> void {
// Copy output to host
auto host = sycl::host_accessor{u_out_buf, sycl::read_only};
for (auto i{0UL}; i < static_cast<size_t>(Nr * Nt); ++i) {
sim.u_out[i] = host[i];
sim.u_out[i] = static_cast<double>(host[i]);
}
}

} // namespace

auto EngineSYCL3D::operator()(Simulation3D<float> const& sim) const -> void { run(sim); }
auto EngineSYCL3D::operator()(Simulation3D const& sim) const -> void {
switch (sim.precision) {
#if defined(__APPLE__) or defined(__clang__)
case Precision::Half: return run<_Float16>(sim);
case Precision::DoubleHalf: return run<Double<_Float16>>(sim);
#endif

case Precision::Float: return run<float>(sim);
case Precision::DoubleFloat: return run<Double<float>>(sim);

case Precision::Double: return run<double>(sim);
case Precision::DoubleDouble: return run<Double<double>>(sim);

auto EngineSYCL3D::operator()(Simulation3D<double> const& sim) const -> void { run(sim); }
default: raisef<std::invalid_argument>("invalid precision {}", static_cast<int>(sim.precision));
}
}

} // namespace pffdtd
3 changes: 1 addition & 2 deletions src/cpp/pffdtd/engine_sycl_3d.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
namespace pffdtd {

struct EngineSYCL3D {
auto operator()(Simulation3D<float> const& sim) const -> void;
auto operator()(Simulation3D<double> const& sim) const -> void;
auto operator()(Simulation3D const& sim) const -> void;
};

} // namespace pffdtd
Loading

0 comments on commit 487b725

Please sign in to comment.