-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
a5378ee
commit 6a67d30
Showing
7 changed files
with
435 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,327 @@ | ||
// SPDX-License-Identifier: MIT | ||
// SPDX-FileCopyrightText: 2024 Tobias Hienzsch | ||
#include "engine_3d_sycl.hpp" | ||
|
||
#include "pffdtd/assert.hpp" | ||
#include "pffdtd/progress.hpp" | ||
#include "pffdtd/sycl.hpp" | ||
#include "pffdtd/time.hpp" | ||
|
||
#include <fmt/format.h> | ||
|
||
namespace pffdtd { | ||
|
||
auto Engine3DSYCL::operator()(Simulation3D& sim) const -> double { | ||
PFFDTD_ASSERT(sim.fcc_flag == 0); | ||
|
||
auto queue = sycl::queue{sycl::property::queue::enable_profiling{}}; | ||
auto device = queue.get_device(); | ||
summary(device); | ||
|
||
auto const Nx = sim.Nx; | ||
auto const Ny = sim.Ny; | ||
auto const Nz = sim.Nz; | ||
auto const NzNy = Nz * Ny; | ||
auto const Npts = sim.Npts; | ||
auto const Nb = sim.Nb; | ||
auto const Nbl = sim.Nbl; | ||
auto const Nba = sim.Nba; | ||
auto const Nm = sim.Nm; | ||
auto const Nr = sim.Nr; | ||
auto const Ns = sim.Ns; | ||
auto const Nt = sim.Nt; | ||
|
||
auto const lo2 = static_cast<Real>(sim.lo2); | ||
auto const sl2 = static_cast<Real>(sim.sl2); | ||
auto const l = static_cast<Real>(sim.l); | ||
auto const a1 = static_cast<Real>(sim.a1); | ||
auto const a2 = static_cast<Real>(sim.a2); | ||
|
||
auto Q_bna_buf = sycl::buffer{sim.Q_bna, sycl::range(size_t(Nba))}; | ||
auto bn_mask_buf = sycl::buffer{sim.bn_mask, sycl::range(size_t(Npts))}; | ||
auto adj_bn_buf = sycl::buffer{sim.adj_bn, sycl::range(size_t(Nb))}; | ||
auto bn_ixyz_buf = sycl::buffer{sim.bn_ixyz, sycl::range(size_t(Nb))}; | ||
auto bnl_ixyz_buf = sycl::buffer{sim.bnl_ixyz, sycl::range(size_t(Nb))}; | ||
auto bna_ixyz_buf = sycl::buffer{sim.bna_ixyz, sycl::range(size_t(Nba))}; | ||
auto in_ixyz_buf = sycl::buffer{sim.in_ixyz, sycl::range(size_t(Ns))}; | ||
auto out_ixyz_buf = sycl::buffer{sim.out_ixyz, sycl::range(size_t(Nr))}; | ||
auto in_sigs_buf = sycl::buffer{sim.in_sigs, sycl::range(size_t(Ns * Nt))}; | ||
auto mat_beta_buf = sycl::buffer{sim.mat_beta, sycl::range(size_t(Nm))}; | ||
auto mat_bnl_buf = sycl::buffer{sim.mat_bnl, sycl::range(size_t(Nbl))}; | ||
auto mat_quads_buf = sycl::buffer{sim.mat_quads, sycl::range(size_t(Nm * MMb))}; | ||
auto Mb_buf = sycl::buffer{sim.Mb, sycl::range(size_t(Nm))}; | ||
auto ssaf_bnl_buf = sycl::buffer{sim.ssaf_bnl, sycl::range(size_t(Nbl))}; | ||
|
||
auto u0_buf = sycl::buffer<Real>(size_t(Npts)); | ||
auto u1_buf = sycl::buffer<Real>(size_t(Npts)); | ||
auto u0b_buf = sycl::buffer<Real>(size_t(Nbl)); | ||
auto u1b_buf = sycl::buffer<Real>(size_t(Nbl)); | ||
auto u2b_buf = sycl::buffer<Real>(size_t(Nbl)); | ||
auto u2ba_buf = sycl::buffer<Real>(size_t(Nba)); | ||
auto vh1_buf = sycl::buffer<Real>(size_t(Nbl * MMb)); | ||
auto gh1_buf = sycl::buffer<Real>(size_t(Nbl * MMb)); | ||
auto u_out_buf = sycl::buffer<Real>(size_t(Nr * Nt)); | ||
|
||
auto elapsedAir = Seconds(0.0); | ||
auto elapsedBoundary = Seconds(0.0); | ||
auto const start = getTime(); | ||
|
||
for (int64_t n = 0; n < Nt; n++) { | ||
auto const sampleStart = getTime(); | ||
|
||
// Rigid update | ||
auto boundaryStartEvent = queue.submit([&](sycl::handler& cgh) { | ||
auto u0 = sycl::accessor{u0_buf, cgh, sycl::write_only}; | ||
auto u1 = sycl::accessor{u1_buf, cgh, sycl::read_only}; | ||
auto bn_ixyz = sycl::accessor{bn_ixyz_buf, cgh, sycl::read_only}; | ||
auto adj_bn = sycl::accessor{adj_bn_buf, cgh, sycl::read_only}; | ||
cgh.parallel_for<struct RigidUpdateCart>(Nb, [=](sycl::id<1> id) { | ||
auto const nb = id[0]; | ||
auto const ii = bn_ixyz[nb]; | ||
auto const adj = adj_bn[nb]; | ||
auto const Kint = sycl::popcount(adj); | ||
|
||
auto const _2 = static_cast<Real>(2.0); | ||
auto const K = static_cast<Real>(Kint); | ||
auto const b2 = static_cast<Real>(a2); | ||
auto const b1 = (_2 - sl2 * K); | ||
|
||
auto partial = b1 * u1[ii] - u0[ii]; | ||
partial += b2 * get_bit_as<Real>(adj, 0) * u1[ii + NzNy]; | ||
partial += b2 * get_bit_as<Real>(adj, 1) * u1[ii - NzNy]; | ||
partial += b2 * get_bit_as<Real>(adj, 2) * u1[ii + Nz]; | ||
partial += b2 * get_bit_as<Real>(adj, 3) * u1[ii - Nz]; | ||
partial += b2 * get_bit_as<Real>(adj, 4) * u1[ii + 1]; | ||
partial += b2 * get_bit_as<Real>(adj, 5) * u1[ii - 1]; | ||
u0[ii] = partial; | ||
}); | ||
}); | ||
|
||
// Copy to boundary buffer | ||
queue.submit([&](sycl::handler& cgh) { | ||
auto u0 = sycl::accessor{u0_buf, cgh, sycl::read_only}; | ||
auto u0b = sycl::accessor{u0b_buf, cgh, sycl::write_only}; | ||
auto bnl_ixyz = sycl::accessor{bnl_ixyz_buf, cgh, sycl::read_only}; | ||
cgh.parallel_for<struct CopyToBoundary>(Nbl, [=](sycl::id<1> id) { | ||
auto nb = id[0]; | ||
u0b[nb] = u0[bnl_ixyz[nb]]; | ||
}); | ||
}); | ||
|
||
// Apply boundary loss | ||
queue.submit([&](sycl::handler& cgh) { | ||
auto mat_beta = sycl::accessor{mat_beta_buf, cgh, sycl::read_only}; | ||
auto mat_bnl = sycl::accessor{mat_bnl_buf, cgh, sycl::read_only}; | ||
auto ssaf_bnl = sycl::accessor{ssaf_bnl_buf, cgh, sycl::read_only}; | ||
auto u2b = sycl::accessor{u2b_buf, cgh, sycl::read_only}; | ||
auto Mb = sycl::accessor{Mb_buf, cgh, sycl::read_only}; | ||
auto mat_quads = sycl::accessor{mat_quads_buf, cgh, sycl::read_only}; | ||
auto u0b = sycl::accessor{u0b_buf, cgh, sycl::read_write}; | ||
auto gh1 = sycl::accessor{gh1_buf, cgh, sycl::read_write}; | ||
auto vh1 = sycl::accessor{vh1_buf, cgh, sycl::read_write}; | ||
cgh.parallel_for<struct ApplyBoundaryLoss>(Nbl, [=](sycl::id<1> id) { | ||
auto nb = id[0]; | ||
Real _1 = 1.0; | ||
Real _2 = 2.0; | ||
int32_t const k = mat_bnl[nb]; | ||
|
||
Real lo2Kbg = lo2 * ssaf_bnl[nb] * mat_beta[k]; | ||
Real fac = _2 * lo2 * ssaf_bnl[nb] / (_1 + lo2Kbg); | ||
|
||
Real u0bint = u0b[nb]; | ||
Real u2bint = u2b[nb]; | ||
|
||
u0bint = (u0bint + lo2Kbg * u2bint) / (_1 + lo2Kbg); | ||
|
||
Real vh1nb[MMb]; | ||
for (int8_t m = 0; m < Mb[k]; m++) { | ||
int64_t const nbm = nb * MMb + m; | ||
int32_t const mbk = k * MMb + m; | ||
auto const* tm = &(mat_quads[mbk]); | ||
vh1nb[m] = vh1[nbm]; | ||
u0bint -= fac * (_2 * (tm->bDh) * vh1nb[m] - (tm->bFh) * gh1[nbm]); | ||
} | ||
|
||
Real du = u0bint - u2bint; | ||
|
||
for (int8_t m = 0; m < Mb[k]; m++) { | ||
int64_t const nbm = nb * MMb + m; | ||
int32_t const mbk = k * MMb + m; | ||
auto const* tm = &(mat_quads[mbk]); | ||
Real vh0nbm = (tm->b) * du + (tm->bd) * vh1nb[m] - _2 * (tm->bFh) * gh1[nbm]; | ||
gh1[nbm] += (vh0nbm + vh1nb[m]) / _2; | ||
vh1[nbm] = vh0nbm; | ||
} | ||
|
||
u0b[nb] = u0bint; | ||
}); | ||
}); | ||
|
||
// Copy from boundary buffer | ||
auto boundaryEndEvent = queue.submit([&](sycl::handler& cgh) { | ||
auto u0 = sycl::accessor{u0_buf, cgh, sycl::write_only}; | ||
auto u0b = sycl::accessor{u0b_buf, cgh, sycl::read_only}; | ||
auto bnl_ixyz = sycl::accessor{bnl_ixyz_buf, cgh, sycl::read_only}; | ||
cgh.parallel_for<struct CopyFromBoundary>(Nbl, [=](sycl::id<1> id) { | ||
auto nb = id[0]; | ||
u0[bnl_ixyz[nb]] = u0b[nb]; | ||
}); | ||
}); | ||
|
||
// Copy last state ABCs | ||
auto airStartEvent = queue.submit([&](sycl::handler& cgh) { | ||
auto u2ba = sycl::accessor{u2ba_buf, cgh, sycl::write_only}; | ||
auto u0 = sycl::accessor{u0_buf, cgh, sycl::read_only}; | ||
auto bna_ixyz = sycl::accessor{bna_ixyz_buf, cgh, sycl::read_only}; | ||
|
||
cgh.parallel_for<struct CopyABCs>(Nba, [=](sycl::id<1> id) { | ||
auto nb = id[0]; | ||
u2ba[nb] = u0[bna_ixyz[nb]]; | ||
}); | ||
}); | ||
|
||
// Flip halo XY | ||
queue.submit([&](sycl::handler& cgh) { | ||
auto u1 = sycl::accessor{u1_buf, cgh, sycl::read_write}; | ||
cgh.parallel_for<struct FlipHaloXY>(sycl::range<2>(Nx, Ny), [=](sycl::id<2> id) { | ||
auto const i = id[0] * NzNy + id[1] * Nz; | ||
u1[i + 0] = u1[i + 2]; | ||
u1[i + Nz - 1] = u1[i + Nz - 3]; | ||
}); | ||
}); | ||
|
||
// Flip halo XZ | ||
queue.submit([&](sycl::handler& cgh) { | ||
auto u1 = sycl::accessor{u1_buf, cgh, sycl::read_write}; | ||
cgh.parallel_for<struct FlipHaloXZ>(sycl::range<2>(Nx, Nz), [=](sycl::id<2> id) { | ||
auto const ix = id[0]; | ||
auto const iz = id[1]; | ||
|
||
u1[ix * NzNy + 0 * Nz + iz] = u1[ix * NzNy + 2 * Nz + iz]; | ||
u1[ix * NzNy + (Ny - 1) * Nz + iz] = u1[ix * NzNy + (Ny - 3) * Nz + iz]; | ||
}); | ||
}); | ||
|
||
// Flip halo YZ | ||
queue.submit([&](sycl::handler& cgh) { | ||
auto u1 = sycl::accessor{u1_buf, cgh, sycl::read_write}; | ||
cgh.parallel_for<struct FlipHaloYZ>(sycl::range<2>(Ny, Nz), [=](sycl::id<2> id) { | ||
auto const iy = id[0]; | ||
auto const iz = id[1]; | ||
|
||
u1[0 * NzNy + iy * Nz + iz] = u1[2 * NzNy + iy * Nz + iz]; | ||
u1[(Nx - 1) * NzNy + iy * Nz + iz] = u1[(Nx - 3) * NzNy + iy * Nz + iz]; | ||
}); | ||
}); | ||
|
||
// Add sources | ||
queue.submit([&](sycl::handler& cgh) { | ||
auto u0 = sycl::accessor{u0_buf, cgh, sycl::read_write}; | ||
auto in_sigs = sycl::accessor{in_sigs_buf, cgh, sycl::read_only}; | ||
auto in_ixyz = sycl::accessor{in_ixyz_buf, cgh, sycl::read_only}; | ||
cgh.parallel_for<struct AddSources>(Ns, [=](sycl::id<1> id) { | ||
auto const ns = id[0]; | ||
auto const ii = in_ixyz[ns]; | ||
u0[ii] += in_sigs[ns * Nt + n]; | ||
}); | ||
}); | ||
|
||
// Air update | ||
queue.submit([&](sycl::handler& cgh) { | ||
auto u0 = sycl::accessor{u0_buf, cgh, sycl::write_only}; | ||
auto u1 = sycl::accessor{u1_buf, cgh, sycl::read_only}; | ||
auto bn_mask = sycl::accessor{bn_mask_buf, cgh, sycl::read_only}; | ||
cgh.parallel_for<struct AirUpdateCart>(sycl::range<3>(Nx - 2, Ny - 2, Nz - 2), [=](sycl::id<3> id) { | ||
auto const ix = id[0] + 1; | ||
auto const iy = id[1] + 1; | ||
auto const iz = id[2] + 1; | ||
auto const ii = ix * NzNy + iy * Nz + iz; | ||
|
||
if ((GET_BIT(bn_mask[ii >> 3], ii % 8)) != 0) { | ||
return; | ||
} | ||
|
||
auto partial = a1 * u1[ii] - u0[ii]; | ||
partial += a2 * u1[ii + NzNy]; | ||
partial += a2 * u1[ii - NzNy]; | ||
partial += a2 * u1[ii + Nz]; | ||
partial += a2 * u1[ii - Nz]; | ||
partial += a2 * u1[ii + 1]; | ||
partial += a2 * u1[ii - 1]; | ||
u0[ii] = partial; | ||
}); | ||
}); | ||
|
||
// ABC Loss | ||
auto airEndEvent = queue.submit([&](sycl::handler& cgh) { | ||
auto u0 = sycl::accessor{u0_buf, cgh, sycl::read_write}; | ||
auto u2ba = sycl::accessor{u2ba_buf, cgh, sycl::read_only}; | ||
auto Q_bna = sycl::accessor{Q_bna_buf, cgh, sycl::read_only}; | ||
auto bna_ixyz = sycl::accessor{bna_ixyz_buf, cgh, sycl::read_only}; | ||
cgh.parallel_for<struct LossABC>(Nba, [=](sycl::id<1> id) { | ||
auto const nb = id[0]; | ||
auto const lQ = l * Q_bna[nb]; | ||
auto const ib = bna_ixyz[nb]; | ||
u0[ib] = (u0[ib] + lQ * u2ba[nb]) / (Real(1) + lQ); | ||
}); | ||
}); | ||
|
||
// Read output | ||
queue.submit([&](sycl::handler& cgh) { | ||
auto u1 = sycl::accessor{u1_buf, cgh, sycl::read_only}; | ||
auto out_ixyz = sycl::accessor{out_ixyz_buf, cgh, sycl::read_only}; | ||
auto u_out = sycl::accessor{u_out_buf, cgh, sycl::write_only}; | ||
cgh.parallel_for<struct ReadOutput>(Nr, [=](sycl::id<1> id) { | ||
auto const nr = id[0]; | ||
auto const ii = out_ixyz[nr]; | ||
u_out[nr * Nt + n] = static_cast<double>(u1[ii]); | ||
}); | ||
}); | ||
|
||
queue.wait_and_throw(); | ||
|
||
std::swap(u0_buf, u1_buf); | ||
|
||
auto tmp = u2b_buf; | ||
u2b_buf = u1b_buf; | ||
u1b_buf = u0b_buf; | ||
u0b_buf = tmp; | ||
|
||
auto const now = getTime(); | ||
|
||
auto const elapsed = Seconds(now - start); | ||
auto const elapsedSample = Seconds(now - sampleStart); | ||
|
||
auto const elapsedAirSample = elapsedTime(airStartEvent, airEndEvent); | ||
elapsedAir += elapsedAirSample; | ||
|
||
auto const elapsedBoundarySample = elapsedTime(boundaryStartEvent, boundaryEndEvent); | ||
elapsedBoundary += elapsedBoundarySample; | ||
|
||
print(ProgressReport{ | ||
.n = n, | ||
.Nt = Nt, | ||
.Npts = Npts, | ||
.Nb = Nb, | ||
.elapsed = elapsed.count(), | ||
.elapsedSample = elapsedSample.count(), | ||
.elapsedAir = elapsedAir.count(), | ||
.elapsedSampleAir = elapsedAirSample.count(), | ||
.elapsedBoundary = elapsedBoundary.count(), | ||
.elapsedSampleBoundary = elapsedBoundarySample.count(), | ||
.numWorkers = 1, | ||
}); | ||
} | ||
|
||
// Copy output to host | ||
auto host = sycl::host_accessor{u_out_buf, sycl::read_only}; | ||
for (auto i{0UL}; i < static_cast<size_t>(Nr * Nt); ++i) { | ||
sim.u_out[i] = host[i]; | ||
} | ||
|
||
fmt::println(""); | ||
|
||
return 0.0; | ||
} | ||
|
||
} // namespace pffdtd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
// SPDX-License-Identifier: MIT | ||
// SPDX-FileCopyrightText: 2024 Tobias Hienzsch | ||
#pragma once | ||
|
||
#include "pffdtd/simulation_3d.hpp" | ||
|
||
#if not defined(PFFDTD_HAS_SYCL) | ||
#error "SYCL must be enabled in CMake via PFFDTD_ENABLE_SYCL_ACPP or PFFDTD_ENABLE_SYCL_ONEAPI" | ||
#endif | ||
|
||
namespace pffdtd { | ||
|
||
struct Engine3DSYCL { | ||
auto operator()(Simulation3D& sim) const -> double; | ||
}; | ||
|
||
} // namespace pffdtd |
Oops, something went wrong.