diff --git a/src/cpp/pffdtd/engine_cpu_3d.cpp b/src/cpp/pffdtd/engine_cpu_3d.cpp index f65923e..47a95e7 100644 --- a/src/cpp/pffdtd/engine_cpu_3d.cpp +++ b/src/cpp/pffdtd/engine_cpu_3d.cpp @@ -91,24 +91,24 @@ auto run(Simulation3D const& sd) -> void { int64_t const Nb = sd.Nb; int64_t const Nbl = sd.Nbl; int64_t const Nba = sd.Nba; - int8_t* Mb = sd.Mb; // keep local copies of pointers (style choice) - int64_t* bn_ixyz = sd.bn_ixyz; - int64_t* bnl_ixyz = sd.bnl_ixyz; - int64_t* bna_ixyz = sd.bna_ixyz; - int64_t* in_ixyz = sd.in_ixyz; - int64_t* out_ixyz = sd.out_ixyz; - uint16_t* adj_bn = sd.adj_bn; - uint8_t* bn_mask = sd.bn_mask; - int8_t* mat_bnl = sd.mat_bnl; - int8_t* Q_bna = sd.Q_bna; - double* in_sigs = sd.in_sigs; - double* u_out = sd.u_out; - int8_t const fcc_flag = sd.fcc_flag; - Real* ssaf_bnl = sd.ssaf_bnl; - Real* mat_beta = sd.mat_beta; - MatQuad* mat_quads = sd.mat_quads; + int8_t const* Mb = sd.Mb.data(); + int64_t const* bn_ixyz = sd.bn_ixyz.data(); + int64_t const* bnl_ixyz = sd.bnl_ixyz.data(); + int64_t const* bna_ixyz = sd.bna_ixyz.data(); + int64_t const* in_ixyz = sd.in_ixyz.data(); + int64_t const* out_ixyz = sd.out_ixyz.data(); + uint16_t const* adj_bn = sd.adj_bn.data(); + uint8_t const* bn_mask = sd.bn_mask.data(); + int8_t const* mat_bnl = sd.mat_bnl.data(); + int8_t* Q_bna = sd.Q_bna; + double const* in_sigs = sd.in_sigs.data(); + int8_t const fcc_flag = sd.fcc_flag; + Real const* ssaf_bnl = sd.ssaf_bnl.data(); + Real const* mat_beta = sd.mat_beta.data(); + MatQuad const* mat_quads = sd.mat_quads.data(); + double* u_out = sd.u_out.get(); // allocate memory auto u0_buf = std::vector(static_cast(Npts)); diff --git a/src/cpp/pffdtd/engine_cuda_3d.cu b/src/cpp/pffdtd/engine_cuda_3d.cu index 0e8d5e6..1eefbbb 100644 --- a/src/cpp/pffdtd/engine_cuda_3d.cu +++ b/src/cpp/pffdtd/engine_cuda_3d.cu @@ -106,21 +106,21 @@ __constant__ int8_t cuMb[MNm]; // to store Mb per mat // this is data on host, sometimes copied and recomputed for copy to GPU devices // (indices), sometimes just aliased pointers (scalar arrays) template -struct HostData { // arrays on host (for copy), mirrors gpu local data - double* in_sigs{}; // aliased - Real* u_out_buf{}; // aliased - double* u_out{}; // aliased - Real* ssaf_bnl{}; // aliased - int64_t* in_ixyz{}; // recomputed - int64_t* out_ixyz{}; // recomputed - int64_t* bn_ixyz{}; // recomputed - int64_t* bnl_ixyz{}; // recomputed - int64_t* bna_ixyz{}; // recomputed - int8_t* Q_bna{}; // aliased - uint16_t* adj_bn{}; // aliased - int8_t* mat_bnl{}; // aliased - uint8_t* bn_mask{}; // recomputed - int8_t* K_bn{}; // aliased +struct HostData { // arrays on host (for copy), mirrors gpu local data + double const* in_sigs{}; // aliased + Real* u_out_buf{}; // aliased + double* u_out{}; // aliased + Real const* ssaf_bnl{}; // aliased + int64_t* in_ixyz{}; // recomputed + int64_t* out_ixyz{}; // recomputed + int64_t* bn_ixyz{}; // recomputed + int64_t* bnl_ixyz{}; // recomputed + int64_t* bna_ixyz{}; // recomputed + int8_t* Q_bna{}; // aliased + uint16_t const* adj_bn{}; // aliased + int8_t const* mat_bnl{}; // aliased + uint8_t* bn_mask{}; // recomputed + int8_t const* K_bn{}; // aliased int64_t Ns{}; int64_t Nr{}; int64_t Npts{}; @@ -546,16 +546,16 @@ auto print_gpu_details(int i) -> uint64_t { // input indices need to be sorted for multi-device allocation template void checkSorted(Simulation3D const& sim) { - int64_t* bn_ixyz = sim.bn_ixyz; - int64_t* bnl_ixyz = sim.bnl_ixyz; - int64_t* bna_ixyz = sim.bna_ixyz; - int64_t* in_ixyz = sim.in_ixyz; - int64_t* out_ixyz = sim.out_ixyz; - int64_t const Nb = sim.Nb; - int64_t const Nbl = sim.Nbl; - int64_t const Nba = sim.Nba; - int64_t const Ns = sim.Ns; - int64_t const Nr = sim.Nr; + int64_t const* bn_ixyz = sim.bn_ixyz.data(); + int64_t const* bnl_ixyz = sim.bnl_ixyz.data(); + int64_t const* bna_ixyz = sim.bna_ixyz.data(); + int64_t const* in_ixyz = sim.in_ixyz.data(); + int64_t const* out_ixyz = sim.out_ixyz.data(); + int64_t const Nb = sim.Nb; + int64_t const Nbl = sim.Nbl; + int64_t const Nba = sim.Nba; + int64_t const Ns = sim.Ns; + int64_t const Nr = sim.Nr; for (int64_t i = 1; i < Nb; i++) { PFFDTD_ASSERT(bn_ixyz[i] > bn_ixyz[i - 1]); // check save_gpu_folder } @@ -624,8 +624,8 @@ void splitData(Simulation3D const& sim, std::span> ghds) { } // bn_ixyz - Nb - int64_t* bn_ixyz = sim.bn_ixyz; - int64_t const Nb = sim.Nb; + int64_t const* bn_ixyz = sim.bn_ixyz.data(); + int64_t const Nb = sim.Nb; { int gid = 0; for (int64_t i = 0; i < Nb; i++) { @@ -644,8 +644,8 @@ void splitData(Simulation3D const& sim, std::span> ghds) { PFFDTD_ASSERT(Nb_check == Nb); // bnl_ixyz - Nbl - int64_t* bnl_ixyz = sim.bnl_ixyz; - int64_t const Nbl = sim.Nbl; + int64_t const* bnl_ixyz = sim.bnl_ixyz.data(); + int64_t const Nbl = sim.Nbl; { int gid = 0; for (int64_t i = 0; i < Nbl; i++) { @@ -664,8 +664,8 @@ void splitData(Simulation3D const& sim, std::span> ghds) { PFFDTD_ASSERT(Nbl_check == Nbl); // bna_ixyz - Nba - int64_t* bna_ixyz = sim.bna_ixyz; - int64_t const Nba = sim.Nba; + int64_t const* bna_ixyz = sim.bna_ixyz.data(); + int64_t const Nba = sim.Nba; { int gid = 0; for (int64_t i = 0; i < Nba; i++) { @@ -684,8 +684,8 @@ void splitData(Simulation3D const& sim, std::span> ghds) { PFFDTD_ASSERT(Nba_check == Nba); // in_ixyz - Ns - int64_t* in_ixyz = sim.in_ixyz; - int64_t const Ns = sim.Ns; + int64_t const* in_ixyz = sim.in_ixyz.data(); + int64_t const Ns = sim.Ns; { int gid = 0; for (int64_t i = 0; i < Ns; i++) { @@ -704,8 +704,8 @@ void splitData(Simulation3D const& sim, std::span> ghds) { PFFDTD_ASSERT(Ns_check == Ns); // out_ixyz - Nr - int64_t* out_ixyz = sim.out_ixyz; - int64_t const Nr = sim.Nr; + int64_t const* out_ixyz = sim.out_ixyz.data(); + int64_t const Nr = sim.Nr; { int gid = 0; for (int64_t i = 0; i < Nr; i++) { @@ -829,13 +829,13 @@ auto run(Simulation3D const& sim) -> void { std::printf("Nx=%ld Ns=%ld Nr=%ld Nb=%ld, Npts=%ld\n", host.Nx, host.Ns, host.Nr, host.Nb, host.Npts); // aliased pointers (to memory already allocated) - host.in_sigs = sim.in_sigs + Ns_read * sim.Nt; - host.ssaf_bnl = sim.ssaf_bnl + Nbl_read; - host.adj_bn = sim.adj_bn + Nb_read; - host.mat_bnl = sim.mat_bnl + Nbl_read; - host.K_bn = sim.K_bn + Nb_read; + host.in_sigs = sim.in_sigs.data() + Ns_read * sim.Nt; + host.ssaf_bnl = sim.ssaf_bnl.data() + Nbl_read; + host.adj_bn = sim.adj_bn.data() + Nb_read; + host.mat_bnl = sim.mat_bnl.data() + Nbl_read; + host.K_bn = sim.K_bn.data() + Nb_read; host.Q_bna = sim.Q_bna + Nba_read; - host.u_out = sim.u_out + Nr_read * sim.Nt; + host.u_out = sim.u_out.get() + Nr_read * sim.Nt; host.u_out_buf = u_out_buf + Nr_read; // recalculate indices, these are associated host versions to copy over to devices @@ -941,12 +941,15 @@ auto run(Simulation3D const& sim) -> void { gpuErrchk(cudaMemcpy(gpu.mat_bnl, host.mat_bnl, (size_t)host.Nbl * sizeof(int8_t), cudaMemcpyHostToDevice)); gpuErrchk(cudaMalloc(&(gpu.mat_beta), (size_t)sim.Nm * sizeof(Real))); - gpuErrchk(cudaMemcpy(gpu.mat_beta, sim.mat_beta, (size_t)sim.Nm * sizeof(Real), cudaMemcpyHostToDevice)); + gpuErrchk(cudaMemcpy(gpu.mat_beta, sim.mat_beta.data(), (size_t)sim.Nm * sizeof(Real), cudaMemcpyHostToDevice)); gpuErrchk(cudaMalloc(&(gpu.mat_quads), (size_t)sim.Nm * MMb * sizeof(MatQuad))); - gpuErrchk( - cudaMemcpy(gpu.mat_quads, sim.mat_quads, (size_t)sim.Nm * MMb * sizeof(MatQuad), cudaMemcpyHostToDevice) - ); + gpuErrchk(cudaMemcpy( + gpu.mat_quads, + sim.mat_quads.data(), + (size_t)sim.Nm * MMb * sizeof(MatQuad), + cudaMemcpyHostToDevice + )); gpuErrchk(cudaMalloc(&(gpu.bn_mask), (size_t)(host.Nbm * sizeof(uint8_t)))); gpuErrchk(cudaMemcpy(gpu.bn_mask, host.bn_mask, (size_t)host.Nbm * sizeof(uint8_t), cudaMemcpyHostToDevice)); @@ -969,15 +972,15 @@ auto run(Simulation3D const& sim) -> void { std::printf("\n"); // swapping x and z here (CUDA has first dim contiguous) + // same for all devices gpuErrchk(cudaMemcpyToSymbol(cuNx, &(sim.Nz), sizeof(int64_t))); gpuErrchk(cudaMemcpyToSymbol(cuNy, &(sim.Ny), sizeof(int64_t))); gpuErrchk(cudaMemcpyToSymbol(cuNz, &(host.Nxh), sizeof(int64_t))); gpuErrchk(cudaMemcpyToSymbol(cuNb, &(host.Nb), sizeof(int64_t))); gpuErrchk(cudaMemcpyToSymbol(cuNbl, &(host.Nbl), sizeof(int64_t))); gpuErrchk(cudaMemcpyToSymbol(cuNba, &(host.Nba), sizeof(int64_t))); - gpuErrchk(cudaMemcpyToSymbol(cuMb, sim.Mb, sim.Nm * sizeof(int8_t))); - gpuErrchk(cudaMemcpyToSymbol(cuNxNy, &Nzy, - sizeof(int64_t))); // same for all devices + gpuErrchk(cudaMemcpyToSymbol(cuMb, sim.Mb.data(), sim.Nm * sizeof(int8_t))); + gpuErrchk(cudaMemcpyToSymbol(cuNxNy, &Nzy, sizeof(int64_t))); if constexpr (std::is_same_v) { gpuErrchk(cudaMemcpyToSymbol(c1_f32, &a1, sizeof(float))); diff --git a/src/cpp/pffdtd/engine_sycl_3d.cpp b/src/cpp/pffdtd/engine_sycl_3d.cpp index 4509fcf..8d0563d 100644 --- a/src/cpp/pffdtd/engine_sycl_3d.cpp +++ b/src/cpp/pffdtd/engine_sycl_3d.cpp @@ -54,7 +54,6 @@ auto run(Simulation3D const& sim) -> void { auto const Nb = sim.Nb; auto const Nbl = sim.Nbl; auto const Nba = sim.Nba; - auto const Nm = sim.Nm; auto const Nr = sim.Nr; auto const Ns = sim.Ns; auto const Nt = sim.Nt; @@ -66,29 +65,29 @@ auto run(Simulation3D const& sim) -> void { auto const a2 = static_cast(sim.a2); auto Q_bna_buf = sycl::buffer{sim.Q_bna, sycl::range(size_t(Nba))}; - auto bn_mask_buf = sycl::buffer{sim.bn_mask, sycl::range(size_t(Npts))}; - auto adj_bn_buf = sycl::buffer{sim.adj_bn, sycl::range(size_t(Nb))}; - auto bn_ixyz_buf = sycl::buffer{sim.bn_ixyz, sycl::range(size_t(Nb))}; - auto bnl_ixyz_buf = sycl::buffer{sim.bnl_ixyz, sycl::range(size_t(Nb))}; - auto bna_ixyz_buf = sycl::buffer{sim.bna_ixyz, sycl::range(size_t(Nba))}; - auto in_ixyz_buf = sycl::buffer{sim.in_ixyz, sycl::range(size_t(Ns))}; - auto out_ixyz_buf = sycl::buffer{sim.out_ixyz, sycl::range(size_t(Nr))}; - auto in_sigs_buf = sycl::buffer{sim.in_sigs, sycl::range(size_t(Ns * Nt))}; - auto mat_beta_buf = sycl::buffer{sim.mat_beta, sycl::range(size_t(Nm))}; - auto mat_bnl_buf = sycl::buffer{sim.mat_bnl, sycl::range(size_t(Nbl))}; - auto mat_quads_buf = sycl::buffer{sim.mat_quads, sycl::range(size_t(Nm * MMb))}; - auto Mb_buf = sycl::buffer{sim.Mb, sycl::range(size_t(Nm))}; - auto ssaf_bnl_buf = sycl::buffer{sim.ssaf_bnl, sycl::range(size_t(Nbl))}; - - auto u0_buf = sycl::buffer(size_t(Npts)); - auto u1_buf = sycl::buffer(size_t(Npts)); - auto u0b_buf = sycl::buffer(size_t(Nbl)); - auto u1b_buf = sycl::buffer(size_t(Nbl)); - auto u2b_buf = sycl::buffer(size_t(Nbl)); - auto u2ba_buf = sycl::buffer(size_t(Nba)); - auto vh1_buf = sycl::buffer(size_t(Nbl * MMb)); - auto gh1_buf = sycl::buffer(size_t(Nbl * MMb)); - auto u_out_buf = sycl::buffer(size_t(Nr * Nt)); + auto bn_mask_buf = sycl::buffer{sim.bn_mask}; + auto adj_bn_buf = sycl::buffer{sim.adj_bn}; + auto bn_ixyz_buf = sycl::buffer{sim.bn_ixyz}; + auto bnl_ixyz_buf = sycl::buffer{sim.bnl_ixyz}; + auto bna_ixyz_buf = sycl::buffer{sim.bna_ixyz}; + auto in_ixyz_buf = sycl::buffer{sim.in_ixyz}; + auto out_ixyz_buf = sycl::buffer{sim.out_ixyz}; + auto in_sigs_buf = sycl::buffer{sim.in_sigs}; + auto mat_beta_buf = sycl::buffer{sim.mat_beta}; + auto mat_bnl_buf = sycl::buffer{sim.mat_bnl}; + auto mat_quads_buf = sycl::buffer{sim.mat_quads}; + auto Mb_buf = sycl::buffer{sim.Mb}; + auto ssaf_bnl_buf = sycl::buffer{sim.ssaf_bnl}; + + auto u0_buf = sycl::buffer(static_cast(Npts)); + auto u1_buf = sycl::buffer(static_cast(Npts)); + auto u0b_buf = sycl::buffer(static_cast(Nbl)); + auto u1b_buf = sycl::buffer(static_cast(Nbl)); + auto u2b_buf = sycl::buffer(static_cast(Nbl)); + auto u2ba_buf = sycl::buffer(static_cast(Nba)); + auto vh1_buf = sycl::buffer(static_cast(Nbl * MMb)); + auto gh1_buf = sycl::buffer(static_cast(Nbl * MMb)); + auto u_out_buf = sycl::buffer(static_cast(Nr * Nt)); auto elapsedAir = std::chrono::nanoseconds{0}; auto elapsedBoundary = std::chrono::nanoseconds{0}; diff --git a/src/cpp/pffdtd/hdf.hpp b/src/cpp/pffdtd/hdf.hpp index c34ea6b..3d10052 100644 --- a/src/cpp/pffdtd/hdf.hpp +++ b/src/cpp/pffdtd/hdf.hpp @@ -79,35 +79,41 @@ struct H5FReader { auto set = H5Dopen(_handle, dataset, H5P_DEFAULT); auto space = H5Dget_space(set); - auto ndims = 1; + // auto ndims = 1; + // PFFDTD_ASSERT(H5Sget_simple_extent_ndims(space) == ndims); + + auto ndims = H5Sget_simple_extent_ndims(space); auto dims = std::array{}; - PFFDTD_ASSERT(H5Sget_simple_extent_ndims(space) == ndims); H5Sget_simple_extent_dims(space, dims.data(), nullptr); auto size = ndims == 1 ? dims[0] : dims[0] * dims[1]; - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { + auto type = H5T_NATIVE_INT64; + auto buf = std::vector(size); + auto err = H5Dread(set, type, H5S_ALL, H5S_ALL, H5P_DEFAULT, buf.data()); + checkErrorAndCloseDataset(dataset, set, err); + return buf; + } else if constexpr (std::is_same_v) { auto type = H5T_NATIVE_UINT8; auto buf = std::vector(size); auto err = H5Dread(set, type, H5S_ALL, H5S_ALL, H5P_DEFAULT, buf.data()); checkErrorAndCloseDataset(dataset, set, err); return buf; - } - - if constexpr (std::is_same_v) { - auto type = H5T_NATIVE_INT64; + } else if constexpr (std::is_same_v) { + auto type = H5T_NATIVE_INT8; auto buf = std::vector(size); auto err = H5Dread(set, type, H5S_ALL, H5S_ALL, H5P_DEFAULT, buf.data()); checkErrorAndCloseDataset(dataset, set, err); return buf; - } - - if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v) { auto type = H5T_NATIVE_DOUBLE; auto buf = std::vector(size); auto err = H5Dread(set, type, H5S_ALL, H5S_ALL, H5P_DEFAULT, buf.data()); checkErrorAndCloseDataset(dataset, set, err); return buf; + } else { + static_assert(always_false); } } diff --git a/src/cpp/pffdtd/simulation_3d.cpp b/src/cpp/pffdtd/simulation_3d.cpp index c52ed0b..12d8828 100644 --- a/src/cpp/pffdtd/simulation_3d.cpp +++ b/src/cpp/pffdtd/simulation_3d.cpp @@ -70,7 +70,7 @@ void ind2sub3d(int64_t idx, int64_t Nx, int64_t Ny, int64_t Nz, int64_t* ix, int } // double check some index inside grid -void check_inside_grid(int64_t* idx, int64_t N, int64_t Nx, int64_t Ny, int64_t Nz) { +void check_inside_grid(int64_t const* idx, int64_t N, int64_t Nx, int64_t Ny, int64_t Nz) { for (int64_t i = 0; i < N; i++) { int64_t iz = 0; int64_t iy = 0; @@ -169,9 +169,8 @@ template ////////////////// // bn_ixyz dataset ////////////////// - expected_ndims = 1; - int64_t* bn_ixyz = read(vox_out, "bn_ixyz", expected_ndims, dims); - PFFDTD_ASSERT(static_cast(dims[0]) == Nb); + auto bn_ixyz = vox_out.read>("bn_ixyz"); + PFFDTD_ASSERT(std::cmp_equal(bn_ixyz.size(), Nb)); ////////////////// // adj_bn dataset @@ -184,9 +183,8 @@ template ////////////////// // mat_bn dataset ////////////////// - expected_ndims = 1; - int8_t* mat_bn = read(vox_out, "mat_bn", expected_ndims, dims); - PFFDTD_ASSERT(static_cast(dims[0]) == Nb); + auto mat_bn = vox_out.read>("mat_bn"); + PFFDTD_ASSERT(std::cmp_equal(mat_bn.size(), Nb)); ////////////////// // saf_bn dataset @@ -230,28 +228,23 @@ template ////////////////// // in_ixyz dataset ////////////////// - expected_ndims = 1; - int64_t* in_ixyz = read(signals, "in_ixyz", expected_ndims, dims); - PFFDTD_ASSERT(static_cast(dims[0]) == Ns); + auto in_ixyz = signals.read>("in_ixyz"); + PFFDTD_ASSERT(std::cmp_equal(in_ixyz.size(), Ns)); ////////////////// // out_ixyz dataset ////////////////// - expected_ndims = 1; - int64_t* out_ixyz = read(signals, "out_ixyz", expected_ndims, dims); - PFFDTD_ASSERT(static_cast(dims[0]) == Nr); + auto out_ixyz = signals.read>("out_ixyz"); + PFFDTD_ASSERT(std::cmp_equal(out_ixyz.size(), Nr)); - expected_ndims = 1; - int64_t* out_reorder = read(signals, "out_reorder", expected_ndims, dims); - PFFDTD_ASSERT(static_cast(dims[0]) == Nr); + auto out_reorder = signals.read>("out_reorder"); + PFFDTD_ASSERT(std::cmp_equal(out_reorder.size(), Nr)); ////////////////// // in_sigs dataset ////////////////// - expected_ndims = 2; - double* in_sigs = read(signals, "in_sigs", expected_ndims, dims); - PFFDTD_ASSERT(static_cast(dims[0]) == Ns); - PFFDTD_ASSERT(static_cast(dims[1]) == Nt); + auto in_sigs = signals.read>("in_sigs"); + PFFDTD_ASSERT(std::cmp_equal(in_sigs.size(), Ns * Nt)); // not recommended to run single without differentiating input if (sizeof(Real) == 4) { @@ -275,8 +268,8 @@ template fmt::println("Nm={}", Nm); PFFDTD_ASSERT(Nm <= MNm); - expected_ndims = 1; - int8_t* Mb = read(materials, "Mb", expected_ndims, dims); + auto Mb = materials.read>("Mb"); + PFFDTD_ASSERT(std::cmp_equal(Mb.size(), Nm)); for (int8_t i = 0; i < Nm; i++) { fmt::println("Mb[{}]={}", i, Mb[i]); @@ -285,8 +278,8 @@ template ////////////////// // DEF (RLC) datasets ////////////////// - auto* mat_beta = allocate_zeros(Nm); - auto* mat_quads = allocate_zeros>(static_cast(Nm) * MMb); + auto mat_beta = std::vector(size_t(Nm)); + auto mat_quads = std::vector>(static_cast(Nm) * size_t(MMb)); for (int8_t i = 0; i < Nm; i++) { expected_ndims = 2; auto* DEF = read(materials, fmt::format("mat_{:02d}_DEF", i).c_str(), expected_ndims, dims); @@ -331,7 +324,7 @@ template ////////////////// // check bn_ixyz ////////////////// - check_inside_grid(bn_ixyz, Nb, Nx, Ny, Nz); + check_inside_grid(bn_ixyz.data(), Nb, Nx, Ny, Nz); fmt::println("bn_ixyz checked"); ////////////////// @@ -355,7 +348,7 @@ template ////////////////// // bit-pack and check adj_bn ////////////////// - auto* adj_bn = allocate_zeros(Nb); + auto adj_bn = std::vector(size_t(Nb)); for (int64_t i = 0; i < Nb; i++) { for (int8_t j = 0; j < NN; j++) { @@ -375,8 +368,7 @@ template ////////////////// // calculate K_bn from adj_bn ////////////////// - auto* K_bn = allocate_zeros(Nb); - + auto K_bn = std::vector(size_t(Nb)); for (int64_t nb = 0; nb < Nb; nb++) { K_bn[nb] = 0; for (uint8_t nn = 0; nn < NN; nn++) { @@ -390,14 +382,14 @@ template ////////////////// // make compressed bit-mask int64_t const Nbm = (Npts - 1) / 8 + 1; - auto* bn_mask = allocate_zeros(Nbm); // one bit per + auto bn_mask = std::vector(size_t(Nbm)); // one bit per for (int64_t i = 0; i < Nb; i++) { int64_t const ii = bn_ixyz[i]; SET_BIT(bn_mask[ii >> 3], ii % 8); } // create bn_mask_raw to double check - bool* bn_mask_raw = allocate_zeros(Npts); + auto bn_mask_raw = std::vector(size_t(Npts)); for (int64_t i = 0; i < Nb; i++) { int64_t const ii = bn_ixyz[i]; @@ -415,7 +407,6 @@ template } } fmt::println("bn_mask double checked"); - delete[] bn_mask_raw; // count Nbl int64_t Nbl = 0; @@ -423,9 +414,9 @@ template Nbl += static_cast(mat_bn[i] >= 0); } fmt::println("Nbl = {}", Nbl); - auto* mat_bnl = allocate_zeros(Nbl); - auto* bnl_ixyz = allocate_zeros(Nbl); - auto* ssaf_bnl = allocate_zeros(Nbl); + auto mat_bnl = std::vector(static_cast(Nbl)); + auto bnl_ixyz = std::vector(static_cast(Nbl)); + auto ssaf_bnl = std::vector(static_cast(Nbl)); { int64_t j = 0; for (int64_t i = 0; i < Nb; i++) { @@ -438,7 +429,6 @@ template } PFFDTD_ASSERT(j == Nbl); } - delete[] mat_bn; delete[] ssaf_bn; fmt::println("separated non-rigid bn"); @@ -450,8 +440,8 @@ template Nba /= 2; } - auto* bna_ixyz = allocate_zeros(Nba); - auto* Q_bna = allocate_zeros(Nba); + auto bna_ixyz = std::vector(size_t(Nba)); + auto* Q_bna = allocate_zeros(Nba); { int64_t ii = 0; for (int64_t ix = 1; ix < Nx - 1; ix++) { @@ -482,7 +472,7 @@ template fmt::println("ABC nodes"); if (fcc_flag == 2) { // need to sort bna_ixyz auto* bna_sort_keys = allocate_zeros(Nba); - sort_keys(bna_ixyz, bna_sort_keys, Nba); + sort_keys(bna_ixyz.data(), bna_sort_keys, Nba); // now sort corresponding Q_bna array int8_t* Q_bna_unsorted = nullptr; @@ -501,20 +491,20 @@ template } return Simulation3D{ - .bn_ixyz = bn_ixyz, - .bnl_ixyz = bnl_ixyz, - .bna_ixyz = bna_ixyz, + .bn_ixyz = std::move(bn_ixyz), + .bnl_ixyz = std::move(bnl_ixyz), + .bna_ixyz = std::move(bna_ixyz), .Q_bna = Q_bna, - .in_ixyz = in_ixyz, - .out_ixyz = out_ixyz, - .out_reorder = out_reorder, - .adj_bn = adj_bn, - .ssaf_bnl = ssaf_bnl, - .bn_mask = bn_mask, - .mat_bnl = mat_bnl, - .K_bn = K_bn, - .in_sigs = in_sigs, - .u_out = allocate_zeros(Nr * Nt), + .in_ixyz = std::move(in_ixyz), + .out_ixyz = std::move(out_ixyz), + .out_reorder = std::move(out_reorder), + .adj_bn = std::move(adj_bn), + .ssaf_bnl = std::move(ssaf_bnl), + .bn_mask = std::move(bn_mask), + .mat_bnl = std::move(mat_bnl), + .K_bn = std::move(K_bn), + .in_sigs = std::move(in_sigs), + .u_out = std::unique_ptr{allocate_zeros(Nr * Nt)}, .Ns = Ns, .Nr = Nr, .Nt = Nt, @@ -530,9 +520,9 @@ template .fcc_flag = fcc_flag, .NN = NN, .Nm = Nm, - .Mb = Mb, - .mat_quads = mat_quads, - .mat_beta = mat_beta, + .Mb = std::move(Mb), + .mat_quads = std::move(mat_quads), + .mat_beta = std::move(mat_beta), .infac = 1.0, .sl2 = sl2, .lo2 = lo2, @@ -543,10 +533,10 @@ template template void writeOutputs_impl(Simulation3D const& sim, std::filesystem::path const& simDir) { - auto Nt = static_cast(sim.Nt); - auto Nr = static_cast(sim.Nr); - auto* out_reorder = sim.out_reorder; - auto u_out = stdex::mdarray>(Nr, Nt); + auto const* out_reorder = sim.out_reorder.data(); + auto Nt = static_cast(sim.Nt); + auto Nr = static_cast(sim.Nr); + auto u_out = stdex::mdarray>(Nr, Nt); // write outputs in correct order for (auto nr = size_t{0}; nr < Nr; ++nr) { @@ -563,10 +553,10 @@ void writeOutputs_impl(Simulation3D const& sim, std::filesystem::path cons // print last samples of simulation (for correctness checking..) template void printLastSample_impl(Simulation3D const& sim) { - int64_t const Nt = sim.Nt; - int64_t const Nr = sim.Nr; - double* u_out = sim.u_out; - int64_t* out_reorder = sim.out_reorder; + int64_t const Nt = sim.Nt; + int64_t const Nr = sim.Nr; + double* u_out = sim.u_out.get(); + int64_t const* out_reorder = sim.out_reorder.data(); // print last samples fmt::println("RAW OUTPUTS"); for (int64_t nr = 0; nr < Nr; nr++) { diff --git a/src/cpp/pffdtd/simulation_3d.hpp b/src/cpp/pffdtd/simulation_3d.hpp index ee4bcc8..b58577e 100644 --- a/src/cpp/pffdtd/simulation_3d.hpp +++ b/src/cpp/pffdtd/simulation_3d.hpp @@ -15,6 +15,7 @@ #include #include #include +#include namespace pffdtd { @@ -33,43 +34,43 @@ struct MatQuad { // main sim data, on host template struct Simulation3D { - int64_t* bn_ixyz; // boundary node indices - int64_t* bnl_ixyz; // lossy boundary node indices - int64_t* bna_ixyz; // absorbing boundary node indices - int8_t* Q_bna; // integer for ABCs (wall 1,edge 2,corner 3) - int64_t* in_ixyz; // input points - int64_t* out_ixyz; // output points - int64_t* out_reorder; // ordering for outputs point for final print/save - uint16_t* adj_bn; // nearest-neighbour adjancencies for all boundary nodes - Real* ssaf_bnl; // surface area corrections (with extra volume scaling) - uint8_t* bn_mask; // bit mask for bounday nodes - int8_t* mat_bnl; // material indices for lossy boundary nodes - int8_t* K_bn; // number of adjacent neighbours, boundary nodesa - double* in_sigs; // input signals - double* u_out; // for output signals - int64_t Ns; // number of input grid points - int64_t Nr; // number of output grid points - int64_t Nt; // number of samples simulation - int64_t Npts; // number of Cartesian grid points - int64_t Nx; // x-dim (non-continguous) - int64_t Ny; // y-dim - int64_t Nz; // z-dim (continguous) - int64_t Nb; // number of boundary nodes - int64_t Nbl; // number of lossy boundary nodes - int64_t Nba; // number of ABC nodes - double l; // Courant number (CFL) - double l2; // CFL number squared - int8_t fcc_flag; // boolean for FCC - int8_t NN; // integer, neareast neighbours - int8_t Nm; // number of materials used - int8_t* Mb; // number of branches per material - MatQuad* mat_quads; // RLC coefficients (essentially) - Real* mat_beta; // part of FD-boundaries one per material - double infac; // rescaling of input (for numerical reason) - Real sl2; // scaled l2 (for single precision) - Real lo2; // 0.5*l - Real a2; // update stencil coefficient - Real a1; // update stencil coefficient + std::vector bn_ixyz; // boundary node indices + std::vector bnl_ixyz; // lossy boundary node indices + std::vector bna_ixyz; // absorbing boundary node indices + int8_t* Q_bna; // integer for ABCs (wall 1,edge 2,corner 3) + std::vector in_ixyz; // input points + std::vector out_ixyz; // output points + std::vector out_reorder; // ordering for outputs point for final print/save + std::vector adj_bn; // nearest-neighbour adjancencies for all boundary nodes + std::vector ssaf_bnl; // surface area corrections (with extra volume scaling) + std::vector bn_mask; // bit mask for bounday nodes + std::vector mat_bnl; // material indices for lossy boundary nodes + std::vector K_bn; // number of adjacent neighbours, boundary nodesa + std::vector in_sigs; // input signals + std::unique_ptr u_out; // for output signals + int64_t Ns; // number of input grid points + int64_t Nr; // number of output grid points + int64_t Nt; // number of samples simulation + int64_t Npts; // number of Cartesian grid points + int64_t Nx; // x-dim (non-continguous) + int64_t Ny; // y-dim + int64_t Nz; // z-dim (continguous) + int64_t Nb; // number of boundary nodes + int64_t Nbl; // number of lossy boundary nodes + int64_t Nba; // number of ABC nodes + double l; // Courant number (CFL) + double l2; // CFL number squared + int8_t fcc_flag; // boolean for FCC + int8_t NN; // integer, neareast neighbours + int8_t Nm; // number of materials used + std::vector Mb; // number of branches per material + std::vector> mat_quads; // RLC coefficients (essentially) + std::vector mat_beta; // part of FD-boundaries one per material + double infac; // rescaling of input (for numerical reason) + Real sl2; // scaled l2 (for single precision) + Real lo2; // 0.5*l + Real a2; // update stencil coefficient + Real a1; // update stencil coefficient }; [[nodiscard]] auto loadSimulation3D_float(std::filesystem::path const& simDir) -> Simulation3D; @@ -95,7 +96,7 @@ template // scale input to be in middle of floating-point range template void scaleInput(Simulation3D& sim) { - double* in_sigs = sim.in_sigs; + auto* in_sigs = sim.in_sigs.data(); int64_t const Nt = sim.Nt; int64_t const Ns = sim.Ns; @@ -133,8 +134,7 @@ void scaleInput(Simulation3D& sim) { } } - sim.infac = infac; - sim.in_sigs = in_sigs; + sim.infac = infac; } template @@ -142,30 +142,14 @@ void rescaleOutput(Simulation3D& sim) { int64_t const Nt = sim.Nt; int64_t const Nr = sim.Nr; double infac = sim.infac; - double* u_out = sim.u_out; + double* u_out = sim.u_out.get(); std::transform(u_out, u_out + Nr * Nt, u_out, [infac](auto sample) { return sample * infac; }); } template void freeSimulation3D(Simulation3D const& sim) { - delete[] sim.bn_ixyz; - delete[] sim.bnl_ixyz; - delete[] sim.bna_ixyz; delete[] sim.Q_bna; - delete[] sim.adj_bn; - delete[] sim.mat_bnl; - delete[] sim.bn_mask; - delete[] sim.ssaf_bnl; - delete[] sim.K_bn; - delete[] sim.in_ixyz; - delete[] sim.out_ixyz; - delete[] sim.out_reorder; - delete[] sim.in_sigs; - delete[] sim.u_out; - delete[] sim.Mb; - delete[] sim.mat_beta; - delete[] sim.mat_quads; } } // namespace pffdtd