diff --git a/src/cpp/main_3d/engine_cpu.cpp b/src/cpp/main_3d/engine_cpu.cpp index 5a7d47c..1305e01 100644 --- a/src/cpp/main_3d/engine_cpu.cpp +++ b/src/cpp/main_3d/engine_cpu.cpp @@ -27,7 +27,9 @@ #include #include -auto runSim(Simulation3D& sd) -> double { +namespace pffdtd { + +auto run(Simulation3D& sd) -> double { // keep local ints, scalars int64_t Ns = sd.Ns; int64_t Nr = sd.Nr; @@ -416,3 +418,5 @@ double process_bnl_pts_fd( } return omp_get_wtime() - tstart; } + +} // namespace pffdtd diff --git a/src/cpp/main_3d/engine_cpu.hpp b/src/cpp/main_3d/engine_cpu.hpp index b57271b..228cb91 100644 --- a/src/cpp/main_3d/engine_cpu.hpp +++ b/src/cpp/main_3d/engine_cpu.hpp @@ -19,7 +19,9 @@ #include -auto runSim(Simulation3D& sd) -> double; +namespace pffdtd { + +auto run(Simulation3D& sd) -> double; double process_bnl_pts_fd( Real* u0b, Real const* u2b, @@ -33,3 +35,5 @@ double process_bnl_pts_fd( MatQuad const* mat_quads, Real const* mat_beta ); + +} // namespace pffdtd diff --git a/src/cpp/main_3d/engine_cuda.hpp b/src/cpp/main_3d/engine_cuda.hpp index 4aff488..e579c3e 100644 --- a/src/cpp/main_3d/engine_cuda.hpp +++ b/src/cpp/main_3d/engine_cuda.hpp @@ -66,7 +66,7 @@ __constant__ int8_t cuMb[MNm]; //to store Mb per mat (MNm has to be hash-defined uint64_t print_gpu_details(int i); void check_sorted( Simulation3D const& sd); void split_data( Simulation3D const& sd, struct gpuHostData *ghds, int ngpus); -double runSim(Simulation3D &sd); +double run(Simulation3D &sd); //CUDA kernels __global__ void KernelAirCart(Real * __restrict__ u0, const Real * __restrict__ u1, const uint8_t * __restrict__ bn_mask); __global__ void KernelAirFCC(Real * __restrict__ u0, const Real * __restrict__ u1, const uint8_t * __restrict__ bn_mask); @@ -648,7 +648,7 @@ void split_data( Simulation3D const& sd, struct gpuHostData *ghds, int ngpus) { } //run the sim! -double runSim(Simulation3D &sd) +double run(Simulation3D &sd) { //if you want to test synchronous, env variable for that const char* s = getenv("CUDA_LAUNCH_BLOCKING"); diff --git a/src/cpp/main_3d/main.cpp b/src/cpp/main_3d/main.cpp index 8324134..6c2ea61 100644 --- a/src/cpp/main_3d/main.cpp +++ b/src/cpp/main_3d/main.cpp @@ -33,10 +33,10 @@ auto main(int /*argc*/, char** /*argv*/) -> int { auto const start = std::chrono::steady_clock::now(); - auto sim = Simulation3D{}; + auto sim = pffdtd::Simulation3D{}; loadSimulation3D(sim); scaleInput(sim); - runSim(sim); + run(sim); rescaleOutput(sim); writeOutputs(sim); printLastSample(sim); diff --git a/src/cpp/pffdtd/hdf.hpp b/src/cpp/pffdtd/hdf.hpp index 17a81d1..5c11c1f 100644 --- a/src/cpp/pffdtd/hdf.hpp +++ b/src/cpp/pffdtd/hdf.hpp @@ -10,6 +10,14 @@ namespace pffdtd { +enum DataType : unsigned char { + FLOAT64, + FLOAT32, + INT64, + INT8, + BOOL, +}; + template inline constexpr auto isStdVector = false; diff --git a/src/cpp/pffdtd/simulation_3d.cpp b/src/cpp/pffdtd/simulation_3d.cpp index 41ae847..208bf07 100644 --- a/src/cpp/pffdtd/simulation_3d.cpp +++ b/src/cpp/pffdtd/simulation_3d.cpp @@ -15,17 +15,14 @@ #include "simulation_3d.hpp" -#include "pffdtd/hdf.hpp" - #include +#include #include #include #include namespace { -#include - // linear indices to sub-indices in 3d, Nz continguous void ind2sub3d( int64_t idx, @@ -62,6 +59,8 @@ void check_inside_grid( } } // namespace +namespace pffdtd { + // load the sim data from Python-written HDF5 files void loadSimulation3D(Simulation3D& sim) { // local values, to read in and attach to struct at end @@ -970,3 +969,5 @@ void writeOutputs(Simulation3D& sim) { writer.write("u_out", std::span{u_out}, Nr, Nt); std::puts("wrote output dataset"); } + +} // namespace pffdtd diff --git a/src/cpp/pffdtd/simulation_3d.hpp b/src/cpp/pffdtd/simulation_3d.hpp index 8afa86f..9386727 100644 --- a/src/cpp/pffdtd/simulation_3d.hpp +++ b/src/cpp/pffdtd/simulation_3d.hpp @@ -16,10 +16,9 @@ #pragma once #include "pffdtd/config.hpp" +#include "pffdtd/hdf.hpp" #include "pffdtd/utility.hpp" -#include "hdf5.h" - #include // maximum number of RLC branches in freq-dep (FD) boundaries (needed at @@ -28,6 +27,8 @@ // maximum number of materials allows (needed at compile-time for CUDA) #define MNm 64 // change as necssary +namespace pffdtd { + // main sim data, on host struct Simulation3D { int64_t* bn_ixyz; // boundary node indices @@ -93,3 +94,5 @@ void readH5Dataset( DataType t ); void readH5Constant(hid_t file, char* dset_str, void* out, DataType t); + +} // namespace pffdtd diff --git a/src/cpp/pffdtd/utility.hpp b/src/cpp/pffdtd/utility.hpp index 22a373d..431f3d5 100644 --- a/src/cpp/pffdtd/utility.hpp +++ b/src/cpp/pffdtd/utility.hpp @@ -33,14 +33,6 @@ #define SET_BIT_VAL(var, pos, val) \ ((var) = ((var) & ~(1ULL << (pos))) | ((val) << (pos))) -enum DataType : unsigned char { - FLOAT64, - FLOAT32, - INT64, - INT8, - BOOL, -}; - void allocate_zeros(void** arr, uint64_t Nbytes); int cmpfunc_int64_keys(void const* a, void const* b); void sort_keys(int64_t* val_arr, int64_t* key_arr, int64_t N); diff --git a/src/cpp/pffdtd/video.cpp b/src/cpp/pffdtd/video.cpp index fd9ebfa..a2ac387 100644 --- a/src/cpp/pffdtd/video.cpp +++ b/src/cpp/pffdtd/video.cpp @@ -1,5 +1,7 @@ #include "video.hpp" +#include "pffdtd/exception.hpp" + #include namespace pffdtd { diff --git a/src/cpp/pffdtd/video.hpp b/src/cpp/pffdtd/video.hpp index 33aa729..52f871a 100644 --- a/src/cpp/pffdtd/video.hpp +++ b/src/cpp/pffdtd/video.hpp @@ -1,11 +1,8 @@ #pragma once -#include "pffdtd/exception.hpp" - #include #include -#include namespace pffdtd {