Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port mixing length model to the gpu path (and gradient fix, part 2) #226

Merged
merged 10 commits into from
Aug 31, 2023
17 changes: 17 additions & 0 deletions src/BCintegrator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ using namespace mfem;
// Boundary face term: <F.n(u),[w]>
class BCintegrator : public NonlinearFormIntegrator {
friend class GradFaceIntegrator;
friend class M2ulPhyS;

protected:
MPI_Groups *groupsMPI;
Expand Down Expand Up @@ -108,6 +109,22 @@ class BCintegrator : public NonlinearFormIntegrator {
static void retrieveGradientsData_gpu(ParGridFunction *gradUp, DenseTensor &elGradUp, Array<int> &vdofs,
const int &num_equation, const int &dim, const int &totalDofs,
const int &elDofs);

boundaryCategory getAttributeCategory(int attr) const {
std::unordered_map<int, BoundaryCondition *>::const_iterator ibc = inletBCmap.find(attr);
std::unordered_map<int, BoundaryCondition *>::const_iterator obc = outletBCmap.find(attr);
std::unordered_map<int, BoundaryCondition *>::const_iterator wbc = wallBCmap.find(attr);
if (ibc != inletBCmap.end()) {
return INLET;
}
if (obc != outletBCmap.end()) {
return OUTLET;
}
if (wbc != wallBCmap.end()) {
return WALL;
}
return NUM_BC_CATEGORIES;
}
};

#endif // BCINTEGRATOR_HPP_
145 changes: 138 additions & 7 deletions src/M2ulPhyS.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@

#include "M2ulPhyS.hpp"

#include "wallBC.hpp"

M2ulPhyS::M2ulPhyS(TPS::Tps *tps)
: groupsMPI(new MPI_Groups(tps->getTPSCommWorld())),
nprocs_(groupsMPI->getTPSWorldSize()),
Expand Down Expand Up @@ -70,6 +72,8 @@ M2ulPhyS::M2ulPhyS(string &inputFileName, TPS::Tps *tps)
bcIntegrator->initBCs();
}

initIndirectionBC();

// set default solver state
exit_status_ = NORMAL;

Expand Down Expand Up @@ -379,10 +383,16 @@ void M2ulPhyS::initVariables() {

if (config.use_mixing_length) {
#if defined(_CUDA_) || defined(_HIP_)
mfem_error("MixingLengthTransport is not yet supported for GPU builds!");
// dynamic_cast on a device-side pointer doesn't work, so instead
// pass the TransportProperties pointer and cast it within
// instantiateDeviceMixingLengthTransport
TransportProperties *temporary_transport = transportPtr;
tpsGpuMalloc((void **)&transportPtr, sizeof(MixingLengthTransport));
gpu::instantiateDeviceMixingLengthTransport<<<1, 1>>>(d_mixture, config.mix_length_trans_input_,
temporary_transport, transportPtr);
#else
// Build mixing length transport using whatever molecular transport we've already instantiated
TransportProperties *temporary_transport = transportPtr;
MolecularTransport *temporary_transport = dynamic_cast<MolecularTransport *>(transportPtr);
transportPtr = new MixingLengthTransport(mixture, config, temporary_transport);
#endif
}
Expand Down Expand Up @@ -820,7 +830,7 @@ void M2ulPhyS::initIndirectionArrays() {
auto h_delta_el1 = face_data.delta_el1.HostWrite();
auto h_delta_el2 = face_data.delta_el2.HostWrite();
auto h_dist1 = face_data.dist1.HostWrite();
auto h_dist2 = face_data.dist1.HostWrite();
auto h_dist2 = face_data.dist2.HostWrite();

Vector xyz(dim);

Expand Down Expand Up @@ -903,7 +913,8 @@ void M2ulPhyS::initIndirectionArrays() {
const ParFiniteElementSpace *dist_fes = distance_->ParFESpace();

Array<int> dist_dofs1;
dist_fes->GetElementVDofs(tr->Elem1->ElementNo, dist_dofs1);
// dist_fes->GetElementVDofs(tr->Elem1->ElementNo, dist_dofs1);
dist_fes->GetElementVDofs(tr->Elem1No, dist_dofs1);
dist1.SetSize(dist_dofs1.Size());
distance_->GetSubVector(dist_dofs1, dist1);

Expand Down Expand Up @@ -968,7 +979,17 @@ void M2ulPhyS::initIndirectionArrays() {
boundaryFaceIntegrationData &bdry_face_data = gpu_precomputed_data_.boundary_face_data;

// This is supposed to be number of boundary faces, and for
// non-periodic cases it is. See #199 for more info.
// non-periodic cases it is. But, for periodic meshes, it includes
// periodic boundary faces---i.e., not only "true" boundary faces.

// But, just switching to mesh->GetNFbyType(FaceType::Boundary) is
// problematic, since the face index expected by, for example, tr =
// mesh->GetBdrFaceTransformations(f) ranges over mesh->GetNBE().
// Here we set up another array (abf_to_rbf) that maps from the
// "actual" boundary faces to the index within the array of boundary
// faces that includes the periodic faces.

// See #199 for more info.
const int NumBCelems = fes->GetNBE();

if (NumBCelems > 0) {
Expand All @@ -993,7 +1014,7 @@ void M2ulPhyS::initIndirectionArrays() {
auto h_face_quad_weight = bdry_face_data.quad_weight.HostWrite();

bdry_face_data.el.SetSize(NumBCelems);
bdry_face_data.el = 0;
bdry_face_data.el = -1;
auto h_face_el = bdry_face_data.el.HostWrite();

bdry_face_data.num_quad.SetSize(NumBCelems);
Expand All @@ -1005,13 +1026,34 @@ void M2ulPhyS::initIndirectionArrays() {
bdry_face_data.delta_el1 = 0.;
auto h_bdry_delta_el1 = bdry_face_data.delta_el1.HostWrite();

bdry_face_data.bc_category.SetSize(NumBCelems);
bdry_face_data.bc_category = NUM_BC_CATEGORIES;
auto h_bc_category = bdry_face_data.bc_category.HostWrite();

const FiniteElement *fe;
FaceElementTransformations *tr;
Mesh *mesh = fes->GetMesh();
// Mesh *mesh = fes->GetMesh();

// NB: *Must* call this here, as otherwise some faces are
// erroneously included as boundary faces and asserts below may
// fail
mesh->ExchangeFaceNbrNodes();
mesh->ExchangeFaceNbrData();

std::vector<int> uniqueElems;
uniqueElems.clear();

std::vector<int> rbf_to_abf;
rbf_to_abf.clear();

int numRealBCFaces = 0;

for (int f = 0; f < NumBCelems; f++) {
tr = mesh->GetBdrFaceTransformations(f);
if (tr != NULL) {
rbf_to_abf.push_back(f);

numRealBCFaces += 1;
fe = fes->GetFE(tr->Elem1No);

const int elDof = fe->GetDof();
Expand All @@ -1030,6 +1072,12 @@ void M2ulPhyS::initIndirectionArrays() {
h_face_num_quad[f] = ir->GetNPoints();
h_bdry_delta_el1[f] = mesh->GetElementSize(tr->Elem1No, 1) / fe->GetOrder();

bool inList = false;
for (size_t n = 0; n < uniqueElems.size(); n++) {
if (uniqueElems[n] == tr->Elem1No) inList = true;
}
if (!inList) uniqueElems.push_back(tr->Elem1No);

for (int q = 0; q < ir->GetNPoints(); q++) {
const IntegrationPoint &ip = ir->IntPoint(q);
tr->SetAllIntPoints(&ip);
Expand All @@ -1054,6 +1102,42 @@ void M2ulPhyS::initIndirectionArrays() {
}
}
}

assert(rbf_to_abf.size() == mesh->GetNFbyType(FaceType::Boundary));
assert(rbf_to_abf.size() == numRealBCFaces);

bdry_face_data.rbf_to_abf.SetSize(rbf_to_abf.size());
auto h_rbf_to_abf = bdry_face_data.rbf_to_abf.HostWrite();

for (size_t i = 0; i < rbf_to_abf.size(); i++) {
printf("iface = %d, true_face = %d\n", i, rbf_to_abf[i]);
fflush(stdout);
h_rbf_to_abf[i] = rbf_to_abf[i];
}

bdry_face_data.elements_to_faces.SetSize(7 * uniqueElems.size());
bdry_face_data.elements_to_faces = -1;
auto h_elements_to_faces = bdry_face_data.elements_to_faces.HostWrite();
for (size_t i = 0; i < uniqueElems.size(); i++) {
const int eli = uniqueElems[i];
for (int f = 0; f < bdry_face_data.el.Size(); f++) {
if (eli == h_face_el[f]) {
// 0 + 7 * i = element number
h_elements_to_faces[0 + 7 * i] = h_face_el[f];

// 1 + 7 * i = number of boundary faces on this elem
int numFace = h_elements_to_faces[1 + 7 * i];
if (numFace == -1) numFace = 0;
numFace++;

// 1 + (j+1) + 7 * i = jth boundary face on ith unique boundary element
h_elements_to_faces[1 + numFace + 7 * i] = f;

// update number of faces
h_elements_to_faces[1 + 7 * i] = numFace;
}
}
}
}

//-----------------------------------------------------------------
Expand Down Expand Up @@ -1283,6 +1367,53 @@ void M2ulPhyS::initIndirectionArrays() {
}
}

void M2ulPhyS::initIndirectionBC() {
boundaryFaceIntegrationData &bdry_face_data = gpu_precomputed_data_.boundary_face_data;

// This is supposed to be number of boundary faces, and for
// non-periodic cases it is. See #199 for more info.
const int NumBCelems = fes->GetNBE();

if (NumBCelems > 0) {
bdry_face_data.bc_category.SetSize(NumBCelems);
bdry_face_data.bc_category = NUM_BC_CATEGORIES;
auto h_bc_category = bdry_face_data.bc_category.HostWrite();

bdry_face_data.use_bc_in_grad.SetSize(NumBCelems);
bdry_face_data.use_bc_in_grad = false;
auto h_use_bc_in_grad = bdry_face_data.use_bc_in_grad.HostWrite();

bdry_face_data.wall_bc_temperature.SetSize(NumBCelems);
bdry_face_data.wall_bc_temperature = false;
bdry_face_data.wall_bc_temperature.UseDevice(true);
auto h_wall_bc_temperature = bdry_face_data.wall_bc_temperature.HostWrite();

FaceElementTransformations *tr;
Mesh *mesh = fes->GetMesh();

for (int f = 0; f < NumBCelems; f++) {
tr = mesh->GetBdrFaceTransformations(f);
if (tr != NULL && bcIntegrator != NULL) {
int attr = tr->Attribute;
h_bc_category[f] = bcIntegrator->getAttributeCategory(attr);
if (config.useBCinGrad && h_bc_category[f] == WALL) {
std::unordered_map<int, BoundaryCondition *>::const_iterator wbci = bcIntegrator->wallBCmap.find(attr);
if (wbci != bcIntegrator->wallBCmap.end()) {
WallBC *wbc = dynamic_cast<WallBC *>(wbci->second);
WallType wt = wbc->getType();
if (wt == VISC_ISOTH) {
printf("Using BC in Gradient!\n");
fflush(stdout);
h_use_bc_in_grad[f] = true;
h_wall_bc_temperature[f] = wbc->getWallTemp();
}
}
}
}
}
}
}

M2ulPhyS::~M2ulPhyS() {
if (rank0_) histFile.close();

Expand Down
10 changes: 10 additions & 0 deletions src/M2ulPhyS.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,16 @@ class M2ulPhyS : public TPS::Solver {
*/
void initIndirectionArrays();

/** @brief Fill BC part of precomputedIntegrationData struct
*
* The _GPU_ path required some BC information to be evaluated and
* stored at the beginning of a simulation. These quantities are
* set by this function, which is separate from
* initIndirectionArrays() b/c it must be called after the boundary
* conditions are initialized.
*/
void initIndirectionBC();

void initVariables();
void initSolutionAndVisualizationVectors();
void initialTimeStep();
Expand Down
80 changes: 20 additions & 60 deletions src/argon_transport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ ArgonMinimalTransport::ArgonMinimalTransport(GasMixture *_mixture, RunConfigurat
: ArgonMinimalTransport(_mixture, _runfile.argonTransportInput) {}

MFEM_HOST_DEVICE ArgonMinimalTransport::ArgonMinimalTransport(GasMixture *_mixture, const ArgonTransportInput &inputs)
: TransportProperties(_mixture) {
: MolecularTransport(_mixture) {
viscosityFactor_ = 5. / 16. * sqrt(PI_ * kB_);
kOverEtaFactor_ = 15. / 4. * kB_;
diffusivityFactor_ = 3. / 16. * sqrt(2.0 * PI_ * kB_) / AVOGADRONUMBER;
Expand Down Expand Up @@ -114,7 +114,7 @@ MFEM_HOST_DEVICE ArgonMinimalTransport::ArgonMinimalTransport(GasMixture *_mixtu
setArtificialMultipliers(inputs);
}

MFEM_HOST_DEVICE ArgonMinimalTransport::ArgonMinimalTransport(GasMixture *_mixture) : TransportProperties(_mixture) {
MFEM_HOST_DEVICE ArgonMinimalTransport::ArgonMinimalTransport(GasMixture *_mixture) : MolecularTransport(_mixture) {
viscosityFactor_ = 5. / 16. * sqrt(PI_ * kB_);
kOverEtaFactor_ = 15. / 4. * kB_;
diffusivityFactor_ = 3. / 16. * sqrt(2.0 * PI_ * kB_) / AVOGADRONUMBER;
Expand Down Expand Up @@ -169,19 +169,10 @@ MFEM_HOST_DEVICE collisionInputs ArgonMinimalTransport::computeCollisionInputs(c
return collInputs;
}

void ArgonMinimalTransport::ComputeFluxTransportProperties(const Vector &state, const DenseMatrix &gradUp,
const Vector &Efield, double radius, double distance,
Vector &transportBuffer, DenseMatrix &diffusionVelocity) {
transportBuffer.SetSize(FluxTrns::NUM_FLUX_TRANS);
diffusionVelocity.SetSize(numSpecies, nvel_);
ComputeFluxTransportProperties(&state[0], gradUp.Read(), &Efield[0], radius, distance, &transportBuffer[0],
diffusionVelocity.Write());
}

MFEM_HOST_DEVICE void ArgonMinimalTransport::ComputeFluxTransportProperties(const double *state, const double *gradUp,
const double *Efield, double radius,
double distance, double *transportBuffer,
double *diffusionVelocity) {
MFEM_HOST_DEVICE void ArgonMinimalTransport::ComputeFluxMolecularTransport(const double *state, const double *gradUp,
const double *Efield,
double *transportBuffer,
double *diffusionVelocity) {
// transportBuffer.SetSize(FluxTrns::NUM_FLUX_TRANS);
for (int p = 0; p < FluxTrns::NUM_FLUX_TRANS; p++) transportBuffer[p] = 0.0;

Expand Down Expand Up @@ -423,22 +414,11 @@ MFEM_HOST_DEVICE void ArgonMinimalTransport::computeMixtureAverageDiffusivity(co
CurtissHirschfelder(X_sp, Y_sp, binaryDiff, diffusivity);
}

void ArgonMinimalTransport::ComputeSourceTransportProperties(const Vector &state, const Vector &Up,
const DenseMatrix &gradUp, const Vector &Efield,
double distance, Vector &globalTransport,
DenseMatrix &speciesTransport,
DenseMatrix &diffusionVelocity, Vector &n_sp) {
globalTransport.SetSize(SrcTrns::NUM_SRC_TRANS);
speciesTransport.SetSize(numSpecies, SpeciesTrns::NUM_SPECIES_COEFFS);
diffusionVelocity.SetSize(numSpecies, nvel_);
n_sp.SetSize(3);
ComputeSourceTransportProperties(&state[0], &Up[0], gradUp.Read(), &Efield[0], distance, &globalTransport[0],
speciesTransport.Write(), diffusionVelocity.Write(), &n_sp[0]);
}

MFEM_HOST_DEVICE void ArgonMinimalTransport::ComputeSourceTransportProperties(
const double *state, const double *Up, const double *gradUp, const double *Efield, double distance,
double *globalTransport, double *speciesTransport, double *diffusionVelocity, double *n_sp) {
MFEM_HOST_DEVICE void ArgonMinimalTransport::ComputeSourceMolecularTransport(const double *state, const double *Up,
const double *gradUp, const double *Efield,
double *globalTransport,
double *speciesTransport,
double *diffusionVelocity, double *n_sp) {
for (int p = 0; p < SrcTrns::NUM_SRC_TRANS; p++) globalTransport[p] = 0.0;
for (int p = 0; p < SpeciesTrns::NUM_SPECIES_COEFFS; p++)
for (int sp = 0; sp < numSpecies; sp++) speciesTransport[sp + p * numSpecies] = 0.0;
Expand Down Expand Up @@ -792,19 +772,10 @@ MFEM_HOST_DEVICE double ArgonMixtureTransport::collisionIntegral(const int _spI,
return -1;
}

void ArgonMixtureTransport::ComputeFluxTransportProperties(const Vector &state, const DenseMatrix &gradUp,
const Vector &Efield, double radius, double distance,
Vector &transportBuffer, DenseMatrix &diffusionVelocity) {
transportBuffer.SetSize(FluxTrns::NUM_FLUX_TRANS);
diffusionVelocity.SetSize(numSpecies, nvel_);
ComputeFluxTransportProperties(&state[0], gradUp.Read(), &Efield[0], radius, distance, &transportBuffer[0],
diffusionVelocity.Write());
}

MFEM_HOST_DEVICE void ArgonMixtureTransport::ComputeFluxTransportProperties(const double *state, const double *gradUp,
const double *Efield, double radius,
double distance, double *transportBuffer,
double *diffusionVelocity) {
MFEM_HOST_DEVICE void ArgonMixtureTransport::ComputeFluxMolecularTransport(const double *state, const double *gradUp,
const double *Efield,
double *transportBuffer,
double *diffusionVelocity) {
for (int p = 0; p < FluxTrns::NUM_FLUX_TRANS; p++) transportBuffer[p] = 0.0;

double primitiveState[gpudata::MAXEQUATIONS];
Expand Down Expand Up @@ -926,22 +897,11 @@ MFEM_HOST_DEVICE double ArgonMixtureTransport::computeThirdOrderElectronThermalC
(L11 - L12 * L12 / L22);
}

void ArgonMixtureTransport::ComputeSourceTransportProperties(const Vector &state, const Vector &Up,
const DenseMatrix &gradUp, const Vector &Efield,
double distance, Vector &globalTransport,
DenseMatrix &speciesTransport,
DenseMatrix &diffusionVelocity, Vector &n_sp) {
globalTransport.SetSize(SrcTrns::NUM_SRC_TRANS);
speciesTransport.SetSize(numSpecies, SpeciesTrns::NUM_SPECIES_COEFFS);
diffusionVelocity.SetSize(numSpecies, nvel_);
n_sp.SetSize(numSpecies);
ComputeSourceTransportProperties(&state[0], &Up[0], gradUp.Read(), &Efield[0], distance, &globalTransport[0],
speciesTransport.Write(), diffusionVelocity.Write(), &n_sp[0]);
}

MFEM_HOST_DEVICE void ArgonMixtureTransport::ComputeSourceTransportProperties(
const double *state, const double *Up, const double *gradUp, const double *Efield, double distance,
double *globalTransport, double *speciesTransport, double *diffusionVelocity, double *n_sp) {
MFEM_HOST_DEVICE void ArgonMixtureTransport::ComputeSourceMolecularTransport(const double *state, const double *Up,
const double *gradUp, const double *Efield,
double *globalTransport,
double *speciesTransport,
double *diffusionVelocity, double *n_sp) {
for (int p = 0; p < SrcTrns::NUM_SRC_TRANS; p++) globalTransport[p] = 0.0;
for (int p = 0; p < SpeciesTrns::NUM_SPECIES_COEFFS; p++)
for (int sp = 0; sp < numSpecies; sp++) speciesTransport[sp + p * numSpecies] = 0.0;
Expand Down
Loading