Skip to content

Commit

Permalink
Merge pull request #285 from hklion/gpu_orlanski
Browse files Browse the repository at this point in the history
update orlanski BCs so that edges are updated first, then copied to g…
  • Loading branch information
hklion authored Nov 7, 2024
2 parents 4661a01 + a6d0ba6 commit 0c87665
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 108 deletions.
38 changes: 34 additions & 4 deletions Source/BoundaryConditions/BoundaryConditions_netcdf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,20 @@ REMORA::fill_from_bdyfiles (MultiFab& mf_to_fill, const MultiFab& mf_mask, const
Box xhi_ylo = xhi & ylo;
Box xhi_yhi = xhi & yhi;

// Box xlo_edge = xlo; xlo_edge.setSmall(0,dom_lo.x-1+mf_index_type[0]); xlo_edge.setBig(0,dom_lo.x-1+mf_index_type[0]);
// Box xhi_edge = xhi; xhi_edge.setSmall(0,dom_hi.x+1-mf_index_type[0]); xhi_edge.setBig(0,dom_hi.x+1-mf_index_type[0]);
// Box ylo_edge = ylo; ylo_edge.setSmall(1,dom_lo.y-1+mf_index_type[1]); ylo_edge.setBig(1,dom_lo.x-1+mf_index_type[1]);
// Box yhi_edge = yhi; yhi_edge.setSmall(1,dom_hi.y+1-mf_index_type[1]); yhi_edge.setBig(1,dom_hi.x+1-mf_index_type[1]);
Box xlo_edge = xlo; xlo_edge.setSmall(0,ubound(xlo).x); xlo_edge.setBig(0,ubound(xlo).x);
Box xhi_edge = xhi; xhi_edge.setSmall(0,lbound(xhi).x); xhi_edge.setBig(0,lbound(xhi).x);
Box ylo_edge = ylo; ylo_edge.setSmall(1,ubound(ylo).y); ylo_edge.setBig(1,ubound(ylo).y);
Box yhi_edge = yhi; yhi_edge.setSmall(1,lbound(ylo).y); yhi_edge.setBig(1,lbound(ylo).y);

Box xlo_ghost = xlo; xlo_ghost.setBig(0,ubound(xlo).x-1);
Box xhi_ghost = xhi; xhi_ghost.setSmall(0,lbound(xhi).x+1);
Box ylo_ghost = ylo; ylo_ghost.setBig(1,ubound(ylo).y-1);
Box yhi_ghost = yhi; yhi_ghost.setSmall(1,lbound(ylo).y+1);

const Array4<Real>& dest_arr = mf_to_fill.array(mfi);
const Array4<const Real>& mask_arr = mf_mask.array(mfi);
const Array4<const Real>& calc_arr = (!null_mf_calc) ? mf_calc.array(mfi) : Array4<amrex::Real>();
Expand Down Expand Up @@ -167,7 +181,7 @@ REMORA::fill_from_bdyfiles (MultiFab& mf_to_fill, const MultiFab& mf_mask, const
const amrex::BCRec* bc_ptr = bcrs_d.data();

if (!xlo.isEmpty()) {
ParallelFor(grow(xlo,IntVect(0,-1,0)), [=] AMREX_GPU_DEVICE (int i, int j, int k)
ParallelFor(grow(xlo_edge,IntVect(0,-1,0)), [=] AMREX_GPU_DEVICE (int i, int j, int k)
{
Real bry_val = (oma * bdatxlo_n (ubound(xlo).x,j,k,0)
+ alpha * bdatxlo_np1(ubound(xlo).x,j,k,0));
Expand Down Expand Up @@ -217,10 +231,14 @@ REMORA::fill_from_bdyfiles (MultiFab& mf_to_fill, const MultiFab& mf_mask, const
dest_arr(i,j,k,icomp+icomp_to_fill) = mask_arr(i,j,0) * (dest_arr(dom_lo.x-1+mf_index_type[0],j,k,icomp+icomp_to_fill) + tau * (bry_val - calc_arr(dom_lo.x-1+mf_index_type[0],j,k,icomp+icomp_to_fill)));
}
});
ParallelFor(grow(xlo_ghost,IntVect(0,-1,0)), [=] AMREX_GPU_DEVICE (int i, int j, int k)
{
dest_arr(i,j,k,icomp+icomp_to_fill) = dest_arr(dom_lo.x+mf_index_type[0]-1,j,k,icomp+icomp_to_fill);
});
}

if (!xhi.isEmpty()) {
ParallelFor(grow(xhi,IntVect(0,-1,0)), [=] AMREX_GPU_DEVICE (int i, int j, int k)
ParallelFor(grow(xhi_edge,IntVect(0,-1,0)), [=] AMREX_GPU_DEVICE (int i, int j, int k)
{
Real bry_val = (oma * bdatxhi_n (lbound(xhi).x,j,k,0)
+ alpha * bdatxhi_np1(lbound(xhi).x,j,k,0));
Expand Down Expand Up @@ -271,10 +289,14 @@ REMORA::fill_from_bdyfiles (MultiFab& mf_to_fill, const MultiFab& mf_mask, const
dest_arr(i,j,k,icomp+icomp_to_fill) = mask_arr(i,j,0) * (dest_arr(dom_hi.x+1-mf_index_type[0],j,k,icomp+icomp_to_fill) + tau * (bry_val - calc_arr(dom_hi.x+1-mf_index_type[0],j,k,icomp+icomp_to_fill)));
}
});
ParallelFor(grow(xhi_ghost,IntVect(0,-1,0)), [=] AMREX_GPU_DEVICE (int i, int j, int k)
{
dest_arr(i,j,k,icomp+icomp_to_fill) = dest_arr(dom_hi.x-mf_index_type[0]+1,j,k,icomp+icomp_to_fill);
});
}

if (!ylo.isEmpty()) {
ParallelFor(grow(ylo,IntVect(-1,0,0)), [=] AMREX_GPU_DEVICE (int i, int j, int k)
ParallelFor(grow(ylo_edge,IntVect(-1,0,0)), [=] AMREX_GPU_DEVICE (int i, int j, int k)
{
Real bry_val = (oma * bdatylo_n (i,ubound(ylo).y,k,0)
+ alpha * bdatylo_np1(i,ubound(ylo).y,k,0));
Expand Down Expand Up @@ -326,10 +348,14 @@ REMORA::fill_from_bdyfiles (MultiFab& mf_to_fill, const MultiFab& mf_mask, const
dest_arr(i,j,k,icomp+icomp_to_fill) = mask_arr(i,j,0) * (dest_arr(i,dom_lo.y-1+mf_index_type[1],k,icomp+icomp_to_fill) + tau * (bry_val - calc_arr(i,dom_lo.y-1+mf_index_type[1],k,icomp+icomp_to_fill)));
}
});
ParallelFor(grow(ylo_ghost,IntVect(-1,0,0)), [=] AMREX_GPU_DEVICE (int i, int j, int k)
{
dest_arr(i,j,k,icomp+icomp_to_fill) = dest_arr(i,dom_lo.y+mf_index_type[1]-1,k,icomp+icomp_to_fill);
});
}

if (!yhi.isEmpty()) {
ParallelFor(grow(yhi,IntVect(-1,0,0)), [=] AMREX_GPU_DEVICE (int i, int j, int k)
ParallelFor(grow(yhi_edge,IntVect(-1,0,0)), [=] AMREX_GPU_DEVICE (int i, int j, int k)
{
Real bry_val = (oma * bdatyhi_n (i,lbound(yhi).y,k,0)
+ alpha * bdatyhi_np1(i,lbound(yhi).y,k,0)) * mask_arr(i,j,0);
Expand Down Expand Up @@ -381,6 +407,10 @@ REMORA::fill_from_bdyfiles (MultiFab& mf_to_fill, const MultiFab& mf_mask, const
dest_arr(i,j,k,icomp+icomp_to_fill) = mask_arr(i,j,0) * (dest_arr(i,dom_hi.y+1-mf_index_type[1],k,icomp+icomp_to_fill) + tau * (bry_val - calc_arr(i,dom_hi.y+1-mf_index_type[1],k,icomp+icomp_to_fill)));
}
});
ParallelFor(grow(yhi_ghost,IntVect(-1,0,0)), [=] AMREX_GPU_DEVICE (int i, int j, int k)
{
dest_arr(i,j,k,icomp+icomp_to_fill) = dest_arr(i,dom_hi.y-mf_index_type[1]+1,k,icomp+icomp_to_fill);
});

}
// If we've applied boundary conditions to either side, update the corner
Expand Down
80 changes: 29 additions & 51 deletions Source/BoundaryConditions/BoundaryConditions_xvel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,31 +60,6 @@ void REMORAPhysBCFunct::impose_xvel_bcs (const Array4<Real>& dest_arr, const Box
Box bx_xlo_face(bx); bx_xlo_face.setSmall(0,dom_lo.x ); bx_xlo_face.setBig(0,dom_lo.x );
Box bx_xhi_face(bx); bx_xhi_face.setSmall(0,dom_hi.x+1); bx_xhi_face.setBig(0,dom_hi.x+1);
ParallelFor(
grow(bx_xlo,IntVect(0,-1,0)), ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) {
int inner = (bc_ptr[n].lo(0) == REMORABCType::foextrap) ? 1 : 0;
int iflip = dom_lo.x - i;
if (bc_ptr[n].lo(0) == REMORABCType::ext_dir) {
dest_arr(i,j,k) = l_bc_extdir_vals_d[n][0]*msku(i,j,0);
} else if (bc_ptr[n].lo(0) == REMORABCType::foextrap || bc_ptr[n].lo(0) == REMORABCType::clamped) {
dest_arr(i,j,k) = dest_arr(dom_lo.x+inner,j,k)*msku(i,j,0);
} else if (bc_ptr[n].lo(0) == REMORABCType::orlanski_rad) {
Real grad_lo = calc_arr(dom_lo.x ,j ,k) - calc_arr(dom_lo.x ,j-1,k);
Real grad_lo_ip1 = calc_arr(dom_lo.x+1,j ,k) - calc_arr(dom_lo.x+1,j-1,k);
Real grad_lo_jp1 = calc_arr(dom_lo.x ,j+1,k) - calc_arr(dom_lo.x ,j ,k);
Real grad_lo_ijp1 = calc_arr(dom_lo.x+1,j+1,k) - calc_arr(dom_lo.x+1,j ,k);
Real dUdt = calc_arr(dom_lo.x+1,j,k) - dest_arr(dom_lo.x+1,j,k);
Real dUdx = dest_arr(dom_lo.x+1,j,k) - dest_arr(dom_lo.x+2,j,k);
if (dUdt * dUdx < 0.0_rt) dUdt = 0.0_rt;
Real dUde = (dUdt * (grad_lo_ip1 + grad_lo_ijp1) > 0.0_rt) ? grad_lo_ip1 : grad_lo_ijp1;
Real cff = std::max(dUdx*dUdx+dUde*dUde,eps);
Real Cx = dUdt*dUdx;
dest_arr(i,j,k) = (cff * calc_arr(i,j,k) + Cx * dest_arr(dom_lo.x+1,j,k)) * msku(i,j,0) / (cff+Cx);
} else if (bc_ptr[n].lo(0) == REMORABCType::reflect_even) {
dest_arr(i,j,k) = dest_arr(iflip,j,k)*msku(i,j,0);
} else if (bc_ptr[n].lo(0) == REMORABCType::reflect_odd) {
dest_arr(i,j,k) = -dest_arr(iflip,j,k)*msku(i,j,0);
}
},
// We only set the values on the domain faces themselves if EXT_DIR or actual outflow
grow(bx_xlo_face,IntVect(0,-1,0)), ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) {
if (bc_ptr[n].lo(0) == REMORABCType::ext_dir) {
Expand All @@ -104,35 +79,24 @@ void REMORAPhysBCFunct::impose_xvel_bcs (const Array4<Real>& dest_arr, const Box
Real Cx = dUdt*dUdx;
dest_arr(i,j,k) = (cff * calc_arr(i,j,k) + Cx * dest_arr(dom_lo.x+1,j,k)) * msku(i,j,0) / (cff+Cx);
}
}
);
});
ParallelFor(
grow(bx_xhi,IntVect(0,-1,0)), ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) {
int iflip = 2*(dom_hi.x + 1) - i;
int inner = (bc_ptr[n].hi(0) == REMORABCType::foextrap) ? 1 : 0;
if (bc_ptr[n].hi(0) == REMORABCType::ext_dir) {
dest_arr(i,j,k) = l_bc_extdir_vals_d[n][3]*msku(i,j,0);
} else if (bc_ptr[n].hi(0) == REMORABCType::foextrap || bc_ptr[n].hi(0) == REMORABCType::clamped) {
dest_arr(i,j,k) = dest_arr(dom_hi.x+1-inner,j,k)*msku(i,j,0);
} else if (bc_ptr[n].hi(0) == REMORABCType::orlanski_rad) {
Real grad_hi = calc_arr(dom_hi.x ,j ,k) - calc_arr(dom_hi.x ,j-1,k);
Real grad_hi_ip1 = calc_arr(dom_hi.x+1,j ,k) - calc_arr(dom_hi.x+1,j-1,k);
Real grad_hi_jp1 = calc_arr(dom_hi.x ,j+1,k) - calc_arr(dom_hi.x ,j ,k);
Real grad_hi_ijp1 = calc_arr(dom_hi.x+1,j+1,k) - calc_arr(dom_hi.x+1,j ,k);
Real dUdt = calc_arr(dom_hi.x,j,k) - dest_arr(dom_hi.x ,j,k);
Real dUdx = dest_arr(dom_hi.x,j,k) - dest_arr(dom_hi.x-1,j,k);
if (dUdt * dUdx < 0.0_rt) dUdt = 0.0_rt;
Real dUde = (dUdt * (grad_hi + grad_hi_jp1) > 0.0_rt) ? grad_hi : grad_hi_jp1;
Real cff = std::max(dUdx*dUdx+dUde*dUde,eps);
Real Cx = dUdt * dUdx;
dest_arr(i,j,k) = (cff * calc_arr(dom_hi.x+1,j,k) + Cx * dest_arr(dom_hi.x,j,k)) * msku(i,j,0) / (cff + Cx);
} else if (bc_ptr[n].hi(0) == REMORABCType::reflect_even) {
grow(bx_xlo,IntVect(0,-1,0)), ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) {
int inner = (bc_ptr[n].lo(0) == REMORABCType::foextrap) ? 1 : 0;
int iflip = dom_lo.x - i;
if (bc_ptr[n].lo(0) == REMORABCType::ext_dir) {
dest_arr(i,j,k) = l_bc_extdir_vals_d[n][0]*msku(i,j,0);
} else if (bc_ptr[n].lo(0) == REMORABCType::foextrap || bc_ptr[n].lo(0) == REMORABCType::clamped ||
bc_ptr[n].lo(0) == REMORABCType::orlanski_rad) {
dest_arr(i,j,k) = dest_arr(dom_lo.x+inner,j,k)*msku(i,j,0);
} else if (bc_ptr[n].lo(0) == REMORABCType::reflect_even) {
dest_arr(i,j,k) = dest_arr(iflip,j,k)*msku(i,j,0);
} else if (bc_ptr[n].hi(0) == REMORABCType::reflect_odd) {
} else if (bc_ptr[n].lo(0) == REMORABCType::reflect_odd) {
dest_arr(i,j,k) = -dest_arr(iflip,j,k)*msku(i,j,0);
}
},
});
// We only set the values on the domain faces themselves if EXT_DIR or actual outflow
ParallelFor(
grow(bx_xhi_face,IntVect(0,-1,0)), ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) {
if (bc_ptr[n].hi(0) == REMORABCType::ext_dir) {
dest_arr(i,j,k) = l_bc_extdir_vals_d[n][3]*msku(i,j,0);
Expand All @@ -151,8 +115,22 @@ void REMORAPhysBCFunct::impose_xvel_bcs (const Array4<Real>& dest_arr, const Box
Real Cx = dUdt * dUdx;
dest_arr(i,j,k) = (cff * calc_arr(dom_hi.x+1,j,k) + Cx * dest_arr(dom_hi.x,j,k)) * msku(i,j,0) / (cff + Cx);
}
}
);
});
ParallelFor(
grow(bx_xhi,IntVect(0,-1,0)), ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) {
int iflip = 2*(dom_hi.x + 1) - i;
int inner = (bc_ptr[n].hi(0) == REMORABCType::foextrap) ? 1 : 0;
if (bc_ptr[n].hi(0) == REMORABCType::ext_dir) {
dest_arr(i,j,k) = l_bc_extdir_vals_d[n][3]*msku(i,j,0);
} else if (bc_ptr[n].hi(0) == REMORABCType::foextrap || bc_ptr[n].hi(0) == REMORABCType::clamped ||
bc_ptr[n].hi(0) == REMORABCType::orlanski_rad) {
dest_arr(i,j,k) = dest_arr(dom_hi.x+1-inner,j,k)*msku(i,j,0);
} else if (bc_ptr[n].hi(0) == REMORABCType::reflect_even) {
dest_arr(i,j,k) = dest_arr(iflip,j,k)*msku(i,j,0);
} else if (bc_ptr[n].hi(0) == REMORABCType::reflect_odd) {
dest_arr(i,j,k) = -dest_arr(iflip,j,k)*msku(i,j,0);
}
});
} // not periodic in x

if (!is_periodic_in_y or bccomp==BCVars::foextrap_bc)
Expand Down
Loading

0 comments on commit 0c87665

Please sign in to comment.