diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 6f6fd0bcc4..0c696300f8 100755 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -231,12 +231,25 @@ steps: command: "srun julia --color=yes --check-bounds=yes --project=test test/Remapping/distributed_remapping.jl" env: CLIMACOMMS_CONTEXT: "MPI" + CLIMACOMMS_DEVICE: "CPU" agents: slurm_ntasks: 2 - label: "Unit: distributed remapping (1 process)" key: distributed_remapping_1proc command: "julia --color=yes --check-bounds=yes --project=test test/Remapping/distributed_remapping.jl" + env: + CLIMACOMMS_DEVICE: "CPU" + + - label: "Unit: distributed remapping with CUDA" + key: distributed_remapping + command: "srun julia --color=yes --check-bounds=yes --project=test test/Remapping/distributed_remapping.jl" + env: + CLIMACOMMS_CONTEXT: "MPI" + CLIMACOMMS_DEVICE: "CUDA" + agents: + slurm_ntasks: 2 + slurm_gpus: 1 - label: "Unit: distributed gather" key: unit_distributed_gather4 diff --git a/src/Remapping/Remapping.jl b/src/Remapping/Remapping.jl index 77ff1060e7..107775ff81 100644 --- a/src/Remapping/Remapping.jl +++ b/src/Remapping/Remapping.jl @@ -13,6 +13,7 @@ import ..DataLayouts, ..Hypsography using ..RecursiveApply +using CUDA include("interpolate_array.jl") include("distributed_remapping.jl") diff --git a/src/Remapping/interpolate_array.jl b/src/Remapping/interpolate_array.jl index 6a2467b30f..60cb859839 100644 --- a/src/Remapping/interpolate_array.jl +++ b/src/Remapping/interpolate_array.jl @@ -5,8 +5,7 @@ function interpolate_slab( ) space = axes(field) x = zero(eltype(field)) - QS = Spaces.quadrature_style(space) - Nq = Spaces.Quadratures.degrees_of_freedom(QS) + Nq = length(I1) for i in 1:Nq ij = CartesianIndex((i,)) @@ -22,22 +21,74 @@ function interpolate_slab( ) space = axes(field) x = zero(eltype(field)) - QS = Spaces.quadrature_style(space) - Nq = Spaces.Quadratures.degrees_of_freedom(QS) + Nq1, Nq2 = length(I1), length(I2) - for j in 1:Nq, i in 1:Nq + for j in 1:Nq2, i in 1:Nq1 ij = CartesianIndex((i, j)) x += I1[i] * I2[j] * Operators.get_node(space, field, ij, slabidx) end return x end +""" + vertical_indices_ref_coordinate(space, zcoord) + +Return the vertical indices of the elements below and above `zcoord`. + +Return also the correct reference coordinate `zcoord` for vertical interpolation. +""" +function vertical_indices end + +function vertical_indices_ref_coordinate( + space::Spaces.FaceExtrudedFiniteDifferenceSpace, + zcoord, +) + vert_topology = Spaces.vertical_topology(space) + vert_mesh = vert_topology.mesh + + velem = Meshes.containing_element(vert_mesh, zcoord) + ξ3, = Meshes.reference_coordinates(vert_mesh, velem, zcoord) + v_lo, v_hi = velem - half, velem + half + return v_lo, v_hi, ξ3 +end + +function vertical_indices_ref_coordinate( + space::Spaces.CenterExtrudedFiniteDifferenceSpace, + zcoord, +) + vert_topology = Spaces.vertical_topology(space) + vert_mesh = vert_topology.mesh + Nz = Spaces.nlevels(space) + + velem = Meshes.containing_element(vert_mesh, zcoord) + ξ3, = Meshes.reference_coordinates(vert_mesh, velem, zcoord) + if ξ3 < 0 + if Topologies.isperiodic(Spaces.vertical_topology(space)) + v_lo = mod1(velem - 1, Nz) + else + v_lo = max(velem - 1, 1) + end + v_hi = velem + ξ3 = ξ3 + 1 + else + v_lo = velem + if Topologies.isperiodic(Spaces.vertical_topology(space)) + v_hi = mod1(velem + 1, Nz) + else + v_hi = min(velem + 1, Nz) + end + ξ3 = ξ3 - 1 + end + return v_lo, v_hi, ξ3 +end + """ interpolate_slab_level( field::Fields.Field, h::Integer, Is::Tuple, - zcoord, + zcoord; + fill_value = eltype(field)(NaN) ) Vertically interpolate the given `field` on `zcoord`. @@ -49,47 +100,111 @@ element in a column, no interpolation is performed and the value at the cell cen returned. Effectively, this means that the interpolation is first-order accurate across the column, but zeroth-order accurate close to the boundaries. +Return `fill_value` when the vertical coordinate is negative. + """ -function interpolate_slab_level( +function interpolate_slab_level!( + output_array, field::Fields.Field, h::Integer, Is::Tuple, - zcoord, + zpts; + fill_value = eltype(field)(NaN), ) - space = axes(field) - vert_topology = Spaces.vertical_topology(space) - vert_mesh = vert_topology.mesh - Nz = Spaces.nlevels(space) + device = ClimaComms.device(field) + interpolate_slab_level!( + output_array, + field, + h, + Is, + zpts, + device; + fill_value, + ) +end - velem = Meshes.containing_element(vert_mesh, zcoord) - ξ3, = Meshes.reference_coordinates(vert_mesh, velem, zcoord) - if space isa Spaces.FaceExtrudedFiniteDifferenceSpace - v_lo = velem - half - v_hi = velem + half - elseif space isa Spaces.CenterExtrudedFiniteDifferenceSpace - if ξ3 < 0 - if Topologies.isperiodic(Spaces.vertical_topology(space)) - v_lo = mod1(velem - 1, Nz) - else - v_lo = max(velem - 1, 1) - end - v_hi = velem - ξ3 = ξ3 + 1 - else - v_lo = velem - if Topologies.isperiodic(Spaces.vertical_topology(space)) - v_hi = mod1(velem + 1, Nz) - else - v_hi = min(velem + 1, Nz) - end - ξ3 = ξ3 - 1 - end +function interpolate_slab_level!( + output_array, + field::Fields.Field, + h::Integer, + Is::Tuple, + zpts, + device::ClimaComms.AbstractCPUDevice; + fill_value = Spaces.undertype(axes(field))(NaN), +) + output_array .= map(zpts) do (zcoord) + zcoord.z < 0 && return fill_value + + v_lo, v_hi, ξ3 = + vertical_indices_ref_coordinate(axes(field), zcoord) + + f_lo = interpolate_slab(field, Fields.SlabIndex(v_lo, h), Is) + f_hi = interpolate_slab(field, Fields.SlabIndex(v_hi, h), Is) + return ((1 - ξ3) * f_lo + (1 + ξ3) * f_hi) / 2 end - f_lo = interpolate_slab(field, Fields.SlabIndex(v_lo, h), Is) - f_hi = interpolate_slab(field, Fields.SlabIndex(v_hi, h), Is) - return ((1 - ξ3) * f_lo + (1 + ξ3) * f_hi) / 2 end +function interpolate_slab_level!( + output_array, + field::Fields.Field, + h::Integer, + Is::Tuple, + zpts, + device::ClimaComms.CUDADevice; + fill_value = Spaces.undertype(axes(field))(NaN), +) + # We have to deal with topography and NaNs. For that, we select the points that have z + # >= 0 (above the surface) and interpolate only those on the GPU. Then, we fill the + # output array with fill_value and overwrite only those values that we computed with + # interpolation. This is a simple way to avoid having branching on the GPU to check if + # z>0. + + positive_zcoords_indices = [z.z >= 0 for z in zpts] + + vertical_indices_ref_coordinates = CuArray([ + vertical_indices_ref_coordinate(axes(field), zcoord) for + zcoord in zpts[positive_zcoords_indices] + ]) + + output_cuarray = CuArray( + zeros( + Spaces.undertype(axes(field)), + length(vertical_indices_ref_coordinates), + ), + ) + + nitems = length(zpts) + nthreads, nblocks = Spaces._configure_threadblock(nitems) + @cuda threads = (nthreads) blocks = (nblocks) interpolate_slab_level_kernel!( + output_cuarray, + field, + vertical_indices_ref_coordinates, + h, + Is, + ) + output_array .= fill_value + output_array[positive_zcoords_indices] .= Array(output_cuarray) +end + +function interpolate_slab_level_kernel!( + output_array, + field, + vidx_ref_coordinates, + h, + Is, +) + index = threadIdx().x + (blockIdx().x - 1) * blockDim().x + if index <= length(output_array) + v_lo, v_hi, ξ3 = vidx_ref_coordinates[index] + + f_lo = interpolate_slab(field, Fields.SlabIndex(v_lo, h), Is) + f_hi = interpolate_slab(field, Fields.SlabIndex(v_hi, h), Is) + output_array[index] = ((1 - ξ3) * f_lo + (1 + ξ3) * f_hi) / 2 + end + return nothing +end + + """ interpolate_array(field, xpts, ypts) interpolate_array(field, xpts, ypts, zpts) @@ -136,9 +251,7 @@ function interpolate_array( weights = interpolation_weights(horz_mesh, hcoord, quad_points) h = helem - for (iz, zcoord) in enumerate(zpts) - array[ix, iz] = interpolate_slab_level(field, h, weights, zcoord) - end + interpolate_slab_level!(view(array, ix, :), field, h, weights, zpts) end return array end @@ -159,6 +272,7 @@ function interpolate_array( array = zeros(T, length(xpts), length(ypts), length(zpts)) FT = Spaces.undertype(space) + for (iy, ycoord) in enumerate(ypts), (ix, xcoord) in enumerate(xpts) hcoord = Geometry.product_coordinates(xcoord, ycoord) helem = Meshes.containing_element(horz_mesh, hcoord) @@ -168,10 +282,7 @@ function interpolate_array( gidx = horz_topology.orderindex[helem] h = gidx - for (iz, zcoord) in enumerate(zpts) - array[ix, iy, iz] = - interpolate_slab_level(field, h, weights, zcoord) - end + interpolate_slab_level!(view(array, ix, iy, :), field, h, weights, zpts) end return array end @@ -233,7 +344,6 @@ function interpolate_column( physical_z = false, ) space = axes(field) - FT = Spaces.undertype(space) # When we don't have hypsography, there is no notion of "interpolating hypsography". In # this case, the reference vertical points coincide with the physical ones. Setting @@ -263,8 +373,9 @@ function interpolate_column( zpts_ref = zpts end - return [ - z.z >= 0 ? interpolate_slab_level(field, gidx, weights, z) : FT(NaN) for - z in zpts_ref - ] + output_array = zeros(Spaces.undertype(space), length(zpts)) + + interpolate_slab_level!(output_array, field, gidx, weights, zpts_ref) + + return output_array end diff --git a/test/Remapping/distributed_remapping.jl b/test/Remapping/distributed_remapping.jl index 52b82fa657..835ffc62e4 100644 --- a/test/Remapping/distributed_remapping.jl +++ b/test/Remapping/distributed_remapping.jl @@ -15,7 +15,7 @@ import ClimaCore: using ClimaComms const context = ClimaComms.context() const pid, nprocs = ClimaComms.init(context) -const device = ClimaComms.CPUSingleThreaded() +const device = ClimaComms.device() # log output only from root process logger_stream = ClimaComms.iamroot(context) ? stderr : devnull @@ -24,93 +24,98 @@ atexit() do global_logger(prev_logger) end -@testset "2D extruded" begin - vertdomain = Domains.IntervalDomain( - Geometry.ZPoint(0.0), - Geometry.ZPoint(1000.0); - boundary_tags = (:bottom, :top), - ) - - vertmesh = Meshes.IntervalMesh(vertdomain, nelems = 30) - verttopo = Topologies.IntervalTopology( - ClimaComms.SingletonCommsContext(device), - vertmesh, - ) - vert_center_space = Spaces.CenterFiniteDifferenceSpace(verttopo) - - horzdomain = Domains.IntervalDomain( - Geometry.XPoint(-500.0) .. Geometry.XPoint(500.0), - periodic = true, - ) - - quad = Spaces.Quadratures.GLL{4}() - horzmesh = Meshes.IntervalMesh(horzdomain, nelems = 10) - horztopology = Topologies.IntervalTopology( - ClimaComms.SingletonCommsContext(device), - horzmesh, - ) - horzspace = Spaces.SpectralElementSpace1D(horztopology, quad) - - hv_center_space = - Spaces.ExtrudedFiniteDifferenceSpace(horzspace, vert_center_space) - - coords = Fields.coordinate_field(hv_center_space) - - xpts = range(-500.0, 500.0, length = 21) - zpts = range(0.0, 1000.0, length = 21) - hcoords = [Geometry.XPoint(x) for x in xpts] - zcoords = [Geometry.ZPoint(z) for z in zpts] - - remapper = Remapping.Remapper(hcoords, zcoords, hv_center_space) - - interp_x = Remapping.interpolate(remapper, coords.x) - if ClimaComms.iamroot(context) - @test interp_x ≈ [x for x in xpts, z in zpts] - end - - interp_z = Remapping.interpolate(remapper, coords.z) - expected_z = [z for x in xpts, z in zpts] - if ClimaComms.iamroot(context) - @test interp_z[:, 2:(end - 1)] ≈ expected_z[:, 2:(end - 1)] - @test interp_z[:, 1] ≈ [1000.0 * (0 / 30 + 1 / 30) / 2 for x in xpts] - @test interp_z[:, end] ≈ - [1000.0 * (29 / 30 + 30 / 30) / 2 for x in xpts] - end - - # With hypsography - z_surface = cosd.(Fields.coordinate_field(horzspace).x) .+ 20 - hv_center_space = Spaces.ExtrudedFiniteDifferenceSpace( - horzspace, - vert_center_space, - Hypsography.LinearAdaption(z_surface), - ) - - coords = Fields.coordinate_field(hv_center_space) - - remapper = Remapping.Remapper(hcoords, zcoords, hv_center_space;) - - function ref_z(x, z) - z_surface = cosd(x) + 20 - z_top = vertdomain.coord_max.z - return (z - z_surface) / (1 - z_surface / z_top) - end - - val_or_NaN(x, z, val) = ref_z(x, z) >= 0 ? val : NaN - - interp_x = Remapping.interpolate(remapper, coords.x; physical_z = true) - if ClimaComms.iamroot(context) - @test interp_x ≈ [val_or_NaN(x, z, x) for x in xpts, z in zpts] nans = true - end - interp_z = Remapping.interpolate(remapper, coords.z; physical_z = true) - expected_z = [val_or_NaN(x, z, z) for x in xpts, z in zpts] - if ClimaComms.iamroot(context) - @test interp_z[:, 2:(end - 1)] ≈ expected_z[:, 2:(end - 1)] nans = true - - z_bottom = 1000.0 * (0 / 30 + 1 / 30) / 2 - @test interp_z[:, 1] ≈ [val_or_NaN(x, z_bottom, z_bottom) for x in xpts] nans = true - - z_top = 1000.0 * (29 / 30 + 30 / 30) / 2 - @test interp_z[:, end] ≈ [val_or_NaN(x, z_top, z_top) for x in xpts] rtol = 0.02 nans = true +if !(device isa ClimaComms.CUDADevice) + @testset "2D extruded" begin + vertdomain = Domains.IntervalDomain( + Geometry.ZPoint(0.0), + Geometry.ZPoint(1000.0); + boundary_tags = (:bottom, :top), + ) + + vertmesh = Meshes.IntervalMesh(vertdomain, nelems = 30) + verttopo = Topologies.IntervalTopology( + ClimaComms.SingletonCommsContext(device), + vertmesh, + ) + vert_center_space = Spaces.CenterFiniteDifferenceSpace(verttopo) + + horzdomain = Domains.IntervalDomain( + Geometry.XPoint(-500.0) .. Geometry.XPoint(500.0), + periodic = true, + ) + + quad = Spaces.Quadratures.GLL{4}() + horzmesh = Meshes.IntervalMesh(horzdomain, nelems = 10) + horztopology = Topologies.IntervalTopology( + ClimaComms.SingletonCommsContext(device), + horzmesh, + ) + horzspace = Spaces.SpectralElementSpace1D(horztopology, quad) + + hv_center_space = + Spaces.ExtrudedFiniteDifferenceSpace(horzspace, vert_center_space) + + coords = Fields.coordinate_field(hv_center_space) + + xpts = range(-500.0, 500.0, length = 21) + zpts = range(0.0, 1000.0, length = 21) + hcoords = [Geometry.XPoint(x) for x in xpts] + zcoords = [Geometry.ZPoint(z) for z in zpts] + + remapper = Remapping.Remapper(hcoords, zcoords, hv_center_space) + + interp_x = Remapping.interpolate(remapper, coords.x) + if ClimaComms.iamroot(context) + @test interp_x ≈ [x for x in xpts, z in zpts] + end + + interp_z = Remapping.interpolate(remapper, coords.z) + expected_z = [z for x in xpts, z in zpts] + if ClimaComms.iamroot(context) + @test interp_z[:, 2:(end - 1)] ≈ expected_z[:, 2:(end - 1)] + @test interp_z[:, 1] ≈ + [1000.0 * (0 / 30 + 1 / 30) / 2 for x in xpts] + @test interp_z[:, end] ≈ + [1000.0 * (29 / 30 + 30 / 30) / 2 for x in xpts] + end + + # With hypsography + z_surface = cosd.(Fields.coordinate_field(horzspace).x) .+ 20 + hv_center_space = Spaces.ExtrudedFiniteDifferenceSpace( + horzspace, + vert_center_space, + Hypsography.LinearAdaption(z_surface), + ) + + coords = Fields.coordinate_field(hv_center_space) + + remapper = Remapping.Remapper(hcoords, zcoords, hv_center_space;) + + function ref_z(x, z) + z_surface = cosd(x) + 20 + z_top = vertdomain.coord_max.z + return (z - z_surface) / (1 - z_surface / z_top) + end + + val_or_NaN(x, z, val) = ref_z(x, z) >= 0 ? val : NaN + + interp_x = Remapping.interpolate(remapper, coords.x; physical_z = true) + if ClimaComms.iamroot(context) + @test interp_x ≈ [val_or_NaN(x, z, x) for x in xpts, z in zpts] nans = true + end + interp_z = Remapping.interpolate(remapper, coords.z; physical_z = true) + expected_z = [val_or_NaN(x, z, z) for x in xpts, z in zpts] + if ClimaComms.iamroot(context) + @test interp_z[:, 2:(end - 1)] ≈ expected_z[:, 2:(end - 1)] nans = + true + + z_bottom = 1000.0 * (0 / 30 + 1 / 30) / 2 + @test interp_z[:, 1] ≈ + [val_or_NaN(x, z_bottom, z_bottom) for x in xpts] nans = true + + z_top = 1000.0 * (29 / 30 + 30 / 30) / 2 + @test interp_z[:, end] ≈ [val_or_NaN(x, z_top, z_top) for x in xpts] rtol = 0.02 nans = true + end end end