diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index cba1ca5ace..d196a2b812 100755 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -521,6 +521,18 @@ steps: slurm_mem: 20GB slurm_gpus: 1 + - label: "Unit: Integrals (CPU)" + key: "cpu_integrals" + command: + - "julia --color=yes --check-bounds=yes --project=test test/Operators/integrals.jl" + + - label: "Unit: Integrals (GPU)" + key: "gpu_integrals" + command: + - "julia --color=yes --check-bounds=yes --project=test test/Operators/integrals.jl" + agents: + slurm_gpus: 1 + - label: "Unit: Thomas Algorithm" key: "cpu_thomas_algorithm" command: diff --git a/docs/src/operators.md b/docs/src/operators.md index b9133df8b4..fc10835140 100644 --- a/docs/src/operators.md +++ b/docs/src/operators.md @@ -97,6 +97,14 @@ SetDivergence Extrapolate ``` +## Integrals + +```@docs +column_integral_definite! +column_integral_indefinite! +column_mapreduce! +``` + ## Internal APIs ```@docs diff --git a/src/Operators/integrals.jl b/src/Operators/integrals.jl index 9dddf35d35..401dfa0527 100644 --- a/src/Operators/integrals.jl +++ b/src/Operators/integrals.jl @@ -1,86 +1,310 @@ -##### -##### Column integrals (definite) -##### +import ..RecursiveApply: rzero, ⊠, ⊞ +import ..Utilities +import ..DataLayouts """ - column_integral_definite!(col∫field::Field, field::Field) + column_integral_definite!(∫field::Field, ᶜfield::Field) -The definite vertical column integral, `col∫field`, of field `field`. +Sets `∫field```{}= \\int_0^{z_{max}}\\,```ᶜfield```(z)\\,dz``, where ``z_{max}`` +is the value of `z` at the top of the domain. The input `ᶜfield` must lie on a +cell-center space, and the output `∫field` must lie on the corresponding +horizontal space. """ -column_integral_definite!(col∫field::Fields.Field, field::Fields.Field) = - column_integral_definite!(ClimaComms.device(axes(field)), col∫field, field) +column_integral_definite!(∫field::Fields.Field, ᶜfield::Fields.Field) = + column_integral_definite!(ClimaComms.device(axes(ᶜfield)), ∫field, ᶜfield) function column_integral_definite!( ::ClimaComms.CUDADevice, - col∫field::Fields.Field, - field::Fields.Field, + ∫field::Fields.Field, + ᶜfield::Fields.Field, ) - Ni, Nj, _, _, Nh = size(Fields.field_values(col∫field)) + space = axes(∫field) + Ni, Nj, _, _, Nh = size(Fields.field_values(∫field)) nthreads, nblocks = Spaces._configure_threadblock(Ni * Nj * Nh) @cuda threads = nthreads blocks = nblocks column_integral_definite_kernel!( - col∫field, - field, + strip_space(∫field, space), + strip_space(ᶜfield, space), ) end function column_integral_definite_kernel!( - col∫field::Fields.SpectralElementField, - field::Fields.ExtrudedFiniteDifferenceField, + ∫field, + ᶜfield::Fields.CenterExtrudedFiniteDifferenceField, ) idx = threadIdx().x + (blockIdx().x - 1) * blockDim().x - Ni, Nj, _, _, Nh = size(Fields.field_values(field)) + Ni, Nj, _, _, Nh = size(Fields.field_values(ᶜfield)) if idx <= Ni * Nj * Nh i, j, h = Spaces._get_idx((Ni, Nj, Nh), idx) - colfield = Spaces.column(field, i, j, h) - _column_integral_definite!(Spaces.column(col∫field, i, j, h), colfield) + ∫field_column = Spaces.column(∫field, i, j, h) + ᶜfield_column = Spaces.column(ᶜfield, i, j, h) + _column_integral_definite!(∫field_column, ᶜfield_column) end return nothing end -column_integral_definite_kernel!( - col∫field::Fields.PointField, - field::Fields.FiniteDifferenceField, -) = _column_integral_definite!(col∫field, field) +function column_integral_definite_kernel!( + ∫field, + ᶜfield::Fields.CenterFiniteDifferenceField, +) + _column_integral_definite!(∫field, ᶜfield) + return nothing +end -function column_integral_definite!( +column_integral_definite!( ::ClimaComms.AbstractCPUDevice, - col∫field::Fields.SpectralElementField, - field::Fields.ExtrudedFiniteDifferenceField, + ∫field::Fields.SpectralElementField, + ᶜfield::Fields.CenterExtrudedFiniteDifferenceField, +) = + Fields.bycolumn(axes(ᶜfield)) do colidx + _column_integral_definite!(∫field[colidx], ᶜfield[colidx]) + end + +column_integral_definite!( + ::ClimaComms.AbstractCPUDevice, + ∫field::Fields.PointField, + ᶜfield::Fields.CenterFiniteDifferenceField, +) = _column_integral_definite!(∫field, ᶜfield) + +function _column_integral_definite!(∫field, ᶜfield::Fields.ColumnField) + Δz = Fields.Δz_field(ᶜfield) + first_level = Operators.left_idx(axes(ᶜfield)) + last_level = Operators.right_idx(axes(ᶜfield)) + ∫field_data = Fields.field_values(∫field) + Base.setindex!(∫field_data, rzero(eltype(∫field))) + @inbounds for level in first_level:last_level + val = + ∫field_data[] ⊞ + Fields.level(ᶜfield, level)[] ⊠ Fields.level(Δz, level)[] + Base.setindex!(∫field_data, val) + end + return nothing +end + +""" + column_integral_indefinite!(ᶠ∫field::Field, ᶜfield::Field) + +Sets `ᶠ∫field```(z) = \\int_0^z\\,```ᶜfield```(z')\\,dz'``. The input `ᶜfield` +must lie on a cell-center space, and the output `ᶠ∫field` must lie on the +corresponding cell-face space. +""" +column_integral_indefinite!(ᶠ∫field::Fields.Field, ᶜfield::Fields.Field) = + column_integral_indefinite!(ClimaComms.device(ᶠ∫field), ᶠ∫field, ᶜfield) + +function column_integral_indefinite!( + ::ClimaComms.CUDADevice, + ᶠ∫field::Fields.Field, + ᶜfield::Fields.Field, +) + Ni, Nj, _, _, Nh = size(Fields.field_values(ᶠ∫field)) + nthreads, nblocks = Spaces._configure_threadblock(Ni * Nj * Nh) + @cuda threads = nthreads blocks = nblocks column_integral_indefinite_kernel!( + ᶠ∫field, + ᶜfield, + ) +end + +function column_integral_indefinite_kernel!( + ᶠ∫field::Fields.FaceExtrudedFiniteDifferenceField, + ᶜfield::Fields.CenterExtrudedFiniteDifferenceField, ) - Fields.bycolumn(axes(field)) do colidx - _column_integral_definite!(col∫field[colidx], field[colidx]) - nothing + idx = threadIdx().x + (blockIdx().x - 1) * blockDim().x + Ni, Nj, _, _, Nh = size(Fields.field_values(ᶜfield)) + if idx <= Ni * Nj * Nh + i, j, h = Spaces._get_idx((Ni, Nj, Nh), idx) + ᶠ∫field_column = Spaces.column(ᶠ∫field, i, j, h) + ᶜfield_column = Spaces.column(ᶜfield, i, j, h) + _column_integral_indefinite!(ᶠ∫field_column, ᶜfield_column) end return nothing end -column_integral_definite!( +column_integral_indefinite_kernel!( + ᶠ∫field::Fields.FaceFiniteDifferenceField, + ᶜfield::Fields.CenterFiniteDifferenceField, +) = _column_integral_indefinite!(ᶠ∫field, ᶜfield) + +column_integral_indefinite!( + ::ClimaComms.AbstractCPUDevice, + ᶠ∫field::Fields.FaceExtrudedFiniteDifferenceField, + ᶜfield::Fields.CenterExtrudedFiniteDifferenceField, +) = + Fields.bycolumn(axes(ᶜfield)) do colidx + _column_integral_indefinite!(ᶠ∫field[colidx], ᶜfield[colidx]) + end + +column_integral_indefinite!( ::ClimaComms.AbstractCPUDevice, - col∫field::Fields.PointField, - field::Fields.FiniteDifferenceField, -) = _column_integral_definite!(col∫field, field) + ᶠ∫field::Fields.FaceFiniteDifferenceField, + ᶜfield::Fields.CenterFiniteDifferenceField, +) = _column_integral_indefinite!(ᶠ∫field, ᶜfield) -function _column_integral_definite!( - col∫field::Fields.PointField, - field::Fields.ColumnField, +function _column_integral_indefinite!( + ᶠ∫field::Fields.ColumnField, + ᶜfield::Fields.ColumnField, ) - space = axes(field) - Δz_field = Fields.Δz_field(space) - Nv = Spaces.nlevels(space) - - col∫field[] = 0 - @inbounds for idx in 1:Nv - col∫field[] += - reduction_getindex(field, idx) * reduction_getindex(Δz_field, idx) + face_space = axes(ᶠ∫field) + ᶜΔz = Fields.Δz_field(ᶜfield) + first_level = Operators.left_idx(face_space) + last_level = Operators.right_idx(face_space) + @inbounds Fields.level(ᶠ∫field, first_level)[] = rzero(eltype(ᶜfield)) + @inbounds for level in (first_level + 1):last_level + Fields.level(ᶠ∫field, level)[] = + Fields.level(ᶠ∫field, level - 1)[] ⊞ + Fields.level(ᶜfield, level - half)[] ⊠ + Fields.level(ᶜΔz, level - half)[] end - return nothing end -reduction_getindex(column_field, index) = @inbounds getidx( - axes(column_field), - column_field, - Interior(), - index - 1 + left_idx(axes(column_field)), +""" + column_mapreduce!(fn, op, reduced_field::Field, fields::Field...) + +Applies mapreduce along the vertical direction. The input `fields` must all lie +on the same space, and the output `reduced_field` must lie on the corresponding +horizontal space. The function `fn` is mapped over every input, and the function +`op` is used to reduce the outputs of `fn`. +""" +column_mapreduce!( + fn::F, + op::O, + reduced_field::Fields.Field, + fields::Fields.Field..., +) where {F, O} = column_mapreduce_device!( + ClimaComms.device(reduced_field), + fn, + op, + reduced_field, + fields..., ) -# TODO: add support for indefinite integrals +function column_mapreduce_device!( + ::ClimaComms.CUDADevice, + fn::F, + op::O, + reduced_field::Fields.Field, + fields::Fields.Field..., +) where {F, O} + Ni, Nj, _, _, Nh = size(Fields.field_values(reduced_field)) + nthreads, nblocks = Spaces._configure_threadblock(Ni * Nj * Nh) + kernel! = if first(fields) isa Fields.ExtrudedFiniteDifferenceField + column_mapreduce_kernel_extruded! + else + column_mapreduce_kernel! + end + @cuda threads = nthreads blocks = nblocks kernel!( + fn, + op, + # reduced_field, + strip_space(reduced_field, axes(reduced_field)), + # fields..., + map(field -> strip_space(field, axes(field)), fields)..., + ) +end + +function column_mapreduce_kernel_extruded!( + fn::F, + op::O, + reduced_field, + fields..., +) where {F, O} + idx = threadIdx().x + (blockIdx().x - 1) * blockDim().x + Ni, Nj, _, _, Nh = size(Fields.field_values(reduced_field)) + if idx <= Ni * Nj * Nh + i, j, h = Spaces._get_idx((Ni, Nj, Nh), idx) + reduced_field_column = Spaces.column(reduced_field, i, j, h) + field_columns = map(field -> Spaces.column(field, i, j, h), fields) + _column_mapreduce!(fn, op, reduced_field_column, field_columns...) + end + return nothing +end + +column_mapreduce_kernel!(fn::F, op::O, reduced_field, fields...) where {F, O} = + _column_mapreduce!(fn, op, reduced_field, fields...) + +column_mapreduce_device!( + ::ClimaComms.AbstractCPUDevice, + fn::F, + op::O, + reduced_field::Fields.SpectralElementField, + fields::Fields.ExtrudedFiniteDifferenceField..., +) where {F, O} = + Fields.bycolumn(axes(reduced_field)) do colidx + column_fields = map(field -> field[colidx], fields) + _column_mapreduce!(fn, op, reduced_field[colidx], column_fields...) + end + +column_mapreduce_device!( + ::ClimaComms.AbstractCPUDevice, + fn::F, + op::O, + reduced_field::Fields.PointField, + fields::Fields.FiniteDifferenceField..., +) where {F, O} = _column_mapreduce!(fn, op, reduced_field, fields...) + +function _column_mapreduce!(fn::F, op::O, reduced_field, fields...) where {F, O} + space = axes(first(fields)) + for field in Base.tail(fields) # Base.rest breaks on the gpu + axes(field) === space || + error("All inputs to column_mapreduce must lie on the same space") + end + (_, _, _, Nv, _) = size(Fields.field_values(first(fields))) + first_level = left_boundary_idx(Nv, space) + last_level = right_boundary_idx(Nv, space) + # TODO: This code is allocating memory. In particular, even if we comment + # out the rest of this function, the first line alone allocates memory. + # This problem is not fixed by replacing map with ntuple or unrolled_map. + fields_data = map(field -> Fields.field_values(field), fields) + first_level_values = map( + field_data -> + (@inbounds data_level(field_data, space, first_level)[]), + fields_data, + ) + reduced_field_data = Fields.field_values(reduced_field) + Base.setindex!(reduced_field_data, fn(first_level_values...)) + for level in (first_level + 1):last_level + values = map( + field_data -> + (@inbounds data_level(field_data, space, level)[]), + fields_data, + ) + Base.setindex!( + reduced_field_data, + op(reduced_field_data[], fn(values...)), + ) + end + return nothing +end + +import ..Utilities +Base.@propagate_inbounds data_level( + data, + ::Operators.CenterPlaceholderSpace, + v::Int, +) = DataLayouts.level(data, v) +Base.@propagate_inbounds data_level( + data, + ::Spaces.FiniteDifferenceSpace{Spaces.CellCenter}, + v::Int, +) = DataLayouts.level(data, v) + +Base.@propagate_inbounds data_level( + data, + ::Operators.FacePlaceholderSpace, + v::Utilities.PlusHalf, +) = DataLayouts.level(data, v.i + 1) +Base.@propagate_inbounds data_level( + data, + ::Spaces.FiniteDifferenceSpace{Spaces.CellFace}, + v::Utilities.PlusHalf, +) = DataLayouts.level(data, v.i + 1) + +left_boundary_idx(n, ::Operators.CenterPlaceholderSpace) = 1 +right_boundary_idx(n, ::Operators.CenterPlaceholderSpace) = n +left_boundary_idx(n, ::Operators.FacePlaceholderSpace) = Utilities.half +right_boundary_idx(n, ::Operators.FacePlaceholderSpace) = n - Utilities.half + +left_boundary_idx(n, ::Spaces.FiniteDifferenceSpace{Spaces.CellCenter}) = 1 +right_boundary_idx(n, ::Spaces.FiniteDifferenceSpace{Spaces.CellCenter}) = n +left_boundary_idx(n, ::Spaces.FiniteDifferenceSpace{Spaces.CellFace}) = + Utilities.half +right_boundary_idx(n, ::Spaces.FiniteDifferenceSpace{Spaces.CellFace}) = + n - Utilities.half diff --git a/test/Operators/integrals.jl b/test/Operators/integrals.jl new file mode 100644 index 0000000000..8eefe9793d --- /dev/null +++ b/test/Operators/integrals.jl @@ -0,0 +1,175 @@ +using Test +using JET +import CUDA +import ClimaComms +import ClimaCore +import ClimaCore: Spaces, Fields, Operators +import ClimaCore.RecursiveApply: rmax +import ClimaCore.Operators: + column_integral_definite!, column_integral_indefinite!, column_mapreduce! + +include( + joinpath(pkgdir(ClimaCore), "test", "TestUtilities", "TestUtilities.jl"), +) +import .TestUtilities as TU + +center_to_face_space(center_space::Spaces.CenterFiniteDifferenceSpace) = + Spaces.FaceFiniteDifferenceSpace(center_space) +center_to_face_space(center_space::Spaces.CenterExtrudedFiniteDifferenceSpace) = + Spaces.FaceExtrudedFiniteDifferenceSpace(center_space) + +function test_column_definite_integral!(center_space, alloc_lim) + face_space = center_to_face_space(center_space) + ᶜz = Fields.coordinate_field(center_space).z + ᶠz = Fields.coordinate_field(face_space).z + z_top = Fields.level(ᶠz, Operators.right_idx(face_space)) + ᶜu = map(z -> (; one = one(z), powers = (z, z^2, z^3)), ᶜz) + ∫u_ref = map(z -> (; one = z, powers = (z^2 / 2, z^3 / 3, z^4 / 4)), z_top) + ∫u_test = similar(∫u_ref) + + column_integral_definite!(∫u_test, ᶜu) + ref_array = parent(∫u_ref) + test_array = parent(∫u_test) + max_relative_error = maximum(@. abs((ref_array - test_array) / ref_array)) + @test max_relative_error <= 0.006 # Less than 0.6% error. + + cuda = (AnyFrameModule(CUDA),) + @test_opt ignored_modules = cuda column_integral_definite!(∫u_test, ᶜu) + + @test (@allocated column_integral_definite!(∫u_test, ᶜu)) ≤ alloc_lim + alloc_lim == 0 || + @test_broken (@allocated column_integral_definite!(∫u_test, ᶜu)) < + alloc_lim +end + +function test_column_integral_indefinite!(center_space, alloc_lim) + face_space = center_to_face_space(center_space) + ᶜz = Fields.coordinate_field(center_space).z + ᶠz = Fields.coordinate_field(face_space).z + ᶜu = map(z -> (; one = one(z), powers = (z, z^2, z^3)), ᶜz) + ᶠ∫u_ref = map(z -> (; one = z, powers = (z^2 / 2, z^3 / 3, z^4 / 4)), ᶠz) + ᶠ∫u_test = similar(ᶠ∫u_ref) + + column_integral_indefinite!(ᶠ∫u_test, ᶜu) + ref_array = parent(Fields.level(ᶠ∫u_ref, Operators.right_idx(face_space))) + test_array = parent(Fields.level(ᶠ∫u_test, Operators.right_idx(face_space))) + max_relative_error = maximum(@. abs((ref_array - test_array) / ref_array)) + @test max_relative_error <= 0.006 # Less than 0.6% error at the top level. + + cuda = (AnyFrameModule(CUDA),) + @test_opt ignored_modules = cuda column_integral_indefinite!(ᶠ∫u_test, ᶜu) + + @test (@allocated column_integral_indefinite!(ᶠ∫u_test, ᶜu)) ≤ alloc_lim + alloc_lim == 0 || + @test_broken (@allocated column_integral_indefinite!(ᶠ∫u_test, ᶜu)) < + alloc_lim +end + +function test_column_mapreduce!(space, alloc_lim; broken = false) + z_field = Fields.coordinate_field(space).z + z_top_field = Fields.level(z_field, Operators.right_idx(space)) + sin_field = @. sin(pi * z_field / z_top_field) + square_and_sin(z, sin_value) = (; square = z^2, sin = sin_value) + reduced_field_ref = map(z -> (; square = z^2, sin = one(z)), z_top_field) + reduced_field_test = similar(reduced_field_ref) + args = (square_and_sin, rmax, reduced_field_test, z_field, sin_field) + + broken || column_mapreduce!(args...) + ref_array = parent(reduced_field_ref) + test_array = parent(reduced_field_test) + max_relative_error = maximum(@. abs((ref_array - test_array) / ref_array)) + broken || @test max_relative_error <= 0.004 # Less than 0.4% error. + + cuda = (AnyFrameModule(CUDA),) + broken || + ClimaComms.device() isa ClimaComms.CUDADevice || + @test_opt ignored_modules = cuda column_mapreduce!(args...) + + # TODO: column_mapreduce! currently allocates memory + broken || + ClimaComms.device() isa ClimaComms.CUDADevice || + @test (@allocated column_mapreduce!(args...)) ≤ alloc_lim + broken || + ClimaComms.device() isa ClimaComms.CUDADevice || + @test_broken (@allocated column_mapreduce!(args...)) < alloc_lim +end + +@testset "Integral operations unit tests" begin + # device = ClimaComms.CPUSingleThreaded(); + device = ClimaComms.device() + context = ClimaComms.SingletonCommsContext(device) + broken = device isa ClimaComms.CUDADevice + if device isa ClimaComms.CPUSingleThreaded + i_lim = Dict() + i_lim[(1, Float32)] = 0 + i_lim[(2, Float32)] = 0 + i_lim[(3, Float32)] = 0 + i_lim[(4, Float32)] = 0 + + i_lim[(1, Float64)] = 0 + i_lim[(2, Float64)] = 0 + i_lim[(3, Float64)] = 0 + i_lim[(4, Float64)] = 0 + lim = i_lim + else + i_lim = Dict() + i_lim[(1, Float32)] = 1648 + i_lim[(2, Float32)] = 4640 + i_lim[(3, Float32)] = 2224 + i_lim[(4, Float32)] = 8096 + + i_lim[(1, Float64)] = 1680 + i_lim[(2, Float64)] = 4640 + i_lim[(3, Float64)] = 2288 + i_lim[(4, Float64)] = 8096 + + lim = Dict() + lim[(1, Float32)] = 1808 + lim[(2, Float32)] = 1920 + lim[(3, Float32)] = 4399104 + lim[(4, Float32)] = 4571136 + + lim[(1, Float64)] = 2512 + lim[(2, Float64)] = 2688 + lim[(3, Float64)] = 5455872 + lim[(4, Float64)] = 5726208 + end + + for FT in (Float32, Float64) + test_column_definite_integral!( + TU.ColumnCenterFiniteDifferenceSpace(FT; context), + i_lim[(1, FT)], + ) + test_column_definite_integral!( + TU.CenterExtrudedFiniteDifferenceSpace(FT; context), + i_lim[(2, FT)], + ) + test_column_integral_indefinite!( + TU.ColumnCenterFiniteDifferenceSpace(FT; context), + i_lim[(3, FT)], + ) + test_column_integral_indefinite!( + TU.CenterExtrudedFiniteDifferenceSpace(FT; context), + i_lim[(4, FT)], + ) + + test_column_mapreduce!( + TU.ColumnCenterFiniteDifferenceSpace(FT; context), + lim[(1, FT)], + ) + test_column_mapreduce!( + TU.ColumnFaceFiniteDifferenceSpace(FT; context), + lim[(2, FT)], + ) + test_column_mapreduce!( + TU.CenterExtrudedFiniteDifferenceSpace(FT; context), + lim[(3, FT)]; + broken = broken, + ) + test_column_mapreduce!( + TU.FaceExtrudedFiniteDifferenceSpace(FT; context), + lim[(4, FT)]; + broken = broken, + ) + end +end