From 4c22fcff13d5994d5df5fb492bd72877c2c41ca8 Mon Sep 17 00:00:00 2001
From: ali-ramadhan
Date: Tue, 4 Jun 2019 18:06:33 -0400
Subject: [PATCH 001/100] Sandbox implementation of fill_halo_regions_tiled for MPI

Former-commit-id: f9286e6f9e1c1b1f3fb19bb528cbb66728202276
---
 sandbox/tiled_halos.jl | 65 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100644 sandbox/tiled_halos.jl

diff --git a/sandbox/tiled_halos.jl b/sandbox/tiled_halos.jl
new file mode 100644
index 0000000000..01baeb469e
--- /dev/null
+++ b/sandbox/tiled_halos.jl
@@ -0,0 +1,65 @@
+using Oceananigans
+
+@inline incmod1(a, n) = ifelse(a==n, 1, a + 1)
+@inline decmod1(a, n) = ifelse(a==1, n, a - 1)
+@inline index2rank(I, J, Mx, My) = J*My + I
+
+@inline east_halo(tile) = @views @inbounds tile.data[tile.grid.Nx+1:tile.grid.Nx+tile.grid.Hx, :, :]
+@inline west_halo(tile) = @views @inbounds tile.data[1-tile.grid.Hx:0, :, :]
+@inline north_halo(tile) = @views @inbounds tile.data[:, 1-tile.grid.Hy:0, :]
+@inline south_halo(tile) = @views @inbounds tile.data[:, tile.grid.Ny+1:tile.grid.Ny+tile.grid.Hy, :]
+
+@inline east_data(tile) = @views @inbounds tile.data[1:tile.grid.Hx, :, :]
+@inline west_data(tile) = @views @inbounds tile.data[tile.grid.Nx-tile.grid.Hx+1:tile.grid.Nx, :, :]
+@inline north_data(tile) = @views @inbounds tile.data[:, 1:tile.grid.Hy, :]
+@inline south_data(tile) = @views @inbounds tile.data[:, tile.grid.Ny-tile.grid.Hy+1:tile.grid.Ny, :]
+
+function fill_halo_regions_tiled!(tiles, Mx, My)
+    for J in 0:My-1, I in 0:Mx-1
+        rank = index2rank(I, J, Mx, My)
+
+        I⁻, I⁺ = mod(I-1, Mx), mod(I+1, Mx)
+        J⁻, J⁺ = mod(J-1, My), mod(J+1, My)
+
+        north_rank = index2rank(I, J⁻, Mx, My)
+        south_rank = index2rank(I, J⁺, Mx, My)
+        east_rank = index2rank(I⁺, J, Mx, My)
+        west_rank = index2rank(I⁻, J, Mx, My)
+
+        east_halo(tiles[rank+1]) .= west_data(tiles[east_rank+1])
+        west_halo(tiles[rank+1]) .= east_data(tiles[west_rank+1])
+        north_halo(tiles[rank+1]) .= south_data(tiles[north_rank+1])
+        south_halo(tiles[rank+1]) .= north_data(tiles[south_rank+1])
+    end
+end
+
+FT, arch = Float64, CPU()
+
+Nx, Ny, Nz = 16, 16, 16
+Lx, Ly, Lz = 10, 10, 10
+N, L = (Nx, Ny, Nz), (Lx, Ly, Lz)
+
+grid = RegularCartesianGrid(N, L)
+
+# MPI ranks along each dimension
+Mx, My = 2, 2
+
+R = rand(Nx, Ny, Nz)
+
+tiles = []
+for I in 0:Mx-1, J in 0:My-1
+    Nx′, Ny′, Nz′ = Int(Nx/Mx), Int(Ny/My), Nz
+    Lx′, Ly′, Lz′ = Lx/Mx, Ly/My, Lz
+    tile_grid = RegularCartesianGrid((Nx′, Ny′, Nz′), (Lx′, Ly′, Lz′))
+
+    tile = CellField(FT, arch, tile_grid)
+
+    i1, i2 = I*Nx′+1, (I+1)*Nx′
+    j1, j2 = J*Ny′+1, (J+1)*Ny′
+    data(tile) .= R[i1:i2, j1:j2, :]
+
+    push!(tiles, tile)
+end
+
+fill_halo_regions_tiled!(tiles, Mx, My)
+fill_halo_regions_tiled!(tiles, Mx, My)

From f6453eb55f65d7e6fbea225635698ccf1ca251df Mon Sep 17 00:00:00 2001
From: ali-ramadhan
Date: Wed, 5 Jun 2019 10:35:38 -0400
Subject: [PATCH 002/100] Fix north/south/east/west indexing.
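After this fix each directional helper pairs a halo range with the interior
range its neighbor needs: halos live outside the interior at 1-H:0 and
N+1:N+H, while the slabs sent to neighbors are the first and last H interior
cells. A minimal one-dimensional sketch of that convention, assuming interior
size N and halo width H as in the patch (OffsetArrays stands in for
tile.data, which is offset the same way):

    using OffsetArrays

    N, H = 4, 1
    f = OffsetArray(zeros(N + 2H), 1-H:N+H)  # one dimension of tile.data

    halo_lo = view(f, 1-H:0)      # ghost cells, filled from a neighbor
    halo_hi = view(f, N+1:N+H)
    data_lo = view(f, 1:H)        # interior cells a neighbor reads
    data_hi = view(f, N-H+1:N)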
Former-commit-id: c78d5fa5758a34d35c7554f0a31c96b24e077e49 --- sandbox/tiled_halos.jl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sandbox/tiled_halos.jl b/sandbox/tiled_halos.jl index 01baeb469e..2d6d547a95 100644 --- a/sandbox/tiled_halos.jl +++ b/sandbox/tiled_halos.jl @@ -4,15 +4,15 @@ using Oceananigans @inline decmod1(a, n) = ifelse(a==1, n, a - 1) @inline index2rank(I, J, Mx, My) = J*My + I -@inline east_halo(tile) = @views @inbounds tile.data[tile.grid.Nx+1:tile.grid.Nx+tile.grid.Hx, :, :] -@inline west_halo(tile) = @views @inbounds tile.data[1-tile.grid.Hx:0, :, :] -@inline north_halo(tile) = @views @inbounds tile.data[:, 1-tile.grid.Hy:0, :] -@inline south_halo(tile) = @views @inbounds tile.data[:, tile.grid.Ny+1:tile.grid.Ny+tile.grid.Hy, :] - -@inline east_data(tile) = @views @inbounds tile.data[1:tile.grid.Hx, :, :] -@inline west_data(tile) = @views @inbounds tile.data[tile.grid.Nx-tile.grid.Hx+1:tile.grid.Nx, :, :] -@inline north_data(tile) = @views @inbounds tile.data[:, 1:tile.grid.Hy, :] -@inline south_data(tile) = @views @inbounds tile.data[:, tile.grid.Ny-tile.grid.Hy+1:tile.grid.Ny, :] +@inline north_halo(tile) = @views @inbounds tile.data[1-tile.grid.Hx:0, :, :] +@inline south_halo(tile) = @views @inbounds tile.data[tile.grid.Nx+1:tile.grid.Nx+tile.grid.Hx, :, :] +@inline west_halo(tile) = @views @inbounds tile.data[:, 1-tile.grid.Hy:0, :] +@inline east_halo(tile) = @views @inbounds tile.data[:, tile.grid.Ny+1:tile.grid.Ny+tile.grid.Hy, :] + +@inline north_data(tile) = @views @inbounds tile.data[1:tile.grid.Hx, :, :] +@inline south_data(tile) = @views @inbounds tile.data[tile.grid.Nx-tile.grid.Hx+1:tile.grid.Nx, :, :] +@inline west_data(tile) = @views @inbounds tile.data[:, 1:tile.grid.Hy, :] +@inline east_data(tile) = @views @inbounds tile.data[:, tile.grid.Ny-tile.grid.Hy+1:tile.grid.Ny, :] function fill_halo_regions_tiled!(tiles, Mx, My) for J in 0:My-1, I in 0:Mx-1 From b8a1f705451d9d032c580cd73715930e115f16fe Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Wed, 5 Jun 2019 11:14:30 -0400 Subject: [PATCH 003/100] Tests for correct halo comm. Former-commit-id: b544d24bd14cb5b2d5ff797592df33b5cc9a695d --- sandbox/tiled_halos.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sandbox/tiled_halos.jl b/sandbox/tiled_halos.jl index 2d6d547a95..e2f90babb4 100644 --- a/sandbox/tiled_halos.jl +++ b/sandbox/tiled_halos.jl @@ -1,4 +1,4 @@ -using Oceananigans +using Oceananigans, Test @inline incmod1(a, n) = ifelse(a==n, 1, a + 1) @inline decmod1(a, n) = ifelse(a==1, n, a - 1) @@ -63,3 +63,8 @@ end fill_halo_regions_tiled!(tiles, Mx, My) fill_halo_regions_tiled!(tiles, Mx, My) + +@test all(tiles[1].data[1:end, 1:end, :] .== R[1:9, 1:9, :]) +@test all(tiles[2].data[1:end, 0:end-1, :] .== R[1:9, 8:end, :]) +@test all(tiles[3].data[0:end-1, 1:end, :] .== R[8:end, 1:9, :]) +@test all(tiles[4].data[0:end-1, 0:end-1, :] .== R[8:end, 8:end, :]) From e8632d8532453f329c42efd9bbe00ba5bac48ab5 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Wed, 5 Jun 2019 17:52:18 -0400 Subject: [PATCH 004/100] Starting an MPI version of filling halo regions. 
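The first cut: rank 0 owns the full array and scatters one tile to every rank
with nonblocking sends, using a per-rank tag (100 + r) so each receiver
matches only its own message, while every rank posts the corresponding
nonblocking receive. A standalone sketch of that MPI.jl pattern, assuming
exactly two ranks (same positional Isend/Irecv! API as the script; run with
mpiexec -n 2 julia sketch.jl):

    import MPI

    MPI.Init()
    comm = MPI.COMM_WORLD
    rank = MPI.Comm_rank(comm)

    if rank == 0
        sreq = MPI.Isend(rand(4, 4), 1, 100 + 1, comm)  # to rank 1, tag 101
        MPI.Waitall!([sreq])
    else
        recv_mesg = zeros(4, 4)
        rreq = MPI.Irecv!(recv_mesg, 0, 100 + rank, comm)  # from rank 0, tag 101
        MPI.Waitall!([rreq])
    end

    MPI.Finalize()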
Former-commit-id: 50599f8afd695a837b8749c017105ef64b171821 --- sandbox/tiled_halos_mpi.jl | 62 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 sandbox/tiled_halos_mpi.jl diff --git a/sandbox/tiled_halos_mpi.jl b/sandbox/tiled_halos_mpi.jl new file mode 100644 index 0000000000..6f241c08fc --- /dev/null +++ b/sandbox/tiled_halos_mpi.jl @@ -0,0 +1,62 @@ +import MPI + +using Oceananigans + +@inline index2rank(I, J, Mx, My) = J*My + I + +function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) + comm = MPI.COMM_WORLD + + MPI.Barrier(comm) + + rank = MPI.Comm_rank(comm) + size = MPI.Comm_size(comm) + + I⁻, I⁺ = mod(I-1, Mx), mod(I+1, Mx) + J⁻, J⁺ = mod(J-1, My), mod(J+1, My) + + north_rank = index2rank(I, J⁻, Mx, My) + south_rank = index2rank(I, J⁺, Mx, My) + east_rank = index2rank(I⁺, J, Mx, My) + west_rank = index2rank(I⁻, J, Mx, My) + + send_reqs = [] + if rank == 0 + rands = rand(Nx, Ny, Nz) + + for r in 0:Mx*My + i1, i2 = I*Nx′+1, (I+1)*Nx′ + j1, j2 = J*Ny′+1, (J+1)*Ny′ + send_mesg = R[i1:i2, j1:j2, :] + + tag = 100 + r + println("[rank $rank] Sending R[$i1:$i2, $j1:$j2, :] to rank $r with tag $tag...") + + sreq = MPI.Isend(send_mesg, r, tag, comm) + push!(send_reqs, sreq) + end + + MPI.Waitall!(send_reqs) + end + + Nx′, Ny′, Nz′ = Int(Nx/Mx), Int(Ny/My), Nz + Lx′, Ly′, Lz′ = Lx/Mx, Ly/My, Lz + tile_grid = RegularCartesianGrid((Nx′, Ny′, Nz′), (Lx′, Ly′, Lz′)) + tile = CellField(FT, arch, tile_grid) + + recv_mesg = zeros(FT, Nx′, Ny′, Nz′) + + tag = 100 + r + println("[rank $rank] Receiving message from rank $src with tag $tag...") + rreq = MPI.Irecv!(recv_mesg, 0, tag, comm) + + data(tile) .= recv_mesg + + stats = MPI.Waitall!([rreq]) + + MPI.Barrier(comm) +end + +MPI.Init() +fill_halo_regions_mpi!(Float64, CPU(), 16, 16, 16, 2, 2) +MPI.Finalize() From ff0f875a943407022a3c5b3480d0db4e31ca88cb Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Wed, 5 Jun 2019 18:30:00 -0400 Subject: [PATCH 005/100] Fix typos and stuf Former-commit-id: 7da2fbaf7f2e4b0a6fae4c890e1a3b91a3c97718 --- sandbox/tiled_halos_mpi.jl | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/sandbox/tiled_halos_mpi.jl b/sandbox/tiled_halos_mpi.jl index 6f241c08fc..e82ad44a27 100644 --- a/sandbox/tiled_halos_mpi.jl +++ b/sandbox/tiled_halos_mpi.jl @@ -3,8 +3,14 @@ import MPI using Oceananigans @inline index2rank(I, J, Mx, My) = J*My + I +@inline rank2index(r, Mx, My) = div(r, Mx), mod(r, My) function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) + Lx, Ly, Lz = 10, 10, 10 + + Nx′, Ny′, Nz′ = Int(Nx/Mx), Int(Ny/My), Nz + Lx′, Ly′, Lz′ = Lx/Mx, Ly/My, Lz + comm = MPI.COMM_WORLD MPI.Barrier(comm) @@ -12,22 +18,26 @@ function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) rank = MPI.Comm_rank(comm) size = MPI.Comm_size(comm) + I, J = rank2index(rank, Mx, My) I⁻, I⁺ = mod(I-1, Mx), mod(I+1, Mx) J⁻, J⁺ = mod(J-1, My), mod(J+1, My) + Nx′, Ny′, Nz′ = Int(Nx/Mx), Int(Ny/My), Nz + Lx′, Ly′, Lz′ = Lx/Mx, Ly/My, Lz north_rank = index2rank(I, J⁻, Mx, My) south_rank = index2rank(I, J⁺, Mx, My) east_rank = index2rank(I⁺, J, Mx, My) west_rank = index2rank(I⁻, J, Mx, My) - send_reqs = [] + send_reqs = MPI.Request[] if rank == 0 rands = rand(Nx, Ny, Nz) - for r in 0:Mx*My - i1, i2 = I*Nx′+1, (I+1)*Nx′ - j1, j2 = J*Ny′+1, (J+1)*Ny′ - send_mesg = R[i1:i2, j1:j2, :] + for r in 0:Mx*My-1 + I′, J′ = rank2index(r, Mx, My) + i1, i2 = I′*Nx′+1, (I′+1)*Nx′ + j1, j2 = J′*Ny′+1, (J′+1)*Ny′ + send_mesg = rands[i1:i2, j1:j2, :] tag = 100 + r println("[rank 
$rank] Sending R[$i1:$i2, $j1:$j2, :] to rank $r with tag $tag...") @@ -39,15 +49,13 @@ function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) MPI.Waitall!(send_reqs) end - Nx′, Ny′, Nz′ = Int(Nx/Mx), Int(Ny/My), Nz - Lx′, Ly′, Lz′ = Lx/Mx, Ly/My, Lz tile_grid = RegularCartesianGrid((Nx′, Ny′, Nz′), (Lx′, Ly′, Lz′)) tile = CellField(FT, arch, tile_grid) recv_mesg = zeros(FT, Nx′, Ny′, Nz′) - tag = 100 + r - println("[rank $rank] Receiving message from rank $src with tag $tag...") + tag = 100 + rank + println("[rank $rank] Receiving message from rank 0 with tag $tag...") rreq = MPI.Irecv!(recv_mesg, 0, tag, comm) data(tile) .= recv_mesg From 0defdc9653881990ead849f7fa693d26cbb9e0d0 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Wed, 5 Jun 2019 18:51:57 -0400 Subject: [PATCH 006/100] Sending and reciving halo data. Former-commit-id: 541c389f3d5fd88ba0f2752c71e0ae5ef9cb1636 --- sandbox/tiled_halos_mpi.jl | 45 +++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/sandbox/tiled_halos_mpi.jl b/sandbox/tiled_halos_mpi.jl index e82ad44a27..23f83d48d0 100644 --- a/sandbox/tiled_halos_mpi.jl +++ b/sandbox/tiled_halos_mpi.jl @@ -5,12 +5,28 @@ using Oceananigans @inline index2rank(I, J, Mx, My) = J*My + I @inline rank2index(r, Mx, My) = div(r, Mx), mod(r, My) +@inline north_halo(tile) = @views @inbounds tile.data[1-tile.grid.Hx:0, :, :] +@inline south_halo(tile) = @views @inbounds tile.data[tile.grid.Nx+1:tile.grid.Nx+tile.grid.Hx, :, :] +@inline west_halo(tile) = @views @inbounds tile.data[:, 1-tile.grid.Hy:0, :] +@inline east_halo(tile) = @views @inbounds tile.data[:, tile.grid.Ny+1:tile.grid.Ny+tile.grid.Hy, :] + +@inline north_data(tile) = @views @inbounds tile.data[1:tile.grid.Hx, :, :] +@inline south_data(tile) = @views @inbounds tile.data[tile.grid.Nx-tile.grid.Hx+1:tile.grid.Nx, :, :] +@inline west_data(tile) = @views @inbounds tile.data[:, 1:tile.grid.Hy, :] +@inline east_data(tile) = @views @inbounds tile.data[:, tile.grid.Ny-tile.grid.Hy+1:tile.grid.Ny, :] + +@inline distribute_tag(rank) = 100 + rank +@inline send_west_tag(rank) = 200 + rank +@inline send_east_tag(rank) = 300 + rank +@inline send_north_tag(rank) = 400 + rank +@inline send_south_tag(rank) = 500 + rank + function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) Lx, Ly, Lz = 10, 10, 10 Nx′, Ny′, Nz′ = Int(Nx/Mx), Int(Ny/My), Nz Lx′, Ly′, Lz′ = Lx/Mx, Ly/My, Lz - + comm = MPI.COMM_WORLD MPI.Barrier(comm) @@ -39,10 +55,8 @@ function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) j1, j2 = J′*Ny′+1, (J′+1)*Ny′ send_mesg = rands[i1:i2, j1:j2, :] - tag = 100 + r - println("[rank $rank] Sending R[$i1:$i2, $j1:$j2, :] to rank $r with tag $tag...") - - sreq = MPI.Isend(send_mesg, r, tag, comm) + println("[rank $rank] Sending R[$i1:$i2, $j1:$j2, :] to rank $r...") + sreq = MPI.Isend(send_mesg, r, distribute_tag(r), comm) push!(send_reqs, sreq) end @@ -52,15 +66,26 @@ function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) tile_grid = RegularCartesianGrid((Nx′, Ny′, Nz′), (Lx′, Ly′, Lz′)) tile = CellField(FT, arch, tile_grid) + println("[rank $rank] Receiving message from rank 0...") recv_mesg = zeros(FT, Nx′, Ny′, Nz′) + rreq = MPI.Irecv!(recv_mesg, 0, distribute_tag(rank), comm) - tag = 100 + rank - println("[rank $rank] Receiving message from rank 0 with tag $tag...") - rreq = MPI.Irecv!(recv_mesg, 0, tag, comm) - + stats = MPI.Waitall!([rreq]) data(tile) .= recv_mesg - stats = MPI.Waitall!([rreq]) + println("[rank $rank] Sending halo data...") + se_req = 
MPI.Isend(west_data(tile), east_rank, send_east_tag(rank), comm) + sw_req = MPI.Isend(east_data(tile), west_rank, send_west_tag(rank), comm) + sn_req = MPI.Isend(south_data(tile), north_rank, send_north_tag(rank), comm) + ss_req = MPI.Isend(north_data(tile), south_rank, send_south_tag(rank), comm) + + MPI.Barrier(comm) + + println("[rank $rank] Receiving halo data...") + re_req = MPI.Irecv!(west_halo(tile), east_rank, send_west_tag(east_rank), comm) + rw_req = MPI.Irecv!(east_halo(tile), west_rank, send_east_tag(west_rank), comm) + rn_req = MPI.Irecv!(south_halo(tile), north_rank, send_south_tag(north_rank), comm) + rs_req = MPI.Irecv!(north_halo(tile), south_rank, send_north_tag(south_rank), comm) MPI.Barrier(comm) end From a75768b7c2bb8a1958713adb224751b99d9bcf17 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Wed, 5 Jun 2019 18:57:35 -0400 Subject: [PATCH 007/100] Receiving into contiguous buffers. Former-commit-id: 8eab25e94d46d0bc1dd3409c69a29defc6af8ea8 --- sandbox/tiled_halos_mpi.jl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/sandbox/tiled_halos_mpi.jl b/sandbox/tiled_halos_mpi.jl index 23f83d48d0..860baba977 100644 --- a/sandbox/tiled_halos_mpi.jl +++ b/sandbox/tiled_halos_mpi.jl @@ -81,11 +81,16 @@ function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) MPI.Barrier(comm) + west_buf = zeros(size(west_halo(tile))) + east_buf = zeros(size(east_halo(tile))) + north_buf = zeros(size(north_halo(tile))) + south_buf = zeros(size(south_halo(tile))) + println("[rank $rank] Receiving halo data...") - re_req = MPI.Irecv!(west_halo(tile), east_rank, send_west_tag(east_rank), comm) - rw_req = MPI.Irecv!(east_halo(tile), west_rank, send_east_tag(west_rank), comm) - rn_req = MPI.Irecv!(south_halo(tile), north_rank, send_south_tag(north_rank), comm) - rs_req = MPI.Irecv!(north_halo(tile), south_rank, send_north_tag(south_rank), comm) + re_req = MPI.Irecv!(west_buf, east_rank, send_west_tag(east_rank), comm) + rw_req = MPI.Irecv!(east_buf, west_rank, send_east_tag(west_rank), comm) + rn_req = MPI.Irecv!(south_buf, north_rank, send_south_tag(north_rank), comm) + rs_req = MPI.Irecv!(north_buf, south_rank, send_north_tag(south_rank), comm) MPI.Barrier(comm) end From d4718c834261223fb6323d84090c24d6cb6d54a9 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Wed, 5 Jun 2019 19:01:42 -0400 Subject: [PATCH 008/100] Sending buffers too... 
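Same idea as on the receive side: the halo and data helpers return strided
views into tile.data, and MPI wants plain contiguous buffers, so every
message is staged through a freshly allocated Array. A quick illustration of
why the views cannot be handed to Isend directly (illustrative only, not
part of the patch):

    A = zeros(8, 8, 8)
    v = view(A, :, 1:2, :)   # a slab like west_data(tile)
    Base.iscontiguous(v)     # false: strided view, not safe to pass to MPI
    buf = Array(v)           # contiguous copy that is safe to send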
Former-commit-id: 5435f78c792a4cd62225e24025ff4e0ae526a631 --- sandbox/tiled_halos_mpi.jl | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/sandbox/tiled_halos_mpi.jl b/sandbox/tiled_halos_mpi.jl index 860baba977..f4bf1a194f 100644 --- a/sandbox/tiled_halos_mpi.jl +++ b/sandbox/tiled_halos_mpi.jl @@ -74,23 +74,29 @@ function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) data(tile) .= recv_mesg println("[rank $rank] Sending halo data...") - se_req = MPI.Isend(west_data(tile), east_rank, send_east_tag(rank), comm) - sw_req = MPI.Isend(east_data(tile), west_rank, send_west_tag(rank), comm) - sn_req = MPI.Isend(south_data(tile), north_rank, send_north_tag(rank), comm) - ss_req = MPI.Isend(north_data(tile), south_rank, send_south_tag(rank), comm) + + west_data_buf = zeros(size(west_data(tile))) + east_data_buf = zeros(size(east_data(tile))) + north_data_buf = zeros(size(north_data(tile))) + south_data_buf = zeros(size(south_data(tile))) + + se_req = MPI.Isend(west_data_buf, east_rank, send_east_tag(rank), comm) + sw_req = MPI.Isend(east_data_buf, west_rank, send_west_tag(rank), comm) + sn_req = MPI.Isend(south_data_buf, north_rank, send_north_tag(rank), comm) + ss_req = MPI.Isend(north_data_buf, south_rank, send_south_tag(rank), comm) MPI.Barrier(comm) - west_buf = zeros(size(west_halo(tile))) - east_buf = zeros(size(east_halo(tile))) - north_buf = zeros(size(north_halo(tile))) - south_buf = zeros(size(south_halo(tile))) + west_halo_buf = zeros(size(west_halo(tile))) + east_halo_buf = zeros(size(east_halo(tile))) + north_halo_buf = zeros(size(north_halo(tile))) + south_halo_buf = zeros(size(south_halo(tile))) println("[rank $rank] Receiving halo data...") - re_req = MPI.Irecv!(west_buf, east_rank, send_west_tag(east_rank), comm) - rw_req = MPI.Irecv!(east_buf, west_rank, send_east_tag(west_rank), comm) - rn_req = MPI.Irecv!(south_buf, north_rank, send_south_tag(north_rank), comm) - rs_req = MPI.Irecv!(north_buf, south_rank, send_north_tag(south_rank), comm) + re_req = MPI.Irecv!(west_halo_buf, east_rank, send_west_tag(east_rank), comm) + rw_req = MPI.Irecv!(east_halo_buf, west_rank, send_east_tag(west_rank), comm) + rn_req = MPI.Irecv!(south_halo_buf, north_rank, send_south_tag(north_rank), comm) + rs_req = MPI.Irecv!(north_halo_buf, south_rank, send_north_tag(south_rank), comm) MPI.Barrier(comm) end From e4083ee07c31ab8aed1e053a8a374f90a4f5c5bd Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Thu, 6 Jun 2019 08:21:38 -0400 Subject: [PATCH 009/100] Woops, actually read from MPI buffer. 
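The previous commit allocated the send buffers with zeros(...) and never
copied the field data into them, so every neighbor received zeros; likewise
the received halo buffers were never written back into the field. The
staging has to copy on both ends, roughly like this (a sketch using the
helper names defined earlier; `tag` stands for whichever matched tag the two
sides agree on):

    send_buf = copy(east_data(tile))           # stage: view -> Array
    sreq = MPI.Isend(send_buf, east_rank, tag, comm)

    recv_buf = zeros(size(west_halo(tile)))
    rreq = MPI.Irecv!(recv_buf, west_rank, tag, comm)

    MPI.Waitall!([sreq, rreq])
    west_halo(tile) .= recv_buf                # unstage: Array -> view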
Former-commit-id: c1e3375c3f7942065f66f527161fb9bfdc575f6c --- sandbox/tiled_halos_mpi.jl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/sandbox/tiled_halos_mpi.jl b/sandbox/tiled_halos_mpi.jl index f4bf1a194f..d35c5e80ce 100644 --- a/sandbox/tiled_halos_mpi.jl +++ b/sandbox/tiled_halos_mpi.jl @@ -75,10 +75,10 @@ function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) println("[rank $rank] Sending halo data...") - west_data_buf = zeros(size(west_data(tile))) - east_data_buf = zeros(size(east_data(tile))) - north_data_buf = zeros(size(north_data(tile))) - south_data_buf = zeros(size(south_data(tile))) + west_data_buf = copy(west_data(tile)) + east_data_buf = copy(east_data(tile)) + north_data_buf = copy(north_data(tile)) + south_data_buf = copy(south_data(tile)) se_req = MPI.Isend(west_data_buf, east_rank, send_east_tag(rank), comm) sw_req = MPI.Isend(east_data_buf, west_rank, send_west_tag(rank), comm) @@ -99,6 +99,11 @@ function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) rs_req = MPI.Irecv!(north_halo_buf, south_rank, send_north_tag(south_rank), comm) MPI.Barrier(comm) + + east_halo(tile) .= west_halo_buf + west_halo(tile) .= east_halo_buf + north_halo(tile) .= south_halo_buf + south_halo(tile) .= north_halo_buf end MPI.Init() From 95b2edff92f0564c4091763688459b24b690eb5e Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Thu, 6 Jun 2019 15:18:06 -0400 Subject: [PATCH 010/100] Halo comm working on 4 ranks Former-commit-id: bd5cf58b730ff42d5cd87a5283ba196a1bc6da48 --- sandbox/tiled_halos_mpi.jl | 77 +++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 31 deletions(-) diff --git a/sandbox/tiled_halos_mpi.jl b/sandbox/tiled_halos_mpi.jl index d35c5e80ce..f8fa566caa 100644 --- a/sandbox/tiled_halos_mpi.jl +++ b/sandbox/tiled_halos_mpi.jl @@ -3,17 +3,17 @@ import MPI using Oceananigans @inline index2rank(I, J, Mx, My) = J*My + I -@inline rank2index(r, Mx, My) = div(r, Mx), mod(r, My) +@inline rank2index(r, Mx, My) = mod(r, Mx), div(r, My) -@inline north_halo(tile) = @views @inbounds tile.data[1-tile.grid.Hx:0, :, :] -@inline south_halo(tile) = @views @inbounds tile.data[tile.grid.Nx+1:tile.grid.Nx+tile.grid.Hx, :, :] -@inline west_halo(tile) = @views @inbounds tile.data[:, 1-tile.grid.Hy:0, :] -@inline east_halo(tile) = @views @inbounds tile.data[:, tile.grid.Ny+1:tile.grid.Ny+tile.grid.Hy, :] +@inline north_halo(tile) = @views @inbounds tile.data.parent[1:tile.grid.Hx, :, :] +@inline south_halo(tile) = @views @inbounds tile.data.parent[tile.grid.Nx+tile.grid.Hx+1:tile.grid.Nx+2tile.grid.Hx, :, :] +@inline west_halo(tile) = @views @inbounds tile.data.parent[:, 1:tile.grid.Hy, :] +@inline east_halo(tile) = @views @inbounds tile.data.parent[:, tile.grid.Ny+tile.grid.Hy+1:tile.grid.Ny+2tile.grid.Hy, :] -@inline north_data(tile) = @views @inbounds tile.data[1:tile.grid.Hx, :, :] -@inline south_data(tile) = @views @inbounds tile.data[tile.grid.Nx-tile.grid.Hx+1:tile.grid.Nx, :, :] -@inline west_data(tile) = @views @inbounds tile.data[:, 1:tile.grid.Hy, :] -@inline east_data(tile) = @views @inbounds tile.data[:, tile.grid.Ny-tile.grid.Hy+1:tile.grid.Ny, :] +@inline north_data(tile) = @views @inbounds tile.data.parent[1+tile.grid.Hx:2tile.grid.Hx, :, :] +@inline south_data(tile) = @views @inbounds tile.data.parent[tile.grid.Nx+1:tile.grid.Nx+tile.grid.Hx, :, :] +@inline west_data(tile) = @views @inbounds tile.data.parent[:, 1+tile.grid.Hy:2tile.grid.Hy, :] +@inline east_data(tile) = @views @inbounds tile.data.parent[:, 
tile.grid.Ny+1:tile.grid.Ny+tile.grid.Hy, :] @inline distribute_tag(rank) = 100 + rank @inline send_west_tag(rank) = 200 + rank @@ -32,7 +32,7 @@ function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) MPI.Barrier(comm) rank = MPI.Comm_rank(comm) - size = MPI.Comm_size(comm) + # size = MPI.Comm_size(comm) I, J = rank2index(rank, Mx, My) I⁻, I⁺ = mod(I-1, Mx), mod(I+1, Mx) @@ -70,22 +70,32 @@ function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) recv_mesg = zeros(FT, Nx′, Ny′, Nz′) rreq = MPI.Irecv!(recv_mesg, 0, distribute_tag(rank), comm) - stats = MPI.Waitall!([rreq]) + stats = MPI.Wait!(rreq) data(tile) .= recv_mesg println("[rank $rank] Sending halo data...") - west_data_buf = copy(west_data(tile)) - east_data_buf = copy(east_data(tile)) - north_data_buf = copy(north_data(tile)) - south_data_buf = copy(south_data(tile)) + west_data_buf = zeros(size(west_data(tile))) + east_data_buf = zeros(size(east_data(tile))) + north_data_buf = zeros(size(north_data(tile))) + south_data_buf = zeros(size(south_data(tile))) - se_req = MPI.Isend(west_data_buf, east_rank, send_east_tag(rank), comm) - sw_req = MPI.Isend(east_data_buf, west_rank, send_west_tag(rank), comm) - sn_req = MPI.Isend(south_data_buf, north_rank, send_north_tag(rank), comm) - ss_req = MPI.Isend(north_data_buf, south_rank, send_south_tag(rank), comm) + west_data_buf .= copy(west_data(tile)) + east_data_buf .= copy(east_data(tile)) + north_data_buf .= copy(north_data(tile)) + south_data_buf .= copy(south_data(tile)) - MPI.Barrier(comm) + se_req = MPI.Isend(east_data_buf, east_rank, send_east_tag(rank), comm) + sw_req = MPI.Isend(west_data_buf, west_rank, send_west_tag(rank), comm) + sn_req = MPI.Isend(north_data_buf, north_rank, send_north_tag(rank), comm) + ss_req = MPI.Isend(south_data_buf, south_rank, send_south_tag(rank), comm) + + @debug "[rank $rank] sending #$(send_east_tag(rank)) to rank $east_rank" + @debug "[rank $rank] sending #$(send_west_tag(rank)) to rank $west_rank" + @debug "[rank $rank] sending #$(send_north_tag(rank)) to rank $north_rank" + @debug "[rank $rank] sending #$(send_south_tag(rank)) to rank $south_rank" + + MPI.Waitall!([se_req, sw_req, sn_req, ss_req]) west_halo_buf = zeros(size(west_halo(tile))) east_halo_buf = zeros(size(east_halo(tile))) @@ -93,17 +103,22 @@ function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) south_halo_buf = zeros(size(south_halo(tile))) println("[rank $rank] Receiving halo data...") - re_req = MPI.Irecv!(west_halo_buf, east_rank, send_west_tag(east_rank), comm) - rw_req = MPI.Irecv!(east_halo_buf, west_rank, send_east_tag(west_rank), comm) - rn_req = MPI.Irecv!(south_halo_buf, north_rank, send_south_tag(north_rank), comm) - rs_req = MPI.Irecv!(north_halo_buf, south_rank, send_north_tag(south_rank), comm) - - MPI.Barrier(comm) - - east_halo(tile) .= west_halo_buf - west_halo(tile) .= east_halo_buf - north_halo(tile) .= south_halo_buf - south_halo(tile) .= north_halo_buf + re_req = MPI.Irecv!(west_halo_buf, west_rank, send_east_tag(west_rank), comm) + rw_req = MPI.Irecv!(east_halo_buf, east_rank, send_west_tag(east_rank), comm) + rn_req = MPI.Irecv!(south_halo_buf, south_rank, send_north_tag(south_rank), comm) + rs_req = MPI.Irecv!(north_halo_buf, north_rank, send_south_tag(north_rank), comm) + + @debug "[rank $rank] waiting for #$(send_east_tag(west_rank)) from rank $west_rank..." + @debug "[rank $rank] waiting for #$(send_west_tag(east_rank)) from rank $east_rank..." + @debug "[rank $rank] waiting for #$(send_north_tag(south_rank)) from rank $south_rank..." 
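    # Annotation, not part of the patch: the pairing fixed above is that the
    # west halo is received from west_rank with the tag that neighbor used for
    # its eastward send, i.e. send_east_tag(west_rank). For example, with
    # Mx = My = 2, rank 0 has west_rank = 1, so it waits on tag 300 + 1 = 301,
    # which is exactly what these debug lines print.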
+ @debug "[rank $rank] waiting for #$(send_south_tag(north_rank)) from rank $north_rank..." + + MPI.Waitall!([re_req, rw_req, rn_req, rs_req]) + + east_halo(tile) .= east_halo_buf + west_halo(tile) .= west_halo_buf + north_halo(tile) .= north_halo_buf + south_halo(tile) .= south_halo_buf end MPI.Init() From 800cb8008c122100056f0747d98cb959c2f0fddc Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Thu, 6 Jun 2019 15:28:28 -0400 Subject: [PATCH 011/100] Nice send and receive halo functions Former-commit-id: 971a4f646b080b1b34f0d6e2b591acbd9a1b5b77 --- sandbox/tiled_halos_mpi.jl | 113 +++++++++++++++++++++++-------------- 1 file changed, 71 insertions(+), 42 deletions(-) diff --git a/sandbox/tiled_halos_mpi.jl b/sandbox/tiled_halos_mpi.jl index f8fa566caa..d3d5aad97a 100644 --- a/sandbox/tiled_halos_mpi.jl +++ b/sandbox/tiled_halos_mpi.jl @@ -21,6 +21,67 @@ using Oceananigans @inline send_north_tag(rank) = 400 + rank @inline send_south_tag(rank) = 500 + rank +function send_halo_data(tile) + rank = MPI.Comm_rank(comm) + + I, J = rank2index(rank, Mx, My) + I⁻, I⁺ = mod(I-1, Mx), mod(I+1, Mx) + J⁻, J⁺ = mod(J-1, My), mod(J+1, My) + Nx′, Ny′, Nz′ = Int(Nx/Mx), Int(Ny/My), Nz + Lx′, Ly′, Lz′ = Lx/Mx, Ly/My, Lz + + north_rank = index2rank(I, J⁻, Mx, My) + south_rank = index2rank(I, J⁺, Mx, My) + east_rank = index2rank(I⁺, J, Mx, My) + west_rank = index2rank(I⁻, J, Mx, My) + + west_data_buf = zeros(size(west_data(tile))) + east_data_buf = zeros(size(east_data(tile))) + north_data_buf = zeros(size(north_data(tile))) + south_data_buf = zeros(size(south_data(tile))) + + west_data_buf .= copy(west_data(tile)) + east_data_buf .= copy(east_data(tile)) + north_data_buf .= copy(north_data(tile)) + south_data_buf .= copy(south_data(tile)) + + se_req = MPI.Isend(east_data_buf, east_rank, send_east_tag(rank), comm) + sw_req = MPI.Isend(west_data_buf, west_rank, send_west_tag(rank), comm) + sn_req = MPI.Isend(north_data_buf, north_rank, send_north_tag(rank), comm) + ss_req = MPI.Isend(south_data_buf, south_rank, send_south_tag(rank), comm) + + @debug "[rank $rank] sending #$(send_east_tag(rank)) to rank $east_rank" + @debug "[rank $rank] sending #$(send_west_tag(rank)) to rank $west_rank" + @debug "[rank $rank] sending #$(send_north_tag(rank)) to rank $north_rank" + @debug "[rank $rank] sending #$(send_south_tag(rank)) to rank $south_rank" + + MPI.Waitall!([se_req, sw_req, sn_req, ss_req]) +end + +function receive_halo_data(tile) + west_halo_buf = zeros(size(west_halo(tile))) + east_halo_buf = zeros(size(east_halo(tile))) + north_halo_buf = zeros(size(north_halo(tile))) + south_halo_buf = zeros(size(south_halo(tile))) + + re_req = MPI.Irecv!(west_halo_buf, west_rank, send_east_tag(west_rank), comm) + rw_req = MPI.Irecv!(east_halo_buf, east_rank, send_west_tag(east_rank), comm) + rn_req = MPI.Irecv!(south_halo_buf, south_rank, send_north_tag(south_rank), comm) + rs_req = MPI.Irecv!(north_halo_buf, north_rank, send_south_tag(north_rank), comm) + + @debug "[rank $rank] waiting for #$(send_east_tag(west_rank)) from rank $west_rank..." + @debug "[rank $rank] waiting for #$(send_west_tag(east_rank)) from rank $east_rank..." + @debug "[rank $rank] waiting for #$(send_north_tag(south_rank)) from rank $south_rank..." + @debug "[rank $rank] waiting for #$(send_south_tag(north_rank)) from rank $north_rank..." 
+ + MPI.Waitall!([re_req, rw_req, rn_req, rs_req]) + + east_halo(tile) .= east_halo_buf + west_halo(tile) .= west_halo_buf + north_halo(tile) .= north_halo_buf + south_halo(tile) .= south_halo_buf +end + function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) Lx, Ly, Lz = 10, 10, 10 @@ -32,7 +93,6 @@ function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) MPI.Barrier(comm) rank = MPI.Comm_rank(comm) - # size = MPI.Comm_size(comm) I, J = rank2index(rank, Mx, My) I⁻, I⁺ = mod(I-1, Mx), mod(I+1, Mx) @@ -66,7 +126,7 @@ function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) tile_grid = RegularCartesianGrid((Nx′, Ny′, Nz′), (Lx′, Ly′, Lz′)) tile = CellField(FT, arch, tile_grid) - println("[rank $rank] Receiving message from rank 0...") + println("[rank $rank] Receiving tile from rank 0...") recv_mesg = zeros(FT, Nx′, Ny′, Nz′) rreq = MPI.Irecv!(recv_mesg, 0, distribute_tag(rank), comm) @@ -74,51 +134,20 @@ function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) data(tile) .= recv_mesg println("[rank $rank] Sending halo data...") - - west_data_buf = zeros(size(west_data(tile))) - east_data_buf = zeros(size(east_data(tile))) - north_data_buf = zeros(size(north_data(tile))) - south_data_buf = zeros(size(south_data(tile))) - - west_data_buf .= copy(west_data(tile)) - east_data_buf .= copy(east_data(tile)) - north_data_buf .= copy(north_data(tile)) - south_data_buf .= copy(south_data(tile)) - - se_req = MPI.Isend(east_data_buf, east_rank, send_east_tag(rank), comm) - sw_req = MPI.Isend(west_data_buf, west_rank, send_west_tag(rank), comm) - sn_req = MPI.Isend(north_data_buf, north_rank, send_north_tag(rank), comm) - ss_req = MPI.Isend(south_data_buf, south_rank, send_south_tag(rank), comm) - - @debug "[rank $rank] sending #$(send_east_tag(rank)) to rank $east_rank" - @debug "[rank $rank] sending #$(send_west_tag(rank)) to rank $west_rank" - @debug "[rank $rank] sending #$(send_north_tag(rank)) to rank $north_rank" - @debug "[rank $rank] sending #$(send_south_tag(rank)) to rank $south_rank" - - MPI.Waitall!([se_req, sw_req, sn_req, ss_req]) - - west_halo_buf = zeros(size(west_halo(tile))) - east_halo_buf = zeros(size(east_halo(tile))) - north_halo_buf = zeros(size(north_halo(tile))) - south_halo_buf = zeros(size(south_halo(tile))) + send_halo_data(tile) println("[rank $rank] Receiving halo data...") - re_req = MPI.Irecv!(west_halo_buf, west_rank, send_east_tag(west_rank), comm) - rw_req = MPI.Irecv!(east_halo_buf, east_rank, send_west_tag(east_rank), comm) - rn_req = MPI.Irecv!(south_halo_buf, south_rank, send_north_tag(south_rank), comm) - rs_req = MPI.Irecv!(north_halo_buf, north_rank, send_south_tag(north_rank), comm) + receive_halo_data(tile) - @debug "[rank $rank] waiting for #$(send_east_tag(west_rank)) from rank $west_rank..." - @debug "[rank $rank] waiting for #$(send_west_tag(east_rank)) from rank $east_rank..." - @debug "[rank $rank] waiting for #$(send_north_tag(south_rank)) from rank $south_rank..." - @debug "[rank $rank] waiting for #$(send_south_tag(north_rank)) from rank $north_rank..." 
+ println("[rank $rank] Sending halo data...") + send_halo_data(tile) - MPI.Waitall!([re_req, rw_req, rn_req, rs_req]) + println("[rank $rank] Receiving halo data...") + receive_halo_data(tile) - east_halo(tile) .= east_halo_buf - west_halo(tile) .= west_halo_buf - north_halo(tile) .= north_halo_buf - south_halo(tile) .= south_halo_buf + if rank == 3 + display(tile.data) + end end MPI.Init() From 1e7a910d857d30854be4be56ad53dcadd8ea8f1d Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Thu, 6 Jun 2019 15:34:37 -0400 Subject: [PATCH 012/100] Fix nice functions Former-commit-id: f7f4c2279954b2daeee53f20262fc02c62835adc --- sandbox/tiled_halos_mpi.jl | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/sandbox/tiled_halos_mpi.jl b/sandbox/tiled_halos_mpi.jl index d3d5aad97a..83eaa9b648 100644 --- a/sandbox/tiled_halos_mpi.jl +++ b/sandbox/tiled_halos_mpi.jl @@ -21,14 +21,13 @@ using Oceananigans @inline send_north_tag(rank) = 400 + rank @inline send_south_tag(rank) = 500 + rank -function send_halo_data(tile) +function send_halo_data(tile, Mx, My) + comm = MPI.COMM_WORLD rank = MPI.Comm_rank(comm) I, J = rank2index(rank, Mx, My) I⁻, I⁺ = mod(I-1, Mx), mod(I+1, Mx) J⁻, J⁺ = mod(J-1, My), mod(J+1, My) - Nx′, Ny′, Nz′ = Int(Nx/Mx), Int(Ny/My), Nz - Lx′, Ly′, Lz′ = Lx/Mx, Ly/My, Lz north_rank = index2rank(I, J⁻, Mx, My) south_rank = index2rank(I, J⁺, Mx, My) @@ -58,7 +57,19 @@ function send_halo_data(tile) MPI.Waitall!([se_req, sw_req, sn_req, ss_req]) end -function receive_halo_data(tile) +function receive_halo_data(tile, Mx, My) + comm = MPI.COMM_WORLD + rank = MPI.Comm_rank(comm) + + I, J = rank2index(rank, Mx, My) + I⁻, I⁺ = mod(I-1, Mx), mod(I+1, Mx) + J⁻, J⁺ = mod(J-1, My), mod(J+1, My) + + north_rank = index2rank(I, J⁻, Mx, My) + south_rank = index2rank(I, J⁺, Mx, My) + east_rank = index2rank(I⁺, J, Mx, My) + west_rank = index2rank(I⁻, J, Mx, My) + west_halo_buf = zeros(size(west_halo(tile))) east_halo_buf = zeros(size(east_halo(tile))) north_halo_buf = zeros(size(north_halo(tile))) @@ -134,16 +145,16 @@ function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) data(tile) .= recv_mesg println("[rank $rank] Sending halo data...") - send_halo_data(tile) + send_halo_data(tile, Mx, My) println("[rank $rank] Receiving halo data...") - receive_halo_data(tile) + receive_halo_data(tile, Mx, My) println("[rank $rank] Sending halo data...") - send_halo_data(tile) + send_halo_data(tile, Mx, My) println("[rank $rank] Receiving halo data...") - receive_halo_data(tile) + receive_halo_data(tile, Mx, My) if rank == 3 display(tile.data) From d455bd9a1638df04ee01540bc98587843e16b2fa Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Thu, 6 Jun 2019 18:02:01 -0400 Subject: [PATCH 013/100] Less MPI issues, bigger problem Former-commit-id: 971d12a9bada3d630414f91e46e1738662384b42 --- sandbox/tiled_halos_mpi.jl | 101 +++++++++++++++++++++++++++---------- 1 file changed, 74 insertions(+), 27 deletions(-) diff --git a/sandbox/tiled_halos_mpi.jl b/sandbox/tiled_halos_mpi.jl index 83eaa9b648..15f433c94c 100644 --- a/sandbox/tiled_halos_mpi.jl +++ b/sandbox/tiled_halos_mpi.jl @@ -1,7 +1,41 @@ +using Printf + import MPI using Oceananigans +# Source: https://github.com/JuliaCI/BenchmarkTools.jl/blob/master/src/trials.jl +function prettytime(t) + if t < 1e3 + value, units = t, "ns" + elseif t < 1e6 + value, units = t / 1e3, "μs" + elseif t < 1e9 + value, units = t / 1e6, "ms" + else + s = t / 1e9 + if s < 60 + value, units = s, "s" + else + value, units = (s / 60), "min" + 
end + end + return string(@sprintf("%.3f", value), " ", units) +end + +function prettybandwidth(b) + if b < 1024 + val, units = b, "B/s" + elseif b < 1024^2 + val, units = b / 1024, "KiB/s" + elseif b < 1024^3 + val, units = b / 1024^2, "MiB/s" + else + val, units = b / 1024^3, "GiB/s" + end + return string(@sprintf("%.3f", val), " ", units) +end + @inline index2rank(I, J, Mx, My) = J*My + I @inline rank2index(r, Mx, My) = mod(r, Mx), div(r, My) @@ -21,8 +55,7 @@ using Oceananigans @inline send_north_tag(rank) = 400 + rank @inline send_south_tag(rank) = 500 + rank -function send_halo_data(tile, Mx, My) - comm = MPI.COMM_WORLD +function send_halo_data(tile, Mx, My, comm) rank = MPI.Comm_rank(comm) I, J = rank2index(rank, Mx, My) @@ -53,12 +86,9 @@ function send_halo_data(tile, Mx, My) @debug "[rank $rank] sending #$(send_west_tag(rank)) to rank $west_rank" @debug "[rank $rank] sending #$(send_north_tag(rank)) to rank $north_rank" @debug "[rank $rank] sending #$(send_south_tag(rank)) to rank $south_rank" - - MPI.Waitall!([se_req, sw_req, sn_req, ss_req]) end -function receive_halo_data(tile, Mx, My) - comm = MPI.COMM_WORLD +function receive_halo_data(tile, Mx, My, comm) rank = MPI.Comm_rank(comm) I, J = rank2index(rank, Mx, My) @@ -100,10 +130,11 @@ function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) Lx′, Ly′, Lz′ = Lx/Mx, Ly/My, Lz comm = MPI.COMM_WORLD - + MPI.Barrier(comm) rank = MPI.Comm_rank(comm) + R = MPI.Comm_size(comm) I, J = rank2index(rank, Mx, My) I⁻, I⁺ = mod(I-1, Mx), mod(I+1, Mx) @@ -116,51 +147,67 @@ function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) east_rank = index2rank(I⁺, J, Mx, My) west_rank = index2rank(I⁻, J, Mx, My) + tile_grid = RegularCartesianGrid((Nx′, Ny′, Nz′), (Lx′, Ly′, Lz′)) + tile = CellField(FT, arch, tile_grid) + send_reqs = MPI.Request[] if rank == 0 rands = rand(Nx, Ny, Nz) - for r in 0:Mx*My-1 + for r in 1:Mx*My-1 I′, J′ = rank2index(r, Mx, My) i1, i2 = I′*Nx′+1, (I′+1)*Nx′ j1, j2 = J′*Ny′+1, (J′+1)*Ny′ send_mesg = rands[i1:i2, j1:j2, :] - println("[rank $rank] Sending R[$i1:$i2, $j1:$j2, :] to rank $r...") + println("[rank $rank] Sending rands[$i1:$i2, $j1:$j2, :] to rank $r...") sreq = MPI.Isend(send_mesg, r, distribute_tag(r), comm) push!(send_reqs, sreq) end + data(tile) .= rands[1:Nx′, 1:Ny′, :] + MPI.Waitall!(send_reqs) end - tile_grid = RegularCartesianGrid((Nx′, Ny′, Nz′), (Lx′, Ly′, Lz′)) - tile = CellField(FT, arch, tile_grid) - - println("[rank $rank] Receiving tile from rank 0...") - recv_mesg = zeros(FT, Nx′, Ny′, Nz′) - rreq = MPI.Irecv!(recv_mesg, 0, distribute_tag(rank), comm) - - stats = MPI.Wait!(rreq) - data(tile) .= recv_mesg + if rank != 0 + println("[rank $rank] Receiving tile from rank 0...") + recv_mesg = zeros(FT, Nx′, Ny′, Nz′) + rreq = MPI.Irecv!(recv_mesg, 0, distribute_tag(rank), comm) + stats = MPI.Wait!(rreq) + data(tile) .= recv_mesg + end + println("[rank $rank] Sending halo data...") - send_halo_data(tile, Mx, My) + send_halo_data(tile, Mx, My, comm) println("[rank $rank] Receiving halo data...") - receive_halo_data(tile, Mx, My) + receive_halo_data(tile, Mx, My, comm) + + MPI.Barrier(comm) + if rank == 0 + tic = time_ns() + end println("[rank $rank] Sending halo data...") - send_halo_data(tile, Mx, My) + send_halo_data(tile, Mx, My, comm) println("[rank $rank] Receiving halo data...") - receive_halo_data(tile, Mx, My) - - if rank == 3 - display(tile.data) - end + receive_halo_data(tile, Mx, My, comm) + + MPI.Barrier(comm) + if rank == 0 + t = (time_ns() - tic) + ts = t / 1e9 + @info "$R ranks halo 
communication time: $(prettytime(t))" + + Hx, Hy = 1, 1 + data_size = sizeof(FT) * 2Nz*(Hx*Nx + Hy*Ny) + @info "$R ranks halo communication bandwidth: $(prettybandwidth(data_size/ts))" + end end MPI.Init() -fill_halo_regions_mpi!(Float64, CPU(), 16, 16, 16, 2, 2) +fill_halo_regions_mpi!(Float64, CPU(), 512, 512, 512, 2, 2) MPI.Finalize() From 9c0748ffb074b65483dd1534c1f8a09d28ddab10 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Fri, 7 Jun 2019 08:44:14 -0400 Subject: [PATCH 014/100] Need CuArrays for broadcast Former-commit-id: e38f3b8821ec317fc59546931d1a5cfceeaab52f --- sandbox/tiled_halos_mpi.jl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sandbox/tiled_halos_mpi.jl b/sandbox/tiled_halos_mpi.jl index 15f433c94c..04bc1d9356 100644 --- a/sandbox/tiled_halos_mpi.jl +++ b/sandbox/tiled_halos_mpi.jl @@ -1,5 +1,6 @@ using Printf +using CuArrays import MPI using Oceananigans @@ -117,10 +118,10 @@ function receive_halo_data(tile, Mx, My, comm) MPI.Waitall!([re_req, rw_req, rn_req, rs_req]) - east_halo(tile) .= east_halo_buf - west_halo(tile) .= west_halo_buf - north_halo(tile) .= north_halo_buf - south_halo(tile) .= south_halo_buf + east_halo(tile) .= CuArray(east_halo_buf) + west_halo(tile) .= CuArray(west_halo_buf) + north_halo(tile) .= CuArray(north_halo_buf) + south_halo(tile) .= CuArray(south_halo_buf) end function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) @@ -209,5 +210,5 @@ function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) end MPI.Init() -fill_halo_regions_mpi!(Float64, CPU(), 512, 512, 512, 2, 2) +fill_halo_regions_mpi!(Float64, GPU(), 192, 192, 192, 3, 3) MPI.Finalize() From 14e6a8431a2299b4ce1da61d6b66b354869c0e24 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Fri, 7 Jun 2019 09:43:43 -0400 Subject: [PATCH 015/100] note to self Former-commit-id: 357c2c94f0777cd8f4e87179c42c358ec62ec2ce --- sandbox/tiled_halos_mpi.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/sandbox/tiled_halos_mpi.jl b/sandbox/tiled_halos_mpi.jl index 04bc1d9356..4a01425482 100644 --- a/sandbox/tiled_halos_mpi.jl +++ b/sandbox/tiled_halos_mpi.jl @@ -68,6 +68,7 @@ function send_halo_data(tile, Mx, My, comm) east_rank = index2rank(I⁺, J, Mx, My) west_rank = index2rank(I⁻, J, Mx, My) + # cuzeros doesn't exist anymore. Use similar! west_data_buf = zeros(size(west_data(tile))) east_data_buf = zeros(size(east_data(tile))) north_data_buf = zeros(size(north_data(tile))) From d337ebfd7b2c9e890c6c1d6689db495a677a247f Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sat, 14 Dec 2019 22:46:55 -0500 Subject: [PATCH 016/100] Start prototyping a `DistributedModel` type. 
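The idea is a container that carries everything an MPI-distributed run
needs: the rank decomposition, per-rank model state, a connectivity graph
saying which rank borders which, and the communicator. A sketch of where the
API is heading (intended usage only, not working code yet):

    # dm = DistributedModel(ranks=(2, 2, 1), size=(64, 64, 16), ...)
    # dm.ranks               -> (Rx, Ry, Rz)
    # dm.connectivity_graph  -> east/west/north/south/top/bottom neighbors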
---
 src/distributed_model.jl | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 src/distributed_model.jl

diff --git a/src/distributed_model.jl b/src/distributed_model.jl
new file mode 100644
index 0000000000..711893c165
--- /dev/null
+++ b/src/distributed_model.jl
@@ -0,0 +1,19 @@
+import MPI
+
+using Oceananigans
+
+struct DistributedModel{A, R, G, C}
+    ranks :: R
+    models :: A
+    connectivity_graph :: G
+    MPI_Comm :: C
+end
+
+const FieldBoundaryConditions = NamedTuple{(:east, :west, :north, :south, :top, :bottom)}
+
+function validate_tupled_argument(arg, argtype, argname)
+    length(arg) == 3 || throw(ArgumentError("length($argname) must be 3."))
+    all(isa.(arg, argtype)) || throw(ArgumentError("$argname=$arg must contain $argtype s."))
+    all(arg .> 0) || throw(ArgumentError("Elements of $argname=$arg must be > 0!"))
+    return nothing
+end

From 7ca306ce786486ec656586ece5cdab29ca62ae5e Mon Sep 17 00:00:00 2001
From: ali-ramadhan
Date: Sat, 14 Dec 2019 22:47:15 -0500
Subject: [PATCH 017/100] `DistributedModel` constructor that checks for consistent ranks

---
 src/distributed_model.jl | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/src/distributed_model.jl b/src/distributed_model.jl
index 711893c165..6272e23d85 100644
--- a/src/distributed_model.jl
+++ b/src/distributed_model.jl
@@ -17,3 +17,34 @@ function validate_tupled_argument(arg, argtype, argname)
     all(arg .> 0) || throw(ArgumentError("Elements of $argname=$arg must be > 0!"))
     return nothing
 end
+
+function DistributedModel(; ranks, model_kwargs...)
+    validate_tupled_argument(ranks, Int, "ranks")
+    Rx, Ry, Rz = ranks
+    total_ranks = Rx*Ry*Rz
+
+    MPI.Init()
+    comm = MPI.COMM_WORLD
+
+    mpi_ranks = MPI.Comm_size(comm)
+    my_rank = MPI.Comm_rank(comm)
+
+    if my_rank == 0
+        if total_ranks != mpi_ranks
+            throw(ArgumentError("ranks=($Rx, $Ry, $Rz) [$total_ranks total] inconsistent " *
+                                "with number of MPI ranks: $mpi_ranks. Exiting with code 1."))
+            MPI.Finalize()
+            exit(code=1)
+        end
+    end
+
+    # Ensure that ranks 1:N don't go ahead if total_ranks != mpi_ranks.
+    MPI.Barrier(comm)
+
+    model_id = my_rank + 1
+    println("Model #$my_rank reporting in")
+
+    return DistributedModel(ranks, nothing, nothing, comm)
+end
+
+dm = DistributedModel(ranks=(2, 2, 1))

From e16190e7953ece023793d6118665d7e9a728359b Mon Sep 17 00:00:00 2001
From: ali-ramadhan
Date: Sat, 14 Dec 2019 23:03:46 -0500
Subject: [PATCH 018/100] Utility functions for converting between MPI rank and 3D index.
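In this version ranks are 0-based with i as the fast index
(rank = k*Rx*Ry + j*Rx + i); the next commit switches to 1-based indices
with z fastest. Either way the two maps must round-trip, which is easy to
spot-check (a sketch, not part of the patch):

    Rx, Ry, Rz = 2, 2, 2
    for r in 0:Rx*Ry*Rz-1
        i, j, k = rank2index(r, Rx, Ry, Rz)
        @assert index2rank(i, j, k, Rx, Ry, Rz) == r
    end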
--- src/distributed_model.jl | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/distributed_model.jl b/src/distributed_model.jl index 6272e23d85..b49ab8fbb1 100644 --- a/src/distributed_model.jl +++ b/src/distributed_model.jl @@ -9,7 +9,17 @@ struct DistributedModel{A, R, G, C} MPI_Comm :: C end -const FieldBoundaryConditions = NamedTuple{(:east, :west, :north, :south, :top, :bottom)} +const RankConnectivity = NamedTuple{(:east, :west, :north, :south, :top, :bottom)} + +@inline index2rank(i, j, k, Rx, Ry, Rz) = k*Rx*Ry + j*Rx + i + +@inline function rank2index(r, Rx, Ry, Rz) + k = div(r, Rx*Ry) + r -= k*Rx*Ry + j = div(r, Rx) + i = mod(r, Rx) + return i, j, k +end function validate_tupled_argument(arg, argtype, argname) length(arg) == 3 || throw(ArgumentError("length($argname) must be 3.")) From aff034bd8ffa45c372ec8df903f2eefbad386a2b Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sat, 14 Dec 2019 23:06:41 -0500 Subject: [PATCH 019/100] Make z the fast index and convert to 1-based indexing --- src/distributed_model.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/distributed_model.jl b/src/distributed_model.jl index b49ab8fbb1..cc407aece8 100644 --- a/src/distributed_model.jl +++ b/src/distributed_model.jl @@ -11,14 +11,14 @@ end const RankConnectivity = NamedTuple{(:east, :west, :north, :south, :top, :bottom)} -@inline index2rank(i, j, k, Rx, Ry, Rz) = k*Rx*Ry + j*Rx + i +@inline index2rank(i, j, k, Rx, Ry, Rz) = (i-1)*Ry*Rz + (j-1)*Rz + (k-1) @inline function rank2index(r, Rx, Ry, Rz) - k = div(r, Rx*Ry) - r -= k*Rx*Ry - j = div(r, Rx) - i = mod(r, Rx) - return i, j, k + i = div(r, Ry*Rz) + r -= i*Ry*Rz + j = div(r, Rz) + k = mod(r, Rz) + return i+1, j+1, k+1 end function validate_tupled_argument(arg, argtype, argname) From b7897225466f6d0f74e39b3d912b15da9a8b647b Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sat, 14 Dec 2019 23:33:57 -0500 Subject: [PATCH 020/100] Add size and length kwargs, and create proper grid for each rank --- src/distributed_model.jl | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/src/distributed_model.jl b/src/distributed_model.jl index cc407aece8..005588d539 100644 --- a/src/distributed_model.jl +++ b/src/distributed_model.jl @@ -2,6 +2,8 @@ import MPI using Oceananigans +using Oceananigans.Grids: validate_tupled_argument + struct DistributedModel{A, R, G, C} ranks :: R models :: A @@ -21,15 +23,14 @@ const RankConnectivity = NamedTuple{(:east, :west, :north, :south, :top, :bottom return i+1, j+1, k+1 end -function validate_tupled_argument(arg, argtype, argname) - length(arg) == 3 || throw(ArgumentError("length($argname) must be 3.")) - all(isa.(arg, argtype)) || throw(ArgumentError("$argname=$arg must contain $argtype s.")) - all(arg .> 0) || throw(ArgumentError("Elements of $argname=$arg must be > 0!")) - return nothing -end - -function DistributedModel(; ranks, model_kwargs...) +function DistributedModel(; size, length, ranks, model_kwargs...) + validate_tupled_argument(ranks, Int, "size") + validate_tupled_argument(ranks, Number, "length") validate_tupled_argument(ranks, Int, "ranks") + + Nx, Ny, Nz = size + Lx, Ly, Lz = length + Rx, Ry, Rz = ranks total_ranks = Rx*Ry*Rz @@ -52,9 +53,22 @@ function DistributedModel(; ranks, model_kwargs...) 
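    # Annotation, not part of the patch: the additions below carve the global
    # domain into per-rank sub-grids. Along x, rank index i (1-based) gets
    #     nx = Nx ÷ Rx,  lx = Lx / Rx,  x₁ = (i-1)*lx,  x₂ = i*lx,
    # assuming Rx evenly divides Nx. With the test call here (Nx = 32, Lx = 1,
    # Rx = 2), rank i = 1 owns x ∈ [0, 0.5] and rank i = 2 owns x ∈ [0.5, 1].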
MPI.Barrier(comm) model_id = my_rank + 1 - println("Model #$my_rank reporting in") + index = rank2index(my_rank, Rx, Ry, Rz) + rr = index2rank(index..., Rx, Ry, Rz) + # @info "rank=$my_rank, index=$index, index2rank=$rr" + + i, j, k = rank2index(my_rank, Rx, Ry, Rz) + nx, ny, nz = Nx÷Rx, Ny÷Ry, Nz÷Rz + lx, ly, lz = Lx/Rx, Ly/Ry, Lz/Rz + x₁, x₂ = (i-1)*lx, i*lx + y₁, y₂ = (j-1)*ly, j*ly + z₁, z₂ = (k-1)*lz, k*lz + @info "rank=$my_rank, x ∈ [$x₁, $x₂], y ∈ [$y₁, $y₂], z ∈ [$z₁, $z₂]" + grid = RegularCartesianGrid(size=(nx, ny, nz), x=(x₁, x₂), y=(y₁, y₂), z=(z₁, z₂)) return DistributedModel(ranks, nothing, nothing, comm) end -dm = DistributedModel(ranks=(2, 2, 1)) +dm = DistributedModel(size=(32, 32, 32), length=(1, 2, 5), ranks=(2, 2, 2)) + +MPI.Finalize() From 2540596d4f728164c42e61fa8842f0a9f2703204 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sat, 14 Dec 2019 23:42:13 -0500 Subject: [PATCH 021/100] More general left/right endpoints for grid --- src/distributed_model.jl | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/distributed_model.jl b/src/distributed_model.jl index 005588d539..390a16d8df 100644 --- a/src/distributed_model.jl +++ b/src/distributed_model.jl @@ -23,13 +23,16 @@ const RankConnectivity = NamedTuple{(:east, :west, :north, :south, :top, :bottom return i+1, j+1, k+1 end -function DistributedModel(; size, length, ranks, model_kwargs...) +function DistributedModel(; size, x, y, z, ranks, model_kwargs...) validate_tupled_argument(ranks, Int, "size") - validate_tupled_argument(ranks, Number, "length") validate_tupled_argument(ranks, Int, "ranks") Nx, Ny, Nz = size - Lx, Ly, Lz = length + + xL, xR = x + yL, yR = y + zL, zR = z + Lx, Ly, Lz = xR-xL, yR-yL, zR-zL Rx, Ry, Rz = ranks total_ranks = Rx*Ry*Rz @@ -55,20 +58,26 @@ function DistributedModel(; size, length, ranks, model_kwargs...) model_id = my_rank + 1 index = rank2index(my_rank, Rx, Ry, Rz) rr = index2rank(index..., Rx, Ry, Rz) - # @info "rank=$my_rank, index=$index, index2rank=$rr" + @info "rank=$my_rank, index=$index, index2rank=$rr" + + MPI.Barrier(comm) i, j, k = rank2index(my_rank, Rx, Ry, Rz) nx, ny, nz = Nx÷Rx, Ny÷Ry, Nz÷Rz lx, ly, lz = Lx/Rx, Ly/Ry, Lz/Rz - x₁, x₂ = (i-1)*lx, i*lx - y₁, y₂ = (j-1)*ly, j*ly - z₁, z₂ = (k-1)*lz, k*lz + + x₁, x₂ = xL + (i-1)*lx, xL + i*lx + y₁, y₂ = yL + (j-1)*ly, yL + j*ly + z₁, z₂ = zL + (k-1)*lz, zL + k*lz + @info "rank=$my_rank, x ∈ [$x₁, $x₂], y ∈ [$y₁, $y₂], z ∈ [$z₁, $z₂]" grid = RegularCartesianGrid(size=(nx, ny, nz), x=(x₁, x₂), y=(y₁, y₂), z=(z₁, z₂)) + MPI.Barrier(comm) + return DistributedModel(ranks, nothing, nothing, comm) end -dm = DistributedModel(size=(32, 32, 32), length=(1, 2, 5), ranks=(2, 2, 2)) +dm = DistributedModel(ranks=(2, 2, 2), size=(32, 32, 32), x=(0, 1), y=(-0.5, 0.5), z=(-10, 0)) MPI.Finalize() From d446d12f20e6c524bb53b079200f91b2d99acffd Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sun, 15 Dec 2019 00:06:01 -0500 Subject: [PATCH 022/100] Compute connectivity graph assuming box model and brick MPI topology The connectivity graph needs to take into account periodic boundary conditions. --- src/distributed_model.jl | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/distributed_model.jl b/src/distributed_model.jl index 390a16d8df..7fc476881f 100644 --- a/src/distributed_model.jl +++ b/src/distributed_model.jl @@ -75,6 +75,33 @@ function DistributedModel(; size, x, y, z, ranks, model_kwargs...) MPI.Barrier(comm) + i_east = i+1 > Rx ? 
nothing : i+1 + i_west = i-1 < 1 ? nothing : i-1 + + j_north = j+1 > Ry ? nothing : j+1 + j_south = j-1 < 1 ? nothing : j-1 + + k_top = k+1 > Rz ? nothing : k+1 + k_bot = k-1 < 1 ? nothing : k-1 + + r_east = isnothing(i_east) ? nothing : index2rank(i_east, j, k, Rx, Ry, Rz) + r_west = isnothing(i_west) ? nothing : index2rank(i_west, j, k, Rx, Ry, Rz) + + r_north = isnothing(j_north) ? nothing : index2rank(i, j_north, k, Rx, Ry, Rz) + r_south = isnothing(j_south) ? nothing : index2rank(i, j_south, k, Rx, Ry, Rz) + + r_top = isnothing(k_top) ? nothing : index2rank(i, j, k_top, Rx, Ry, Rz) + r_bot = isnothing(k_bot) ? nothing : index2rank(i, j, k_bot, Rx, Ry, Rz) + + @info "rank=$my_rank, index=$index, i_east=$i_east, i_west=$i_west, j_north=$j_north, j_south=$j_south, k_top=$k_top, k_bot=$k_bot" + @info "rank=$my_rank, r_east=$r_east, r_west=$r_west, r_north=$r_north, r_south=$r_south, r_top=$r_top, r_bot=$r_bot" + + MPI.Barrier(comm) + + my_connectivity = (east=r_east, west=r_west, + north=r_north, south=r_south, + top=r_top, bottom=r_bot) + return DistributedModel(ranks, nothing, nothing, comm) end From b56b13c048834e5ca493b1617804c2286cecce89 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sun, 15 Dec 2019 00:39:01 -0500 Subject: [PATCH 023/100] Communication boundary condition type for MPI --- src/distributed_model.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/distributed_model.jl b/src/distributed_model.jl index 7fc476881f..3f10a25e07 100644 --- a/src/distributed_model.jl +++ b/src/distributed_model.jl @@ -2,6 +2,7 @@ import MPI using Oceananigans +using Oceananigans: BCType using Oceananigans.Grids: validate_tupled_argument struct DistributedModel{A, R, G, C} @@ -11,6 +12,8 @@ struct DistributedModel{A, R, G, C} MPI_Comm :: C end +struct Communication <: BCType end + const RankConnectivity = NamedTuple{(:east, :west, :north, :south, :top, :bottom)} @inline index2rank(i, j, k, Rx, Ry, Rz) = (i-1)*Ry*Rz + (j-1)*Rz + (k-1) From 117853dbce398ef48522e716378ddb50245561ac Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sun, 15 Dec 2019 00:40:56 -0500 Subject: [PATCH 024/100] Root process should gather connectivities from each rank --- src/distributed_model.jl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/distributed_model.jl b/src/distributed_model.jl index 3f10a25e07..d5b4608496 100644 --- a/src/distributed_model.jl +++ b/src/distributed_model.jl @@ -99,15 +99,21 @@ function DistributedModel(; size, x, y, z, ranks, model_kwargs...) 
@info "rank=$my_rank, index=$index, i_east=$i_east, i_west=$i_west, j_north=$j_north, j_south=$j_south, k_top=$k_top, k_bot=$k_bot" @info "rank=$my_rank, r_east=$r_east, r_west=$r_west, r_north=$r_north, r_south=$r_south, r_top=$r_top, r_bot=$r_bot" - MPI.Barrier(comm) - my_connectivity = (east=r_east, west=r_west, north=r_north, south=r_south, top=r_top, bottom=r_bot) - return DistributedModel(ranks, nothing, nothing, comm) + MPI.Barrier(comm) + + connectivity_graph = MPI.Gather(my_connectivity, 0, comm) + + dm = DistributedModel(ranks, nothing, connectivity_graph, comm) + + return dm end dm = DistributedModel(ranks=(2, 2, 2), size=(32, 32, 32), x=(0, 1), y=(-0.5, 0.5), z=(-10, 0)) -MPI.Finalize() +for r in dm.connectivity_graph + @show r +end From 89e8c5bd1c799039e96e247c2baa6ccecef693bf Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sun, 15 Dec 2019 00:43:36 -0500 Subject: [PATCH 025/100] We can MPI.Finalize in a finalizer for `DistributedModel` --- src/distributed_model.jl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/distributed_model.jl b/src/distributed_model.jl index d5b4608496..b3a80517af 100644 --- a/src/distributed_model.jl +++ b/src/distributed_model.jl @@ -5,7 +5,7 @@ using Oceananigans using Oceananigans: BCType using Oceananigans.Grids: validate_tupled_argument -struct DistributedModel{A, R, G, C} +mutable struct DistributedModel{A, R, G, C} ranks :: R models :: A connectivity_graph :: G @@ -105,15 +105,17 @@ function DistributedModel(; size, x, y, z, ranks, model_kwargs...) MPI.Barrier(comm) - connectivity_graph = MPI.Gather(my_connectivity, 0, comm) - + connectivity_graph = MPI.Gather([0, 1, 2], 0, comm) + dm = DistributedModel(ranks, nothing, connectivity_graph, comm) + finalizer(x -> MPI.Finalize(), dm) + return dm end dm = DistributedModel(ranks=(2, 2, 2), size=(32, 32, 32), x=(0, 1), y=(-0.5, 0.5), z=(-10, 0)) -for r in dm.connectivity_graph - @show r -end +# for r in dm.connectivity_graph +# @show r +# end From 41346ce49c1e3cd3636e074a666952e124879640 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sun, 15 Dec 2019 00:58:14 -0500 Subject: [PATCH 026/100] Gotta be careful and run the rest of the script as rank 0 --- src/distributed_model.jl | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/distributed_model.jl b/src/distributed_model.jl index b3a80517af..cfc50fac61 100644 --- a/src/distributed_model.jl +++ b/src/distributed_model.jl @@ -107,15 +107,17 @@ function DistributedModel(; size, x, y, z, ranks, model_kwargs...) 
connectivity_graph = MPI.Gather([0, 1, 2], 0, comm) - dm = DistributedModel(ranks, nothing, connectivity_graph, comm) - - finalizer(x -> MPI.Finalize(), dm) - - return dm + if my_rank == 0 + dm = DistributedModel(ranks, nothing, connectivity_graph, comm) + finalizer(x -> MPI.Finalize(), dm) + return dm + end end dm = DistributedModel(ranks=(2, 2, 2), size=(32, 32, 32), x=(0, 1), y=(-0.5, 0.5), z=(-10, 0)) -# for r in dm.connectivity_graph -# @show r -# end +if MPI.Comm_rank(MPI.COMM_WORLD) == 0 + for r in dm.connectivity_graph + @show r + end +end From 724d2e02f45dfd86920cddd8678c32d349500f75 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sun, 15 Dec 2019 14:47:45 -0500 Subject: [PATCH 027/100] Finalizer could be a bad idea actually --- src/distributed_model.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/distributed_model.jl b/src/distributed_model.jl index cfc50fac61..173a81b299 100644 --- a/src/distributed_model.jl +++ b/src/distributed_model.jl @@ -108,9 +108,7 @@ function DistributedModel(; size, x, y, z, ranks, model_kwargs...) connectivity_graph = MPI.Gather([0, 1, 2], 0, comm) if my_rank == 0 - dm = DistributedModel(ranks, nothing, connectivity_graph, comm) - finalizer(x -> MPI.Finalize(), dm) - return dm + return DistributedModel(ranks, nothing, connectivity_graph, comm) end end From c45cc4c19939d891fd0839bace3652c795e0bf64 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sun, 15 Dec 2019 15:06:04 -0500 Subject: [PATCH 028/100] Send and received connectivities as named tuples --- src/distributed_model.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/distributed_model.jl b/src/distributed_model.jl index 173a81b299..28e2357449 100644 --- a/src/distributed_model.jl +++ b/src/distributed_model.jl @@ -103,11 +103,16 @@ function DistributedModel(; size, x, y, z, ranks, model_kwargs...) north=r_north, south=r_south, top=r_top, bottom=r_bot) - MPI.Barrier(comm) + MPI.send(my_connectivity, 0, my_rank, comm) - connectivity_graph = MPI.Gather([0, 1, 2], 0, comm) + MPI.Barrier(comm) if my_rank == 0 + connectivity_graph = Array{RankConnectivity}(undef, mpi_ranks) + for r in 0:mpi_ranks-1 + connectivity_graph[r+1], _ = MPI.recv(r, r, comm) + end + return DistributedModel(ranks, nothing, connectivity_graph, comm) end end From e594c29b38619fa948f86a2052271163130b21ec Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sun, 15 Dec 2019 16:54:24 -0500 Subject: [PATCH 029/100] No master/root model struct. Every rank stores their own model --- src/distributed_model.jl | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/src/distributed_model.jl b/src/distributed_model.jl index 28e2357449..0b484c22c8 100644 --- a/src/distributed_model.jl +++ b/src/distributed_model.jl @@ -5,11 +5,10 @@ using Oceananigans using Oceananigans: BCType using Oceananigans.Grids: validate_tupled_argument -mutable struct DistributedModel{A, R, G, C} +struct DistributedModel{A, R, G} ranks :: R - models :: A - connectivity_graph :: G - MPI_Comm :: C + model :: A + connectivity :: G end struct Communication <: BCType end @@ -40,7 +39,6 @@ function DistributedModel(; size, x, y, z, ranks, model_kwargs...) Rx, Ry, Rz = ranks total_ranks = Rx*Ry*Rz - MPI.Init() comm = MPI.COMM_WORLD mpi_ranks = MPI.Comm_size(comm) @@ -103,24 +101,20 @@ function DistributedModel(; size, x, y, z, ranks, model_kwargs...) 
north=r_north, south=r_south, top=r_top, bottom=r_bot) - MPI.send(my_connectivity, 0, my_rank, comm) - MPI.Barrier(comm) - if my_rank == 0 - connectivity_graph = Array{RankConnectivity}(undef, mpi_ranks) - for r in 0:mpi_ranks-1 - connectivity_graph[r+1], _ = MPI.recv(r, r, comm) - end + @info "Rank $my_rank creating my model..." + my_model = Model(grid=grid) + @info "Rank $my_rank: submodel created!" - return DistributedModel(ranks, nothing, connectivity_graph, comm) - end + return DistributedModel(ranks, my_model, my_connectivity) end +MPI.Init() + dm = DistributedModel(ranks=(2, 2, 2), size=(32, 32, 32), x=(0, 1), y=(-0.5, 0.5), z=(-10, 0)) -if MPI.Comm_rank(MPI.COMM_WORLD) == 0 - for r in dm.connectivity_graph - @show r - end -end +my_rank = MPI.Comm_rank(MPI.COMM_WORLD) +@info "Rank $my_rank: $(dm.connectivity), $(dm.model.grid.zF[end])" + +MPI.Finalize() From cd03e13fc8adeef22a715c7225e7db57e15651fa Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sun, 15 Dec 2019 17:13:53 -0500 Subject: [PATCH 030/100] Cleanup --- src/distributed_model.jl | 87 ++++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 35 deletions(-) diff --git a/src/distributed_model.jl b/src/distributed_model.jl index 0b484c22c8..3bb166c6b0 100644 --- a/src/distributed_model.jl +++ b/src/distributed_model.jl @@ -1,19 +1,14 @@ +using Test + import MPI using Oceananigans - using Oceananigans: BCType using Oceananigans.Grids: validate_tupled_argument -struct DistributedModel{A, R, G} - ranks :: R - model :: A - connectivity :: G -end - -struct Communication <: BCType end - -const RankConnectivity = NamedTuple{(:east, :west, :north, :south, :top, :bottom)} +##### +##### Converting between index and MPI rank taking k as the fast index +##### @inline index2rank(i, j, k, Rx, Ry, Rz) = (i-1)*Ry*Rz + (j-1)*Rz + (k-1) @@ -25,6 +20,32 @@ const RankConnectivity = NamedTuple{(:east, :west, :north, :south, :top, :bottom return i+1, j+1, k+1 end +##### +##### Communication boundary condition +##### + +struct Communication <: BCType end + +##### +##### Distributed model struct and constructor +##### + +const Connectivity = NamedTuple{(:east, :west, :north, :south, :top, :bottom)} + +struct DistributedModel{A, R, G} + ranks :: R + model :: A + connectivity :: G +end + +""" + DistributedModel(size, x, y, z, ranks, model_kwargs...) + +size: Number of total grid points. +x, y, z: Left and right endpoints for each dimension. +ranks: Number of ranks in each dimension. +model_kwargs: Passed to `Model` constructor. +""" function DistributedModel(; size, x, y, z, ranks, model_kwargs...) validate_tupled_argument(ranks, Int, "size") validate_tupled_argument(ranks, Int, "ranks") @@ -44,26 +65,19 @@ function DistributedModel(; size, x, y, z, ranks, model_kwargs...) mpi_ranks = MPI.Comm_size(comm) my_rank = MPI.Comm_rank(comm) - if my_rank == 0 - if total_ranks != mpi_ranks - throw(ArgumentError("ranks=($Rx, $Ry, $Rz) [$total_ranks total] inconsistent " * - "with number of MPI ranks: $mpi_ranks. Exiting with code 1.")) - MPI.Finalize() - exit(code=1) - end + if total_ranks != mpi_ranks + throw(ArgumentError("ranks=($Rx, $Ry, $Rz) [$total_ranks total] inconsistent " * + "with number of MPI ranks: $mpi_ranks. Exiting with code 1.")) + MPI.Finalize() + exit(code=1) end - # Ensure that ranks 1:N don't go ahead if total_ranks != mpi_ranks. 
- MPI.Barrier(comm) + i, j, k = index = rank2index(my_rank, Rx, Ry, Rz) - model_id = my_rank + 1 - index = rank2index(my_rank, Rx, Ry, Rz) - rr = index2rank(index..., Rx, Ry, Rz) - @info "rank=$my_rank, index=$index, index2rank=$rr" + ##### + ##### Construct local grid + ##### - MPI.Barrier(comm) - - i, j, k = rank2index(my_rank, Rx, Ry, Rz) nx, ny, nz = Nx÷Rx, Ny÷Ry, Nz÷Rz lx, ly, lz = Lx/Rx, Ly/Ry, Lz/Rz @@ -71,10 +85,11 @@ function DistributedModel(; size, x, y, z, ranks, model_kwargs...) y₁, y₂ = yL + (j-1)*ly, yL + j*ly z₁, z₂ = zL + (k-1)*lz, zL + k*lz - @info "rank=$my_rank, x ∈ [$x₁, $x₂], y ∈ [$y₁, $y₂], z ∈ [$z₁, $z₂]" grid = RegularCartesianGrid(size=(nx, ny, nz), x=(x₁, x₂), y=(y₁, y₂), z=(z₁, z₂)) - MPI.Barrier(comm) + ##### + ##### Construct local connectivity + ##### i_east = i+1 > Rx ? nothing : i+1 i_west = i-1 < 1 ? nothing : i-1 @@ -94,14 +109,12 @@ function DistributedModel(; size, x, y, z, ranks, model_kwargs...) r_top = isnothing(k_top) ? nothing : index2rank(i, j, k_top, Rx, Ry, Rz) r_bot = isnothing(k_bot) ? nothing : index2rank(i, j, k_bot, Rx, Ry, Rz) - @info "rank=$my_rank, index=$index, i_east=$i_east, i_west=$i_west, j_north=$j_north, j_south=$j_south, k_top=$k_top, k_bot=$k_bot" - @info "rank=$my_rank, r_east=$r_east, r_west=$r_west, r_north=$r_north, r_south=$r_south, r_top=$r_top, r_bot=$r_bot" - - my_connectivity = (east=r_east, west=r_west, - north=r_north, south=r_south, - top=r_top, bottom=r_bot) + my_connectivity = (east=r_east, west=r_west, north=r_north, + south=r_south, top=r_top, bottom=r_bot) - MPI.Barrier(comm) + ##### + ##### Construct local model + ##### @info "Rank $my_rank creating my model..." my_model = Model(grid=grid) @@ -110,6 +123,10 @@ function DistributedModel(; size, x, y, z, ranks, model_kwargs...) return DistributedModel(ranks, my_model, my_connectivity) end +##### +##### Script/test/whatever +##### + MPI.Init() dm = DistributedModel(ranks=(2, 2, 2), size=(32, 32, 32), x=(0, 1), y=(-0.5, 0.5), z=(-10, 0)) From f37d607bf0926729e39c77f1c1fd8c606fda0d0d Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sun, 15 Dec 2019 17:28:05 -0500 Subject: [PATCH 031/100] Isolate construction of connectivity graph --- src/distributed_model.jl | 66 +++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 25 deletions(-) diff --git a/src/distributed_model.jl b/src/distributed_model.jl index 3bb166c6b0..b5eff1ca6a 100644 --- a/src/distributed_model.jl +++ b/src/distributed_model.jl @@ -20,6 +20,38 @@ using Oceananigans.Grids: validate_tupled_argument return i+1, j+1, k+1 end +##### +##### Connectivity graph +##### + +const Connectivity = NamedTuple{(:east, :west, :north, :south, :top, :bottom)} + +function construct_connectivity(index, ranks, boundary_conditions) + i, j, k = index + Rx, Ry, Rz = ranks + + i_east = i+1 > Rx ? nothing : i+1 + i_west = i-1 < 1 ? nothing : i-1 + + j_north = j+1 > Ry ? nothing : j+1 + j_south = j-1 < 1 ? nothing : j-1 + + k_top = k+1 > Rz ? nothing : k+1 + k_bot = k-1 < 1 ? nothing : k-1 + + r_east = isnothing(i_east) ? nothing : index2rank(i_east, j, k, Rx, Ry, Rz) + r_west = isnothing(i_west) ? nothing : index2rank(i_west, j, k, Rx, Ry, Rz) + + r_north = isnothing(j_north) ? nothing : index2rank(i, j_north, k, Rx, Ry, Rz) + r_south = isnothing(j_south) ? nothing : index2rank(i, j_south, k, Rx, Ry, Rz) + + r_top = isnothing(k_top) ? nothing : index2rank(i, j, k_top, Rx, Ry, Rz) + r_bot = isnothing(k_bot) ? 
nothing : index2rank(i, j, k_bot, Rx, Ry, Rz) + + return (east=r_east, west=r_west, north=r_north, + south=r_south, top=r_top, bottom=r_bot) +end + ##### ##### Communication boundary condition ##### @@ -30,8 +62,6 @@ struct Communication <: BCType end ##### Distributed model struct and constructor ##### -const Connectivity = NamedTuple{(:east, :west, :north, :south, :top, :bottom)} - struct DistributedModel{A, R, G} ranks :: R model :: A @@ -46,12 +76,13 @@ x, y, z: Left and right endpoints for each dimension. ranks: Number of ranks in each dimension. model_kwargs: Passed to `Model` constructor. """ -function DistributedModel(; size, x, y, z, ranks, model_kwargs...) +function DistributedModel(; size, x, y, z, ranks, boundary_conditions, model_kwargs...) validate_tupled_argument(ranks, Int, "size") validate_tupled_argument(ranks, Int, "ranks") Nx, Ny, Nz = size + # Pull out left and right endpoints for full model. xL, xR = x yL, yR = y zL, zR = z @@ -73,6 +104,7 @@ function DistributedModel(; size, x, y, z, ranks, model_kwargs...) end i, j, k = index = rank2index(my_rank, Rx, Ry, Rz) + @debug "Rank: $my_rank, index: $index" ##### ##### Construct local grid @@ -85,40 +117,22 @@ function DistributedModel(; size, x, y, z, ranks, model_kwargs...) y₁, y₂ = yL + (j-1)*ly, yL + j*ly z₁, z₂ = zL + (k-1)*lz, zL + k*lz + @debug "Constructing local grid: n=($nx, $ny, $nz), x ∈ [$x₁, $x₂], y ∈ [$y₁, $y₂], z ∈ [$z₁, $z₂]" grid = RegularCartesianGrid(size=(nx, ny, nz), x=(x₁, x₂), y=(y₁, y₂), z=(z₁, z₂)) ##### ##### Construct local connectivity ##### - i_east = i+1 > Rx ? nothing : i+1 - i_west = i-1 < 1 ? nothing : i-1 - - j_north = j+1 > Ry ? nothing : j+1 - j_south = j-1 < 1 ? nothing : j-1 - - k_top = k+1 > Rz ? nothing : k+1 - k_bot = k-1 < 1 ? nothing : k-1 - - r_east = isnothing(i_east) ? nothing : index2rank(i_east, j, k, Rx, Ry, Rz) - r_west = isnothing(i_west) ? nothing : index2rank(i_west, j, k, Rx, Ry, Rz) - - r_north = isnothing(j_north) ? nothing : index2rank(i, j_north, k, Rx, Ry, Rz) - r_south = isnothing(j_south) ? nothing : index2rank(i, j_south, k, Rx, Ry, Rz) - - r_top = isnothing(k_top) ? nothing : index2rank(i, j, k_top, Rx, Ry, Rz) - r_bot = isnothing(k_bot) ? nothing : index2rank(i, j, k_bot, Rx, Ry, Rz) + my_connectivity = construct_connectivity(index, ranks, boundary_conditions) - my_connectivity = (east=r_east, west=r_west, north=r_north, - south=r_south, top=r_top, bottom=r_bot) + @debug "Local connectivity: $my_connectivity" ##### ##### Construct local model ##### - @info "Rank $my_rank creating my model..." my_model = Model(grid=grid) - @info "Rank $my_rank: submodel created!" 
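    # A usage note (a sketch; exact launcher flags vary by MPI distribution):
    # the constructor requires total_ranks == mpi_ranks, so a script calling it
    # is assumed to be launched with exactly Rx*Ry*Rz processes, e.g. for
    # ranks=(2, 2, 2):
    #
    #   mpiexec -n 8 julia distributed_model.jl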
return DistributedModel(ranks, my_model, my_connectivity) end @@ -129,7 +143,9 @@ end MPI.Init() -dm = DistributedModel(ranks=(2, 2, 2), size=(32, 32, 32), x=(0, 1), y=(-0.5, 0.5), z=(-10, 0)) +dm = DistributedModel(ranks=(2, 2, 2), size=(32, 32, 32), + x=(0, 1), y=(-0.5, 0.5), z=(-10, 0), + boundary_conditions=nothing) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) @info "Rank $my_rank: $(dm.connectivity), $(dm.model.grid.zF[end])" From 2d8cb4052f6e8862336257026ffc31b141f54b56 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sun, 15 Dec 2019 18:14:41 -0500 Subject: [PATCH 032/100] Account for periodic boundary conditions when constructing connectivity --- src/distributed_model.jl | 60 +++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/src/distributed_model.jl b/src/distributed_model.jl index b5eff1ca6a..421c108e0e 100644 --- a/src/distributed_model.jl +++ b/src/distributed_model.jl @@ -3,9 +3,15 @@ using Test import MPI using Oceananigans -using Oceananigans: BCType +using Oceananigans: BCType, PBC using Oceananigans.Grids: validate_tupled_argument +##### +##### Convinient aliases +##### + +const PeriodicBC = PBC + ##### ##### Converting between index and MPI rank taking k as the fast index ##### @@ -26,27 +32,49 @@ end const Connectivity = NamedTuple{(:east, :west, :north, :south, :top, :bottom)} -function construct_connectivity(index, ranks, boundary_conditions) - i, j, k = index - Rx, Ry, Rz = ranks +function increment_index(i, R, bc) + if i+1 > R + if bc isa PeriodicBC + return 1 + else + return nothing + end + else + return i+1 + end +end - i_east = i+1 > Rx ? nothing : i+1 - i_west = i-1 < 1 ? nothing : i-1 +function decrement_index(i, R, bc) + if i-1 < 1 + if bc isa PeriodicBC + return R + else + return nothing + end + else + return i-1 + end +end - j_north = j+1 > Ry ? nothing : j+1 - j_south = j-1 < 1 ? nothing : j-1 +function construct_connectivity(index, ranks, bcs) + i, j, k = index + Rx, Ry, Rz = ranks - k_top = k+1 > Rz ? nothing : k+1 - k_bot = k-1 < 1 ? nothing : k-1 + @show Rx, Ry, Rz - r_east = isnothing(i_east) ? nothing : index2rank(i_east, j, k, Rx, Ry, Rz) - r_west = isnothing(i_west) ? nothing : index2rank(i_west, j, k, Rx, Ry, Rz) + i_east = increment_index(i, Rx, bcs.x.right) + i_west = decrement_index(i, Rx, bcs.x.left) + j_north = increment_index(j, Ry, bcs.y.north) + j_south = decrement_index(j, Ry, bcs.y.south) + k_top = increment_index(k, Rz, bcs.z.top) + k_bot = decrement_index(k, Rz, bcs.z.bottom) + r_east = isnothing(i_east) ? nothing : index2rank(i_east, j, k, Rx, Ry, Rz) + r_west = isnothing(i_west) ? nothing : index2rank(i_west, j, k, Rx, Ry, Rz) r_north = isnothing(j_north) ? nothing : index2rank(i, j_north, k, Rx, Ry, Rz) r_south = isnothing(j_south) ? nothing : index2rank(i, j_south, k, Rx, Ry, Rz) - - r_top = isnothing(k_top) ? nothing : index2rank(i, j, k_top, Rx, Ry, Rz) - r_bot = isnothing(k_bot) ? nothing : index2rank(i, j, k_bot, Rx, Ry, Rz) + r_top = isnothing(k_top) ? nothing : index2rank(i, j, k_top, Rx, Ry, Rz) + r_bot = isnothing(k_bot) ? 
nothing : index2rank(i, j, k_bot, Rx, Ry, Rz) return (east=r_east, west=r_west, north=r_north, south=r_south, top=r_top, bottom=r_bot) @@ -145,7 +173,7 @@ MPI.Init() dm = DistributedModel(ranks=(2, 2, 2), size=(32, 32, 32), x=(0, 1), y=(-0.5, 0.5), z=(-10, 0), - boundary_conditions=nothing) + boundary_conditions=HorizontallyPeriodicBCs()) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) @info "Rank $my_rank: $(dm.connectivity), $(dm.model.grid.zF[end])" From ad2955944ca4ba9dcdd5c5d67da7493a8681000e Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Mon, 16 Dec 2019 05:25:04 -0500 Subject: [PATCH 033/100] Need to know index --- src/distributed_model.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/distributed_model.jl b/src/distributed_model.jl index 421c108e0e..649f89635f 100644 --- a/src/distributed_model.jl +++ b/src/distributed_model.jl @@ -90,7 +90,8 @@ struct Communication <: BCType end ##### Distributed model struct and constructor ##### -struct DistributedModel{A, R, G} +struct DistributedModel{I, A, R, G} + index :: I ranks :: R model :: A connectivity :: G @@ -153,7 +154,6 @@ function DistributedModel(; size, x, y, z, ranks, boundary_conditions, model_kwa ##### my_connectivity = construct_connectivity(index, ranks, boundary_conditions) - @debug "Local connectivity: $my_connectivity" ##### @@ -162,7 +162,7 @@ function DistributedModel(; size, x, y, z, ranks, boundary_conditions, model_kwa my_model = Model(grid=grid) - return DistributedModel(ranks, my_model, my_connectivity) + return DistributedModel(index, ranks, my_model, my_connectivity) end ##### @@ -171,7 +171,7 @@ end MPI.Init() -dm = DistributedModel(ranks=(2, 2, 2), size=(32, 32, 32), +dm = DistributedModel(ranks=(2, 2, 2), size=(16, 16, 16), x=(0, 1), y=(-0.5, 0.5), z=(-10, 0), boundary_conditions=HorizontallyPeriodicBCs()) From 54c18601a8d21f8c84a46dbb467c96816b375931 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Fri, 3 Jan 2020 12:33:23 -0500 Subject: [PATCH 034/100] Split test --- src/distributed_model.jl | 14 -------------- src/test_distributed_model.jl | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 14 deletions(-) create mode 100644 src/test_distributed_model.jl diff --git a/src/distributed_model.jl b/src/distributed_model.jl index 649f89635f..de1fe36b31 100644 --- a/src/distributed_model.jl +++ b/src/distributed_model.jl @@ -165,17 +165,3 @@ function DistributedModel(; size, x, y, z, ranks, boundary_conditions, model_kwa return DistributedModel(index, ranks, my_model, my_connectivity) end -##### -##### Script/test/whatever -##### - -MPI.Init() - -dm = DistributedModel(ranks=(2, 2, 2), size=(16, 16, 16), - x=(0, 1), y=(-0.5, 0.5), z=(-10, 0), - boundary_conditions=HorizontallyPeriodicBCs()) - -my_rank = MPI.Comm_rank(MPI.COMM_WORLD) -@info "Rank $my_rank: $(dm.connectivity), $(dm.model.grid.zF[end])" - -MPI.Finalize() diff --git a/src/test_distributed_model.jl b/src/test_distributed_model.jl new file mode 100644 index 0000000000..66df271ea7 --- /dev/null +++ b/src/test_distributed_model.jl @@ -0,0 +1,16 @@ +using Test + +import MPI + +include("distributed_model.jl") + +MPI.Init() + +dm = DistributedModel(ranks=(2, 2, 2), size=(16, 16, 16), + x=(0, 1), y=(-0.5, 0.5), z=(-10, 0), + boundary_conditions=HorizontallyPeriodicBCs()) + +my_rank = MPI.Comm_rank(MPI.COMM_WORLD) +@info "Rank $my_rank: $(dm.connectivity), $(dm.model.grid.zF[end])" + +MPI.Finalize() From ebbdc910e02169e08b6e770b3fb18b630982bf38 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Fri, 3 Jan 2020 15:57:10 -0500 
Subject: [PATCH 035/100] Inject halo communication boundary conditions But need to figure out what to do with pressure solver as it tries to set up an NNN Poisson solver on some processes... --- src/distributed_model.jl | 64 ++++++++++++++++++++++++++++++----- src/test_distributed_model.jl | 3 +- 2 files changed, 57 insertions(+), 10 deletions(-) diff --git a/src/distributed_model.jl b/src/distributed_model.jl index de1fe36b31..46807bff77 100644 --- a/src/distributed_model.jl +++ b/src/distributed_model.jl @@ -1,5 +1,3 @@ -using Test - import MPI using Oceananigans @@ -60,8 +58,6 @@ function construct_connectivity(index, ranks, bcs) i, j, k = index Rx, Ry, Rz = ranks - @show Rx, Ry, Rz - i_east = increment_index(i, Rx, bcs.x.right) i_west = decrement_index(i, Rx, bcs.x.left) j_north = increment_index(j, Ry, bcs.y.north) @@ -81,10 +77,54 @@ function construct_connectivity(index, ranks, bcs) end ##### -##### Communication boundary condition +##### Halo communication boundary condition ##### -struct Communication <: BCType end +struct HaloCommunication <: BCType end +const HaloCommunicationBC = BoundaryCondition{<:HaloCommunication} + +const HaloCommunicationDetails = NamedTuple{(:rank_from, :rank_to)} +HaloCommunicationDetails(; rank_from, rank_to) = HaloCommunicationDetails((rank_from, rank_to)) + +function inject_halo_communication_boundary_conditions(boundary_conditions, my_rank, connectivity) + new_field_bcs = [] + + for field_bcs in boundary_conditions + rank_east = connectivity.east + rank_west = connectivity.west + rank_north = connectivity.north + rank_south = connectivity.south + rank_top = connectivity.top + rank_bottom = connectivity.bottom + + east_comm_bc_details = HaloCommunicationDetails(rank_from=my_rank, rank_to=connectivity.east) + west_comm_bc_details = HaloCommunicationDetails(rank_from=my_rank, rank_to=connectivity.west) + north_comm_bc_details = HaloCommunicationDetails(rank_from=my_rank, rank_to=connectivity.north) + south_comm_bc_details = HaloCommunicationDetails(rank_from=my_rank, rank_to=connectivity.south) + top_comm_bc_details = HaloCommunicationDetails(rank_from=my_rank, rank_to=connectivity.top) + bottom_comm_bc_details = HaloCommunicationDetails(rank_from=my_rank, rank_to=connectivity.bottom) + + east_comm_bc = BoundaryCondition(HaloCommunication, east_comm_bc_details) + west_comm_bc = BoundaryCondition(HaloCommunication, west_comm_bc_details) + north_comm_bc = BoundaryCondition(HaloCommunication, north_comm_bc_details) + south_comm_bc = BoundaryCondition(HaloCommunication, south_comm_bc_details) + top_comm_bc = BoundaryCondition(HaloCommunication, top_comm_bc_details) + bottom_comm_bc = BoundaryCondition(HaloCommunication, bottom_comm_bc_details) + + x_bcs = CoordinateBoundaryConditions(isnothing(rank_west) ? field_bcs.x.left : west_comm_bc, + isnothing(rank_east) ? field_bcs.x.right : east_comm_bc) + + y_bcs = CoordinateBoundaryConditions(isnothing(rank_south) ? field_bcs.y.south : south_comm_bc, + isnothing(rank_north) ? field_bcs.y.north : north_comm_bc) + + z_bcs = CoordinateBoundaryConditions(isnothing(rank_bottom) ? field_bcs.z.bottom : bottom_comm_bc, + isnothing(rank_top) ? 
field_bcs.z.top : top_comm_bc) + + push!(new_field_bcs, FieldBoundaryConditions(x_bcs, y_bcs, z_bcs)) + end + + return NamedTuple{propertynames(boundary_conditions)}(Tuple(new_field_bcs)) +end ##### ##### Distributed model struct and constructor @@ -153,15 +193,21 @@ function DistributedModel(; size, x, y, z, ranks, boundary_conditions, model_kwa ##### Construct local connectivity ##### - my_connectivity = construct_connectivity(index, ranks, boundary_conditions) + my_connectivity = construct_connectivity(index, ranks, boundary_conditions.u) @debug "Local connectivity: $my_connectivity" + ##### + ##### Change appropriate boundary conditions to halo communication BCs + ##### + + @debug "Injecting halo communication boundary conditions..." + boundary_conditions_with_communication = inject_halo_communication_boundary_conditions(boundary_conditions, my_rank, my_connectivity) + ##### ##### Construct local model ##### - my_model = Model(grid=grid) + my_model = Model(grid=grid, boundary_conditions=boundary_conditions_with_communication) return DistributedModel(index, ranks, my_model, my_connectivity) end - diff --git a/src/test_distributed_model.jl b/src/test_distributed_model.jl index 66df271ea7..680b546319 100644 --- a/src/test_distributed_model.jl +++ b/src/test_distributed_model.jl @@ -8,9 +8,10 @@ MPI.Init() dm = DistributedModel(ranks=(2, 2, 2), size=(16, 16, 16), x=(0, 1), y=(-0.5, 0.5), z=(-10, 0), - boundary_conditions=HorizontallyPeriodicBCs()) + boundary_conditions=HorizontallyPeriodicSolutionBCs()) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) @info "Rank $my_rank: $(dm.connectivity), $(dm.model.grid.zF[end])" +@info "u.x BCs: $(dm.model.boundary_conditions.solution.u.x)" MPI.Finalize() From bd7fd4ce8f5fb7a3974fdd9ef745bc0ad93c9f09 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sun, 5 Jan 2020 08:58:27 -0500 Subject: [PATCH 036/100] Move to an `Oceananigans.Distributed` submodule. --- src/{ => Distributed}/distributed_model.jl | 0 src/{ => Distributed}/test_distributed_model.jl | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename src/{ => Distributed}/distributed_model.jl (100%) rename src/{ => Distributed}/test_distributed_model.jl (100%) diff --git a/src/distributed_model.jl b/src/Distributed/distributed_model.jl similarity index 100% rename from src/distributed_model.jl rename to src/Distributed/distributed_model.jl diff --git a/src/test_distributed_model.jl b/src/Distributed/test_distributed_model.jl similarity index 100% rename from src/test_distributed_model.jl rename to src/Distributed/test_distributed_model.jl From e99f78f3b64edac3dbef8e4b3d4c9c27a3da24e6 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sun, 5 Jan 2020 09:20:39 -0500 Subject: [PATCH 037/100] Properly pass `Model` kwargs. --- src/Distributed/distributed_model.jl | 3 ++- src/Distributed/test_distributed_model.jl | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Distributed/distributed_model.jl b/src/Distributed/distributed_model.jl index 46807bff77..1be3e33e96 100644 --- a/src/Distributed/distributed_model.jl +++ b/src/Distributed/distributed_model.jl @@ -207,7 +207,8 @@ function DistributedModel(; size, x, y, z, ranks, boundary_conditions, model_kwa ##### Construct local model ##### - my_model = Model(grid=grid, boundary_conditions=boundary_conditions_with_communication) + my_model = Model(; grid = grid, boundary_conditions = boundary_conditions_with_communication, + model_kwargs...) 
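+    # (note the leading `;` and the kwargs splat: anything collected in
+    # model_kwargs, e.g. the poisson_solver passed in the test below, is
+    # forwarded untouched to the underlying Model constructor)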
return DistributedModel(index, ranks, my_model, my_connectivity)
end
diff --git a/src/Distributed/test_distributed_model.jl b/src/Distributed/test_distributed_model.jl
index 680b546319..2a3b932457 100644
--- a/src/Distributed/test_distributed_model.jl
+++ b/src/Distributed/test_distributed_model.jl
@@ -6,9 +6,10 @@ include("distributed_model.jl")

MPI.Init()

-dm = DistributedModel(ranks=(2, 2, 2), size=(16, 16, 16),
-                      x=(0, 1), y=(-0.5, 0.5), z=(-10, 0),
-                      boundary_conditions=HorizontallyPeriodicSolutionBCs())
+dm = DistributedModel(ranks = (2, 2, 2), size = (16, 16, 16),
+                      x = (0, 1), y = (-0.5, 0.5), z = (-10, 0),
+                      boundary_conditions = HorizontallyPeriodicSolutionBCs(),
+                      poisson_solver = nothing)

my_rank = MPI.Comm_rank(MPI.COMM_WORLD)
@info "Rank $my_rank: $(dm.connectivity), $(dm.model.grid.zF[end])"
@info "u.x BCs: $(dm.model.boundary_conditions.solution.u.x)"

From db394c05d4e6add4661c813e6cfc454af9d1e906 Mon Sep 17 00:00:00 2001
From: ali-ramadhan
Date: Sun, 5 Jan 2020 10:09:36 -0500
Subject: [PATCH 038/100] No need to `MPI.Finalize()` anymore

---
 src/Distributed/test_distributed_model.jl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/Distributed/test_distributed_model.jl b/src/Distributed/test_distributed_model.jl
index 2a3b932457..0d9a807a89 100644
--- a/src/Distributed/test_distributed_model.jl
+++ b/src/Distributed/test_distributed_model.jl
@@ -14,5 +14,3 @@ dm = DistributedModel(ranks = (2, 2, 2), size = (16, 16, 16),
my_rank = MPI.Comm_rank(MPI.COMM_WORLD)
@info "Rank $my_rank: $(dm.connectivity), $(dm.model.grid.zF[end])"
@info "u.x BCs: $(dm.model.boundary_conditions.solution.u.x)"
-
-MPI.Finalize()

From cabf31b8123febfc029496620986a6967a9d1a3f Mon Sep 17 00:00:00 2001
From: ali-ramadhan
Date: Sun, 5 Jan 2020 11:00:32 -0500
Subject: [PATCH 039/100] Multiple dispatch to fill west halo with MPI 😍

---
 src/Distributed/distributed_model.jl      | 33 +++++++++++++++++++++++
 src/Distributed/test_distributed_model.jl |  9 +++++--
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/src/Distributed/distributed_model.jl b/src/Distributed/distributed_model.jl
index 1be3e33e96..119f550c6c 100644
--- a/src/Distributed/distributed_model.jl
+++ b/src/Distributed/distributed_model.jl
@@ -4,6 +4,8 @@ using Oceananigans
using Oceananigans: BCType, PBC
using Oceananigans.Grids: validate_tupled_argument

+import Oceananigans: fill_west_halo!
+
#####
##### Convinient aliases
#####

@@ -126,6 +128,37 @@ function inject_halo_communication_boundary_conditions(boundary_conditions, my_r
    return NamedTuple{propertynames(boundary_conditions)}(Tuple(new_field_bcs))
end

+# Note: Hard-coded so this only works up to 10^3 ranks.
+@inline halo_comm_bc_send_tag(bc) = 10^3 * bc.condition.rank_from + bc.condition.rank_to
+@inline halo_comm_bc_recv_tag(bc) = 10^3 * bc.condition.rank_to + bc.condition.rank_from
+
+function fill_west_halo!(c, bc::HaloCommunicationBC, arch, grid, args...)
+    N, H = grid.Nx, grid.Hx
+
+    send_buffer = c.parent[N+1:N+H, :, :]
+    recv_buffer = c.parent[1:H, :, :]
+
+    dest_rank = bc.condition.rank_to
+    src_rank = bc.condition.rank_from
+
+    send_tag = halo_comm_bc_send_tag(bc)
+    recv_tag = halo_comm_bc_recv_tag(bc)
+
+    # @info "MPI.Isend: dest_rank=$dest_rank, send_tag=$send_tag"
+    # MPI.Isend(send_buffer, dest_rank, send_tag, MPI.COMM_WORLD)
+    # @info "MPI.Isend: done!"
+ # + # @info "MPI.Recv!: src_rank=$src_rank, recv_tag=$recv_tag" + # MPI.Recv!(recv_buffer, dest_rank, recv_tag, MPI.COMM_WORLD) + # @info "MPI.Recv! done!" + + @info "Sendrecv! src_rank=$src_rank, dest_rank=$dest_rank, send_tag=$send_tag, recv_tag=$recv_tag" + MPI.Sendrecv!(send_buffer, dest_rank, send_tag, + recv_buffer, dest_rank, recv_tag, + MPI.COMM_WORLD) + @info "Sendrecv! done!" +end + ##### ##### Distributed model struct and constructor ##### diff --git a/src/Distributed/test_distributed_model.jl b/src/Distributed/test_distributed_model.jl index 0d9a807a89..4bafebece9 100644 --- a/src/Distributed/test_distributed_model.jl +++ b/src/Distributed/test_distributed_model.jl @@ -2,6 +2,8 @@ using Test import MPI +using Oceananigans: interior, fill_halo_regions! + include("distributed_model.jl") MPI.Init() @@ -12,5 +14,8 @@ dm = DistributedModel(ranks = (2, 2, 2), size = (16, 16, 16), poisson_solver = nothing) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) -@info "Rank $my_rank: $(dm.connectivity), $(dm.model.grid.zF[end])" -@info "u.x BCs: $(dm.model.boundary_conditions.solution.u.x)" +# @info "Rank $my_rank: $(dm.connectivity), $(dm.model.grid.zF[end])" +# @info "u.x BCs: $(dm.model.boundary_conditions.solution.u.x)" + +interior(dm.model.velocities.u) .= rand(8, 8, 8) +fill_halo_regions!(dm.model.velocities.u.data, dm.model.boundary_conditions.solution.u, CPU(), dm.model.grid) From 35bb6745c5f1b909c43e247750daaf32aed8c743 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sun, 5 Jan 2020 11:50:31 -0500 Subject: [PATCH 040/100] Actually fill in the west halo. Also start to generalize. --- src/Distributed/distributed_model.jl | 46 ++++++++++++----------- src/Distributed/test_distributed_model.jl | 2 + 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/src/Distributed/distributed_model.jl b/src/Distributed/distributed_model.jl index 119f550c6c..0c862240b7 100644 --- a/src/Distributed/distributed_model.jl +++ b/src/Distributed/distributed_model.jl @@ -128,35 +128,37 @@ function inject_halo_communication_boundary_conditions(boundary_conditions, my_r return NamedTuple{propertynames(boundary_conditions)}(Tuple(new_field_bcs)) end -# Note: Hard-coded so this only works up to 10^3 ranks. -@inline halo_comm_bc_send_tag(bc) = 10^3 * bc.condition.rank_from + bc.condition.rank_to -@inline halo_comm_bc_recv_tag(bc) = 10^3 * bc.condition.rank_to + bc.condition.rank_from +# Unfortunately can't call MPI.Comm_size(MPI.COMM_WORLD) before MPI.Init(). +const MAX_RANKS = 10^3 -function fill_west_halo!(c, bc::HaloCommunicationBC, arch, grid, args...) - N, H = grid.Nx, grid.Hx +sides = (:west, :east, :south, :north, :top, :bottom) +coords = (:x, :x, :y, :y, :z, :z) + +@inline west_halo_comm_bc_send_tag(bc) = 6 * (MAX_RANKS * bc.condition.rank_from + bc.condition.rank_to) +@inline west_halo_comm_bc_recv_tag(bc) = 6 * (MAX_RANKS * bc.condition.rank_to + bc.condition.rank_from) + +@inline west_send_buffer(c, N, H) = c.parent[N+1:N+H, :, :] - send_buffer = c.parent[N+1:N+H, :, :] - recv_buffer = c.parent[1:H, :, :] +@inline west_recv_buffer(grid) = zeros(grid.Hx, grid.Ty, grid.Tz) - dest_rank = bc.condition.rank_to - src_rank = bc.condition.rank_from +const east_recv_buffer = west_recv_buffer - send_tag = halo_comm_bc_send_tag(bc) - recv_tag = halo_comm_bc_recv_tag(bc) +function fill_west_halo!(c, bc::HaloCommunicationBC, arch, grid, args...) 
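+    # A sketch of the exchange below: ship send_buffer (interior slabs) to the
+    # neighboring rank and receive that rank's matching slabs into recv_buffer,
+    # which is then copied into this rank's west halo.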
+ send_buffer = west_send_buffer(c, grid.Nx, grid.Hx) + recv_buffer = west_recv_buffer(grid) + + send_tag = west_halo_comm_bc_send_tag(bc) + recv_tag = west_halo_comm_bc_recv_tag(bc) - # @info "MPI.Isend: dest_rank=$dest_rank, send_tag=$send_tag" - # MPI.Isend(send_buffer, dest_rank, send_tag, MPI.COMM_WORLD) - # @info "MPI.Isend: done!" - # - # @info "MPI.Recv!: src_rank=$src_rank, recv_tag=$recv_tag" - # MPI.Recv!(recv_buffer, dest_rank, recv_tag, MPI.COMM_WORLD) - # @info "MPI.Recv! done!" + rank_send_to = rank_recv_from = bc.condition.rank_to - @info "Sendrecv! src_rank=$src_rank, dest_rank=$dest_rank, send_tag=$send_tag, recv_tag=$recv_tag" - MPI.Sendrecv!(send_buffer, dest_rank, send_tag, - recv_buffer, dest_rank, recv_tag, + @info "Sendrecv!: rank_send_to=rank_recv_from=$rank_send_to, send_tag=$send_tag, recv_tag=$recv_tag" + MPI.Sendrecv!(send_buffer, rank_send_to, send_tag, + recv_buffer, rank_recv_from, recv_tag, MPI.COMM_WORLD) - @info "Sendrecv! done!" + @info "Sendrecv!: done!" + + c.parent[1:grid.Hx, :, :] .= recv_buffer end ##### diff --git a/src/Distributed/test_distributed_model.jl b/src/Distributed/test_distributed_model.jl index 4bafebece9..9790dbd705 100644 --- a/src/Distributed/test_distributed_model.jl +++ b/src/Distributed/test_distributed_model.jl @@ -19,3 +19,5 @@ my_rank = MPI.Comm_rank(MPI.COMM_WORLD) interior(dm.model.velocities.u) .= rand(8, 8, 8) fill_halo_regions!(dm.model.velocities.u.data, dm.model.boundary_conditions.solution.u, CPU(), dm.model.grid) + +display(interior(dm.model.velocities.u)) From 45666369d3ea4f5c864d8d662a5f445b22f3587a Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sun, 5 Jan 2020 12:39:52 -0500 Subject: [PATCH 041/100] Oceananigans.Distributed: use macros to fill each halo. --- src/Distributed/distributed_model.jl | 87 +++++++++++++++++++++------- 1 file changed, 67 insertions(+), 20 deletions(-) diff --git a/src/Distributed/distributed_model.jl b/src/Distributed/distributed_model.jl index 0c862240b7..2b54a591f8 100644 --- a/src/Distributed/distributed_model.jl +++ b/src/Distributed/distributed_model.jl @@ -4,7 +4,9 @@ using Oceananigans using Oceananigans: BCType, PBC using Oceananigans.Grids: validate_tupled_argument -import Oceananigans: fill_west_halo! +import Oceananigans: fill_west_halo!, fill_east_halo!, + fill_south_halo!, fill_north_halo!, + fill_bottom_halo!, fill_top_halo! ##### ##### Convinient aliases @@ -128,37 +130,82 @@ function inject_halo_communication_boundary_conditions(boundary_conditions, my_r return NamedTuple{propertynames(boundary_conditions)}(Tuple(new_field_bcs)) end -# Unfortunately can't call MPI.Comm_size(MPI.COMM_WORLD) before MPI.Init(). -const MAX_RANKS = 10^3 +##### +##### Filling halos for halo communication boundary conditions +##### sides = (:west, :east, :south, :north, :top, :bottom) coords = (:x, :x, :y, :y, :z, :z) -@inline west_halo_comm_bc_send_tag(bc) = 6 * (MAX_RANKS * bc.condition.rank_from + bc.condition.rank_to) -@inline west_halo_comm_bc_recv_tag(bc) = 6 * (MAX_RANKS * bc.condition.rank_to + bc.condition.rank_from) +# Unfortunately can't call MPI.Comm_size(MPI.COMM_WORLD) before MPI.Init(). +const MAX_RANKS = 10^3 + +# Define functions that return unique send and recv MPI tags for each side. 
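+# For example, with MAX_RANKS = 10^3 the west-side exchange (i = 1) from
+# rank 3 to rank 7 gets send_tag = 6 * (1000*3 + 7) + 1 = 18043 and
+# recv_tag = 6 * (1000*7 + 3) + 1 = 42019, so distinct (side, from, to)
+# triples always map to distinct tags.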
+for (i, side) in enumerate(sides) + send_tag_fn_name = Symbol(side, :_halo_comm_bc_send_tag) + recv_tag_fn_name = Symbol(side, :_halo_comm_bc_recv_tag) + @eval begin + @inline $send_tag_fn_name(bc) = 6 * (MAX_RANKS * bc.condition.rank_from + bc.condition.rank_to) + $i + @inline $recv_tag_fn_name(bc) = 6 * (MAX_RANKS * bc.condition.rank_to + bc.condition.rank_from) + $i + end +end + +@inline west_send_buffer(c, N, H) = c.parent[N+1:N+H, :, :] +@inline east_send_buffer(c, N, H) = c.parent[1+H:2H, :, :] +@inline south_send_buffer(c, N, H) = c.parent[:, N+1:N+H, :] +@inline north_send_buffer(c, N, H) = c.parent[:, 1+H:2H, :] +@inline top_send_buffer(c, N, H) = c.parent[:, :, 1+H:2H] +@inline bottom_send_buffer(c, N, H) = c.parent[:, :, N+1:N+H] + +@inline west_recv_buffer(grid) = zeros(grid.Hx, grid.Ty, grid.Tz) +@inline south_recv_buffer(grid) = zeros(grid.Tx, grid.Hy, grid.Tz) +@inline top_recv_buffer(grid) = zeros(grid.Tx, grid.Ty, grid.Hz) + +const east_recv_buffer = west_recv_buffer +const north_recv_buffer = south_recv_buffer +const bottom_recv_buffer = top_recv_buffer -@inline west_send_buffer(c, N, H) = c.parent[N+1:N+H, :, :] +@inline copy_recv_buffer_into_west_halo!(c, N, H, buf) = (c.parent[ 1:H, :, :] .= buf) +@inline copy_recv_buffer_into_east_halo!(c, N, H, buf) = (c.parent[N+H+1:N+2H, :, :] .= buf) +@inline copy_recv_buffer_into_south_halo!(c, N, H, buf) = (c.parent[:, 1:H, :] .= buf) +@inline copy_recv_buffer_into_north_halo!(c, N, H, buf) = (c.parent[:, N+H+1:N+2H, :] .= buf) +@inline copy_recv_buffer_into_bottom_halo!(c, N, H, buf) = (c.parent[:, :, 1:H ] .= buf) +@inline copy_recv_buffer_into_top_halo!(c, N, H, buf) = (c.parent[:, :, N+H+1:N+2H] .= buf) -@inline west_recv_buffer(grid) = zeros(grid.Hx, grid.Ty, grid.Tz) +for (x, side) in zip(coords, sides) + H = Symbol(:H, x) + N = Symbol(:N, x) -const east_recv_buffer = west_recv_buffer + fill_fn_name = Symbol(:fill_, side, :_halo!) + send_buf_fn_name = Symbol(side, :_send_buffer) + recv_buf_fn_name = Symbol(side, :_recv_buffer) + send_tag_fn_name = Symbol(side, :_halo_comm_bc_send_tag) + recv_tag_fn_name = Symbol(side, :_halo_comm_bc_recv_tag) + copy_buf_fn_name = Symbol(:copy_recv_buffer_into_, side, :_halo!) -function fill_west_halo!(c, bc::HaloCommunicationBC, arch, grid, args...) - send_buffer = west_send_buffer(c, grid.Nx, grid.Hx) - recv_buffer = west_recv_buffer(grid) + @eval begin + function $fill_fn_name(c, bc::HaloCommunicationBC, arch, grid, args...) + send_buffer = $send_buf_fn_name(c, grid.$(N), grid.$(H)) + recv_buffer = $recv_buf_fn_name(grid) - send_tag = west_halo_comm_bc_send_tag(bc) - recv_tag = west_halo_comm_bc_recv_tag(bc) + send_tag = $send_tag_fn_name(bc) + recv_tag = $recv_tag_fn_name(bc) - rank_send_to = rank_recv_from = bc.condition.rank_to + my_rank = bc.condition.rank_from + rank_send_to = rank_recv_from = bc.condition.rank_to - @info "Sendrecv!: rank_send_to=rank_recv_from=$rank_send_to, send_tag=$send_tag, recv_tag=$recv_tag" - MPI.Sendrecv!(send_buffer, rank_send_to, send_tag, - recv_buffer, rank_recv_from, recv_tag, - MPI.COMM_WORLD) - @info "Sendrecv!: done!" + @info "Sendrecv!: my_rank=$my_rank, rank_send_to=rank_recv_from=$rank_send_to, " * + "send_tag=$send_tag, recv_tag=$recv_tag" - c.parent[1:grid.Hx, :, :] .= recv_buffer + MPI.Sendrecv!(send_buffer, rank_send_to, send_tag, + recv_buffer, rank_recv_from, recv_tag, + MPI.COMM_WORLD) + + @info "Sendrecv!: my_rank=$my_rank done!" 
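+            # copy the just-received buffer into this side's halo region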
+ + $copy_buf_fn_name(c, grid.$(N), grid.$(H), recv_buffer) + end + end end ##### From 227f2be16dab1635d5266d257e4ea81c4b925a83 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sun, 5 Jan 2020 13:16:59 -0500 Subject: [PATCH 042/100] Hmmm, halo communication seems to deadlock :( --- src/Distributed/distributed_model.jl | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/Distributed/distributed_model.jl b/src/Distributed/distributed_model.jl index 2b54a591f8..c1b46c81ae 100644 --- a/src/Distributed/distributed_model.jl +++ b/src/Distributed/distributed_model.jl @@ -194,14 +194,22 @@ for (x, side) in zip(coords, sides) my_rank = bc.condition.rank_from rank_send_to = rank_recv_from = bc.condition.rank_to - @info "Sendrecv!: my_rank=$my_rank, rank_send_to=rank_recv_from=$rank_send_to, " * - "send_tag=$send_tag, recv_tag=$recv_tag" - - MPI.Sendrecv!(send_buffer, rank_send_to, send_tag, - recv_buffer, rank_recv_from, recv_tag, - MPI.COMM_WORLD) - - @info "Sendrecv!: my_rank=$my_rank done!" + @info "MPI.Isend: my_rank=$my_rank, rank_send_to=$rank_send_to, send_tag=$send_tag" + MPI.Isend(send_buffer, rank_send_to, send_tag, MPI.COMM_WORLD) + @info "MPI.Isend: done!" + + @info "MPI.Recv!: my_rank=$my_rank, rank_recv_from=$rank_recv_from, recv_tag=$recv_tag" + MPI.Recv!(recv_buffer, rank_recv_from, recv_tag, MPI.COMM_WORLD) + @info "MPI.Recv! done!" + + # @info "Sendrecv!: my_rank=$my_rank, rank_send_to=rank_recv_from=$rank_send_to, " * + # "send_tag=$send_tag, recv_tag=$recv_tag" + # + # MPI.Sendrecv!(send_buffer, rank_send_to, send_tag, + # recv_buffer, rank_recv_from, recv_tag, + # MPI.COMM_WORLD) + # + # @info "Sendrecv!: my_rank=$my_rank done!" $copy_buf_fn_name(c, grid.$(N), grid.$(H), recv_buffer) end From 5cdeac3566fbef799912ba2e9a87ed379292e406 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Fri, 5 Feb 2021 09:54:07 -0500 Subject: [PATCH 043/100] Modernize `distributed_model.jl` --- src/Distributed/distributed_model.jl | 335 ++++++++++++++------------- 1 file changed, 176 insertions(+), 159 deletions(-) diff --git a/src/Distributed/distributed_model.jl b/src/Distributed/distributed_model.jl index c1b46c81ae..777571bf9b 100644 --- a/src/Distributed/distributed_model.jl +++ b/src/Distributed/distributed_model.jl @@ -1,18 +1,14 @@ import MPI using Oceananigans -using Oceananigans: BCType, PBC -using Oceananigans.Grids: validate_tupled_argument - -import Oceananigans: fill_west_halo!, fill_east_halo!, - fill_south_halo!, fill_north_halo!, - fill_bottom_halo!, fill_top_halo! +using Oceananigans.Grids -##### -##### Convinient aliases -##### +using Oceananigans.Grids: validate_tupled_argument +using Oceananigans.BoundaryConditions: BCType -const PeriodicBC = PBC +import Oceananigans.BoundaryConditions: + fill_west_halo!, fill_east_halo!, fill_south_halo!, + fill_north_halo!, fill_bottom_halo!, fill_top_halo! 
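+
+# Extending these fill_*_halo! methods is what lets MPI halo exchange plug
+# into fill_halo_regions! purely through dispatch on the boundary condition
+# type.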
##### ##### Converting between index and MPI rank taking k as the fast index @@ -29,14 +25,27 @@ const PeriodicBC = PBC end ##### -##### Connectivity graph +##### Rank connectivity graph ##### const Connectivity = NamedTuple{(:east, :west, :north, :south, :top, :bottom)} -function increment_index(i, R, bc) +struct RankConnectivity{E, W, N, S, T, B} + east :: E + west :: W + north :: N + south :: S + top :: T + bottom :: B +end + +RankConnectivity(; east, west, north, south, top, bottom) = + RankConnectivity(east, west, north, south, top, bottom) + +function increment_index(i, R, topo) + R == 1 && return nothing if i+1 > R - if bc isa PeriodicBC + if topo == Periodic return 1 else return nothing @@ -46,9 +55,10 @@ function increment_index(i, R, bc) end end -function decrement_index(i, R, bc) +function decrement_index(i, R, topo) + R == 1 && return nothing if i-1 < 1 - if bc isa PeriodicBC + if topo == Periodic return R else return nothing @@ -58,16 +68,17 @@ function decrement_index(i, R, bc) end end -function construct_connectivity(index, ranks, bcs) - i, j, k = index +function RankConnectivity(model_index, ranks, topology) + i, j, k = model_index Rx, Ry, Rz = ranks + TX, TY, TZ = topology - i_east = increment_index(i, Rx, bcs.x.right) - i_west = decrement_index(i, Rx, bcs.x.left) - j_north = increment_index(j, Ry, bcs.y.north) - j_south = decrement_index(j, Ry, bcs.y.south) - k_top = increment_index(k, Rz, bcs.z.top) - k_bot = decrement_index(k, Rz, bcs.z.bottom) + i_east = increment_index(i, Rx, TX) + i_west = decrement_index(i, Rx, TX) + j_north = increment_index(j, Ry, TY) + j_south = decrement_index(j, Ry, TY) + k_top = increment_index(k, Rz, TZ) + k_bot = decrement_index(k, Rz, TZ) r_east = isnothing(i_east) ? nothing : index2rank(i_east, j, k, Rx, Ry, Rz) r_west = isnothing(i_west) ? nothing : index2rank(i_west, j, k, Rx, Ry, Rz) @@ -76,8 +87,8 @@ function construct_connectivity(index, ranks, bcs) r_top = isnothing(k_top) ? nothing : index2rank(i, j, k_top, Rx, Ry, Rz) r_bot = isnothing(k_bot) ? nothing : index2rank(i, j, k_bot, Rx, Ry, Rz) - return (east=r_east, west=r_west, north=r_north, - south=r_south, top=r_top, bottom=r_bot) + return RankConnectivity(east=r_east, west=r_west, north=r_north, + south=r_south, top=r_top, bottom=r_bot) end ##### @@ -85,136 +96,139 @@ end ##### struct HaloCommunication <: BCType end + const HaloCommunicationBC = BoundaryCondition{<:HaloCommunication} -const HaloCommunicationDetails = NamedTuple{(:rank_from, :rank_to)} -HaloCommunicationDetails(; rank_from, rank_to) = HaloCommunicationDetails((rank_from, rank_to)) +HaloCommunicationBoundaryCondition(val; kwargs...) = BoundaryCondition(HaloCommunication, val; kwargs...) 
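
# A sketch of how a communicating BC is assembled (HaloCommunicationRanks is
# defined just below; `neighbor_rank` here is a stand-in for a value from the
# rank connectivity graph):
#
#   comm_ranks = HaloCommunicationRanks(from=my_rank, to=neighbor_rank)
#   west_bc = HaloCommunicationBoundaryCondition(comm_ranks)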
-function inject_halo_communication_boundary_conditions(boundary_conditions, my_rank, connectivity) - new_field_bcs = [] +struct HaloCommunicationRanks{T} + from :: T + to :: T +end - for field_bcs in boundary_conditions - rank_east = connectivity.east - rank_west = connectivity.west - rank_north = connectivity.north - rank_south = connectivity.south - rank_top = connectivity.top - rank_bottom = connectivity.bottom - - east_comm_bc_details = HaloCommunicationDetails(rank_from=my_rank, rank_to=connectivity.east) - west_comm_bc_details = HaloCommunicationDetails(rank_from=my_rank, rank_to=connectivity.west) - north_comm_bc_details = HaloCommunicationDetails(rank_from=my_rank, rank_to=connectivity.north) - south_comm_bc_details = HaloCommunicationDetails(rank_from=my_rank, rank_to=connectivity.south) - top_comm_bc_details = HaloCommunicationDetails(rank_from=my_rank, rank_to=connectivity.top) - bottom_comm_bc_details = HaloCommunicationDetails(rank_from=my_rank, rank_to=connectivity.bottom) - - east_comm_bc = BoundaryCondition(HaloCommunication, east_comm_bc_details) - west_comm_bc = BoundaryCondition(HaloCommunication, west_comm_bc_details) - north_comm_bc = BoundaryCondition(HaloCommunication, north_comm_bc_details) - south_comm_bc = BoundaryCondition(HaloCommunication, south_comm_bc_details) - top_comm_bc = BoundaryCondition(HaloCommunication, top_comm_bc_details) - bottom_comm_bc = BoundaryCondition(HaloCommunication, bottom_comm_bc_details) - - x_bcs = CoordinateBoundaryConditions(isnothing(rank_west) ? field_bcs.x.left : west_comm_bc, - isnothing(rank_east) ? field_bcs.x.right : east_comm_bc) - - y_bcs = CoordinateBoundaryConditions(isnothing(rank_south) ? field_bcs.y.south : south_comm_bc, - isnothing(rank_north) ? field_bcs.y.north : north_comm_bc) - - z_bcs = CoordinateBoundaryConditions(isnothing(rank_bottom) ? field_bcs.z.bottom : bottom_comm_bc, - isnothing(rank_top) ? field_bcs.z.top : top_comm_bc) - - push!(new_field_bcs, FieldBoundaryConditions(x_bcs, y_bcs, z_bcs)) - end +HaloCommunicationRanks(; from, to) = HaloCommunicationRanks(from, to) - return NamedTuple{propertynames(boundary_conditions)}(Tuple(new_field_bcs)) +function inject_halo_communication_boundary_conditions(field_bcs, my_rank, connectivity) + new_field_bcs = [] + + rank_east = connectivity.east + rank_west = connectivity.west + rank_north = connectivity.north + rank_south = connectivity.south + rank_top = connectivity.top + rank_bottom = connectivity.bottom + + east_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_east) + west_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_west) + north_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_north) + south_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_south) + top_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_top) + bottom_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_bottom) + + east_comm_bc = HaloCommunicationBoundaryCondition(east_comm_ranks) + west_comm_bc = HaloCommunicationBoundaryCondition(west_comm_ranks) + north_comm_bc = HaloCommunicationBoundaryCondition(north_comm_ranks) + south_comm_bc = HaloCommunicationBoundaryCondition(south_comm_ranks) + top_comm_bc = HaloCommunicationBoundaryCondition(top_comm_ranks) + bottom_comm_bc = HaloCommunicationBoundaryCondition(bottom_comm_ranks) + + x_bcs = CoordinateBoundaryConditions(isnothing(rank_west) ? field_bcs.x.left : west_comm_bc, + isnothing(rank_east) ? 
field_bcs.x.right : east_comm_bc) + + y_bcs = CoordinateBoundaryConditions(isnothing(rank_south) ? field_bcs.y.south : south_comm_bc, + isnothing(rank_north) ? field_bcs.y.north : north_comm_bc) + + z_bcs = CoordinateBoundaryConditions(isnothing(rank_bottom) ? field_bcs.z.bottom : bottom_comm_bc, + isnothing(rank_top) ? field_bcs.z.top : top_comm_bc) + + return FieldBoundaryConditions(x_bcs, y_bcs, z_bcs) end ##### ##### Filling halos for halo communication boundary conditions ##### -sides = (:west, :east, :south, :north, :top, :bottom) -coords = (:x, :x, :y, :y, :z, :z) - -# Unfortunately can't call MPI.Comm_size(MPI.COMM_WORLD) before MPI.Init(). -const MAX_RANKS = 10^3 - -# Define functions that return unique send and recv MPI tags for each side. -for (i, side) in enumerate(sides) - send_tag_fn_name = Symbol(side, :_halo_comm_bc_send_tag) - recv_tag_fn_name = Symbol(side, :_halo_comm_bc_recv_tag) - @eval begin - @inline $send_tag_fn_name(bc) = 6 * (MAX_RANKS * bc.condition.rank_from + bc.condition.rank_to) + $i - @inline $recv_tag_fn_name(bc) = 6 * (MAX_RANKS * bc.condition.rank_to + bc.condition.rank_from) + $i - end -end - -@inline west_send_buffer(c, N, H) = c.parent[N+1:N+H, :, :] -@inline east_send_buffer(c, N, H) = c.parent[1+H:2H, :, :] -@inline south_send_buffer(c, N, H) = c.parent[:, N+1:N+H, :] -@inline north_send_buffer(c, N, H) = c.parent[:, 1+H:2H, :] -@inline top_send_buffer(c, N, H) = c.parent[:, :, 1+H:2H] -@inline bottom_send_buffer(c, N, H) = c.parent[:, :, N+1:N+H] - -@inline west_recv_buffer(grid) = zeros(grid.Hx, grid.Ty, grid.Tz) -@inline south_recv_buffer(grid) = zeros(grid.Tx, grid.Hy, grid.Tz) -@inline top_recv_buffer(grid) = zeros(grid.Tx, grid.Ty, grid.Hz) - -const east_recv_buffer = west_recv_buffer -const north_recv_buffer = south_recv_buffer -const bottom_recv_buffer = top_recv_buffer - -@inline copy_recv_buffer_into_west_halo!(c, N, H, buf) = (c.parent[ 1:H, :, :] .= buf) -@inline copy_recv_buffer_into_east_halo!(c, N, H, buf) = (c.parent[N+H+1:N+2H, :, :] .= buf) -@inline copy_recv_buffer_into_south_halo!(c, N, H, buf) = (c.parent[:, 1:H, :] .= buf) -@inline copy_recv_buffer_into_north_halo!(c, N, H, buf) = (c.parent[:, N+H+1:N+2H, :] .= buf) -@inline copy_recv_buffer_into_bottom_halo!(c, N, H, buf) = (c.parent[:, :, 1:H ] .= buf) -@inline copy_recv_buffer_into_top_halo!(c, N, H, buf) = (c.parent[:, :, N+H+1:N+2H] .= buf) - -for (x, side) in zip(coords, sides) - H = Symbol(:H, x) - N = Symbol(:N, x) - - fill_fn_name = Symbol(:fill_, side, :_halo!) - send_buf_fn_name = Symbol(side, :_send_buffer) - recv_buf_fn_name = Symbol(side, :_recv_buffer) - send_tag_fn_name = Symbol(side, :_halo_comm_bc_send_tag) - recv_tag_fn_name = Symbol(side, :_halo_comm_bc_recv_tag) - copy_buf_fn_name = Symbol(:copy_recv_buffer_into_, side, :_halo!) - - @eval begin - function $fill_fn_name(c, bc::HaloCommunicationBC, arch, grid, args...) - send_buffer = $send_buf_fn_name(c, grid.$(N), grid.$(H)) - recv_buffer = $recv_buf_fn_name(grid) - - send_tag = $send_tag_fn_name(bc) - recv_tag = $recv_tag_fn_name(bc) - - my_rank = bc.condition.rank_from - rank_send_to = rank_recv_from = bc.condition.rank_to - - @info "MPI.Isend: my_rank=$my_rank, rank_send_to=$rank_send_to, send_tag=$send_tag" - MPI.Isend(send_buffer, rank_send_to, send_tag, MPI.COMM_WORLD) - @info "MPI.Isend: done!" - - @info "MPI.Recv!: my_rank=$my_rank, rank_recv_from=$rank_recv_from, recv_tag=$recv_tag" - MPI.Recv!(recv_buffer, rank_recv_from, recv_tag, MPI.COMM_WORLD) - @info "MPI.Recv! done!" 
- - # @info "Sendrecv!: my_rank=$my_rank, rank_send_to=rank_recv_from=$rank_send_to, " * - # "send_tag=$send_tag, recv_tag=$recv_tag" - # - # MPI.Sendrecv!(send_buffer, rank_send_to, send_tag, - # recv_buffer, rank_recv_from, recv_tag, - # MPI.COMM_WORLD) - # - # @info "Sendrecv!: my_rank=$my_rank done!" - - $copy_buf_fn_name(c, grid.$(N), grid.$(H), recv_buffer) - end - end -end +# sides = (:west, :east, :south, :north, :top, :bottom) +# coords = (:x, :x, :y, :y, :z, :z) + +# # Unfortunately can't call MPI.Comm_size(MPI.COMM_WORLD) before MPI.Init(). +# const MAX_RANKS = 10^3 + +# # Define functions that return unique send and recv MPI tags for each side. +# for (i, side) in enumerate(sides) +# send_tag_fn_name = Symbol(side, :_halo_comm_bc_send_tag) +# recv_tag_fn_name = Symbol(side, :_halo_comm_bc_recv_tag) +# @eval begin +# @inline $send_tag_fn_name(bc) = 6 * (MAX_RANKS * bc.condition.rank_from + bc.condition.rank_to) + $i +# @inline $recv_tag_fn_name(bc) = 6 * (MAX_RANKS * bc.condition.rank_to + bc.condition.rank_from) + $i +# end +# end + +# @inline west_send_buffer(c, N, H) = c.parent[N+1:N+H, :, :] +# @inline east_send_buffer(c, N, H) = c.parent[1+H:2H, :, :] +# @inline south_send_buffer(c, N, H) = c.parent[:, N+1:N+H, :] +# @inline north_send_buffer(c, N, H) = c.parent[:, 1+H:2H, :] +# @inline top_send_buffer(c, N, H) = c.parent[:, :, 1+H:2H] +# @inline bottom_send_buffer(c, N, H) = c.parent[:, :, N+1:N+H] + +# @inline west_recv_buffer(grid) = zeros(grid.Hx, grid.Ty, grid.Tz) +# @inline south_recv_buffer(grid) = zeros(grid.Tx, grid.Hy, grid.Tz) +# @inline top_recv_buffer(grid) = zeros(grid.Tx, grid.Ty, grid.Hz) + +# const east_recv_buffer = west_recv_buffer +# const north_recv_buffer = south_recv_buffer +# const bottom_recv_buffer = top_recv_buffer + +# @inline copy_recv_buffer_into_west_halo!(c, N, H, buf) = (c.parent[ 1:H, :, :] .= buf) +# @inline copy_recv_buffer_into_east_halo!(c, N, H, buf) = (c.parent[N+H+1:N+2H, :, :] .= buf) +# @inline copy_recv_buffer_into_south_halo!(c, N, H, buf) = (c.parent[:, 1:H, :] .= buf) +# @inline copy_recv_buffer_into_north_halo!(c, N, H, buf) = (c.parent[:, N+H+1:N+2H, :] .= buf) +# @inline copy_recv_buffer_into_bottom_halo!(c, N, H, buf) = (c.parent[:, :, 1:H ] .= buf) +# @inline copy_recv_buffer_into_top_halo!(c, N, H, buf) = (c.parent[:, :, N+H+1:N+2H] .= buf) + +# for (x, side) in zip(coords, sides) +# H = Symbol(:H, x) +# N = Symbol(:N, x) + +# fill_fn_name = Symbol(:fill_, side, :_halo!) +# send_buf_fn_name = Symbol(side, :_send_buffer) +# recv_buf_fn_name = Symbol(side, :_recv_buffer) +# send_tag_fn_name = Symbol(side, :_halo_comm_bc_send_tag) +# recv_tag_fn_name = Symbol(side, :_halo_comm_bc_recv_tag) +# copy_buf_fn_name = Symbol(:copy_recv_buffer_into_, side, :_halo!) + +# @eval begin +# function $fill_fn_name(c, bc::HaloCommunicationBC, arch, grid, args...) +# send_buffer = $send_buf_fn_name(c, grid.$(N), grid.$(H)) +# recv_buffer = $recv_buf_fn_name(grid) + +# send_tag = $send_tag_fn_name(bc) +# recv_tag = $recv_tag_fn_name(bc) + +# my_rank = bc.condition.rank_from +# rank_send_to = rank_recv_from = bc.condition.rank_to + +# @info "MPI.Isend: my_rank=$my_rank, rank_send_to=$rank_send_to, send_tag=$send_tag" +# MPI.Isend(send_buffer, rank_send_to, send_tag, MPI.COMM_WORLD) +# @info "MPI.Isend: done!" + +# @info "MPI.Recv!: my_rank=$my_rank, rank_recv_from=$rank_recv_from, recv_tag=$recv_tag" +# MPI.Recv!(recv_buffer, rank_recv_from, recv_tag, MPI.COMM_WORLD) +# @info "MPI.Recv! done!" 
+ +# # @info "Sendrecv!: my_rank=$my_rank, rank_send_to=rank_recv_from=$rank_send_to, " * +# # "send_tag=$send_tag, recv_tag=$recv_tag" +# # +# # MPI.Sendrecv!(send_buffer, rank_send_to, send_tag, +# # recv_buffer, rank_recv_from, recv_tag, +# # MPI.COMM_WORLD) +# # +# # @info "Sendrecv!: my_rank=$my_rank done!" + +# $copy_buf_fn_name(c, grid.$(N), grid.$(H), recv_buffer) +# end +# end +# end ##### ##### Distributed model struct and constructor @@ -235,17 +249,16 @@ x, y, z: Left and right endpoints for each dimension. ranks: Number of ranks in each dimension. model_kwargs: Passed to `Model` constructor. """ -function DistributedModel(; size, x, y, z, ranks, boundary_conditions, model_kwargs...) - validate_tupled_argument(ranks, Int, "size") +function DistributedModel(; grid, ranks, model_kwargs...) validate_tupled_argument(ranks, Int, "ranks") - Nx, Ny, Nz = size + Nx, Ny, Nz = size(grid) # Pull out left and right endpoints for full model. - xL, xR = x - yL, yR = y - zL, zR = z - Lx, Ly, Lz = xR-xL, yR-yL, zR-zL + xL, xR = grid.xF[1], grid.xF[Nx+1] + yL, yR = grid.yF[1], grid.yF[Ny+1] + zL, zR = grid.zF[1], grid.zF[Nz+1] + Lx, Ly, Lz = length(grid) Rx, Ry, Rz = ranks total_ranks = Rx*Ry*Rz @@ -257,18 +270,23 @@ function DistributedModel(; size, x, y, z, ranks, boundary_conditions, model_kwa if total_ranks != mpi_ranks throw(ArgumentError("ranks=($Rx, $Ry, $Rz) [$total_ranks total] inconsistent " * - "with number of MPI ranks: $mpi_ranks. Exiting with code 1.")) + "with number of MPI ranks: $mpi_ranks. Exiting with return code 1.")) MPI.Finalize() exit(code=1) end i, j, k = index = rank2index(my_rank, Rx, Ry, Rz) - @debug "Rank: $my_rank, index: $index" + @debug "My rank: $my_rank, my index: $index" ##### ##### Construct local grid ##### + # Make sure we can put an integer number of grid points in each rank. + @assert isinteger(Nx / Rx) + @assert isinteger(Ny / Ry) + @assert isinteger(Nz / Rz) + nx, ny, nz = Nx÷Rx, Ny÷Ry, Nz÷Rz lx, ly, lz = Lx/Rx, Ly/Ry, Lz/Rz @@ -277,28 +295,27 @@ function DistributedModel(; size, x, y, z, ranks, boundary_conditions, model_kwa z₁, z₂ = zL + (k-1)*lz, zL + k*lz @debug "Constructing local grid: n=($nx, $ny, $nz), x ∈ [$x₁, $x₂], y ∈ [$y₁, $y₂], z ∈ [$z₁, $z₂]" - grid = RegularCartesianGrid(size=(nx, ny, nz), x=(x₁, x₂), y=(y₁, y₂), z=(z₁, z₂)) + my_grid = RegularCartesianGrid(topology=topology(grid), size=(nx, ny, nz), x=(x₁, x₂), y=(y₁, y₂), z=(z₁, z₂)) ##### ##### Construct local connectivity ##### - my_connectivity = construct_connectivity(index, ranks, boundary_conditions.u) + my_connectivity = RankConnectivity(index, ranks, topology(grid)) @debug "Local connectivity: $my_connectivity" ##### ##### Change appropriate boundary conditions to halo communication BCs ##### - @debug "Injecting halo communication boundary conditions..." - boundary_conditions_with_communication = inject_halo_communication_boundary_conditions(boundary_conditions, my_rank, my_connectivity) + # @debug "Injecting halo communication boundary conditions..." + # boundary_conditions_with_communication = inject_halo_communication_boundary_conditions(boundary_conditions, my_rank, my_connectivity) ##### ##### Construct local model ##### - my_model = Model(; grid = grid, boundary_conditions = boundary_conditions_with_communication, - model_kwargs...) + my_model = IncompressibleModel(; grid=my_grid, model_kwargs...) 
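+    # (each rank builds and keeps its own local model on its own sub-grid;
+    # there is no master/root model)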
return DistributedModel(index, ranks, my_model, my_connectivity) end From c84a958e708051acd32536a85712e90ab2deae2d Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Fri, 5 Feb 2021 09:54:29 -0500 Subject: [PATCH 044/100] Some basic tests for slab decomposition models --- src/Distributed/test_distributed_model.jl | 145 +++++++++++++++++++--- 1 file changed, 131 insertions(+), 14 deletions(-) diff --git a/src/Distributed/test_distributed_model.jl b/src/Distributed/test_distributed_model.jl index 9790dbd705..0e08689a9e 100644 --- a/src/Distributed/test_distributed_model.jl +++ b/src/Distributed/test_distributed_model.jl @@ -1,23 +1,140 @@ using Test +using MPI +using Oceananigans -import MPI +MPI.Initialized() || MPI.Init() +comm = MPI.COMM_WORLD -using Oceananigans: interior, fill_halo_regions! +# Right now just testing with 4 ranks! +mpi_ranks = MPI.Comm_size(comm) +@assert mpi_ranks == 4 -include("distributed_model.jl") +function test_triply_periodic_connectivity_with_411_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + dm = DistributedModel(grid=full_grid, ranks=(4, 1, 1)) -MPI.Init() + my_rank = MPI.Comm_rank(MPI.COMM_WORLD) + @test my_rank == index2rank(dm.index..., dm.ranks...) -dm = DistributedModel(ranks = (2, 2, 2), size = (16, 16, 16), - x = (0, 1), y = (-0.5, 0.5), z = (-10, 0), - boundary_conditions = HorizontallyPeriodicSolutionBCs(), - poisson_solver = nothing) + model = dm.model + connectivity = dm.connectivity -my_rank = MPI.Comm_rank(MPI.COMM_WORLD) -# @info "Rank $my_rank: $(dm.connectivity), $(dm.model.grid.zF[end])" -# @info "u.x BCs: $(dm.model.boundary_conditions.solution.u.x)" + # No communication in y and z. + @test isnothing(connectivity.south) + @test isnothing(connectivity.north) + @test isnothing(connectivity.top) + @test isnothing(connectivity.bottom) -interior(dm.model.velocities.u) .= rand(8, 8, 8) -fill_halo_regions!(dm.model.velocities.u.data, dm.model.boundary_conditions.solution.u, CPU(), dm.model.grid) + if my_rank == 0 + @test connectivity.east == 1 + @test connectivity.west == 3 + elseif my_rank == 1 + @test connectivity.east == 2 + @test connectivity.west == 0 + elseif my_rank == 2 + @test connectivity.east == 3 + @test connectivity.west == 1 + elseif my_rank == 3 + @test connectivity.east == 0 + @test connectivity.west == 2 + end + + nx, ny, nz = size(model.grid) + @test model.grid.xF[1] == 0.25*my_rank + @test model.grid.xF[nx+1] == 0.25*(my_rank+1) + @test model.grid.yF[1] == 0 + @test model.grid.yF[ny+1] == 2 + @test model.grid.zF[1] == -3 + @test model.grid.zF[nz+1] == 0 +end + +function test_triply_periodic_connectivity_with_141_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + dm = DistributedModel(grid=full_grid, ranks=(1, 4, 1)) + + my_rank = MPI.Comm_rank(MPI.COMM_WORLD) + @test my_rank == index2rank(dm.index..., dm.ranks...) + + model = dm.model + connectivity = dm.connectivity + + # No communication in x and z. 
+ @test isnothing(connectivity.east) + @test isnothing(connectivity.west) + @test isnothing(connectivity.top) + @test isnothing(connectivity.bottom) + + if my_rank == 0 + @test connectivity.north == 1 + @test connectivity.south == 3 + elseif my_rank == 1 + @test connectivity.north == 2 + @test connectivity.south == 0 + elseif my_rank == 2 + @test connectivity.north == 3 + @test connectivity.south == 1 + elseif my_rank == 3 + @test connectivity.north == 0 + @test connectivity.south == 2 + end + + nx, ny, nz = size(model.grid) + @test model.grid.xF[1] == 0 + @test model.grid.xF[nx+1] == 1 + @test model.grid.yF[1] == 0.5*my_rank + @test model.grid.yF[ny+1] == 0.5*(my_rank+1) + @test model.grid.zF[1] == -3 + @test model.grid.zF[nz+1] == 0 +end + +function test_triply_periodic_connectivity_with_114_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + dm = DistributedModel(grid=full_grid, ranks=(1, 1, 4)) + + my_rank = MPI.Comm_rank(MPI.COMM_WORLD) + @test my_rank == index2rank(dm.index..., dm.ranks...) + + model = dm.model + connectivity = dm.connectivity + + # No communication in x and y. + @test isnothing(connectivity.east) + @test isnothing(connectivity.west) + @test isnothing(connectivity.north) + @test isnothing(connectivity.south) + + if my_rank == 0 + @test connectivity.top == 1 + @test connectivity.bottom == 3 + elseif my_rank == 1 + @test connectivity.top == 2 + @test connectivity.bottom == 0 + elseif my_rank == 2 + @test connectivity.top == 3 + @test connectivity.bottom == 1 + elseif my_rank == 3 + @test connectivity.top == 0 + @test connectivity.bottom == 2 + end + + nx, ny, nz = size(model.grid) + @test model.grid.xF[1] == 0 + @test model.grid.xF[nx+1] == 1 + @test model.grid.yF[1] == 0 + @test model.grid.yF[ny+1] == 2 + @test model.grid.zF[1] == -3 + 0.75*my_rank + @test model.grid.zF[nz+1] == -3 + 0.75*(my_rank+1) +end + +@testset "Distributed MPI Oceananigans" begin + test_triply_periodic_connectivity_with_411_ranks() + test_triply_periodic_connectivity_with_141_ranks() + test_triply_periodic_connectivity_with_114_ranks() +end + +# MPI.Finalize() +# @test MPI.Finalized() -display(interior(dm.model.velocities.u)) From 571eba539513076aa549a6e06133ae28b1430931 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Fri, 5 Feb 2021 15:09:35 -0500 Subject: [PATCH 045/100] Properly inject halo communication BCs --- src/Distributed/distributed_model.jl | 72 +++++++++++++++-------- src/Distributed/test_distributed_model.jl | 30 +++++++++- 2 files changed, 77 insertions(+), 25 deletions(-) diff --git a/src/Distributed/distributed_model.jl b/src/Distributed/distributed_model.jl index 777571bf9b..5c426e98f9 100644 --- a/src/Distributed/distributed_model.jl +++ b/src/Distributed/distributed_model.jl @@ -7,6 +7,7 @@ using Oceananigans.Grids: validate_tupled_argument using Oceananigans.BoundaryConditions: BCType import Oceananigans.BoundaryConditions: + bctype_str, print_condition, fill_west_halo!, fill_east_halo!, fill_south_halo!, fill_north_halo!, fill_bottom_halo!, fill_top_halo! 
@@ -28,8 +29,6 @@ end ##### Rank connectivity graph ##### -const Connectivity = NamedTuple{(:east, :west, :north, :south, :top, :bottom)} - struct RankConnectivity{E, W, N, S, T, B} east :: E west :: W @@ -97,20 +96,22 @@ end struct HaloCommunication <: BCType end -const HaloCommunicationBC = BoundaryCondition{<:HaloCommunication} +# const HaloCommunicationBC = BoundaryCondition{<:HaloCommunication} + +bctype_str(::HaloCommunicationBC) ="HaloCommunication" HaloCommunicationBoundaryCondition(val; kwargs...) = BoundaryCondition(HaloCommunication, val; kwargs...) -struct HaloCommunicationRanks{T} - from :: T +struct HaloCommunicationRanks{F, T} + from :: F to :: T end HaloCommunicationRanks(; from, to) = HaloCommunicationRanks(from, to) -function inject_halo_communication_boundary_conditions(field_bcs, my_rank, connectivity) - new_field_bcs = [] +print_condition(hcr::HaloCommunicationRanks) = "(from rank $(hcr.from), to rank $(hcr.to))" +function inject_halo_communication_boundary_conditions(field_bcs, my_rank, connectivity) rank_east = connectivity.east rank_west = connectivity.west rank_north = connectivity.north @@ -132,14 +133,14 @@ function inject_halo_communication_boundary_conditions(field_bcs, my_rank, conne top_comm_bc = HaloCommunicationBoundaryCondition(top_comm_ranks) bottom_comm_bc = HaloCommunicationBoundaryCondition(bottom_comm_ranks) - x_bcs = CoordinateBoundaryConditions(isnothing(rank_west) ? field_bcs.x.left : west_comm_bc, - isnothing(rank_east) ? field_bcs.x.right : east_comm_bc) + x_bcs = CoordinateBoundaryConditions(isnothing(rank_west) ? field_bcs.west : west_comm_bc, + isnothing(rank_east) ? field_bcs.east : east_comm_bc) - y_bcs = CoordinateBoundaryConditions(isnothing(rank_south) ? field_bcs.y.south : south_comm_bc, - isnothing(rank_north) ? field_bcs.y.north : north_comm_bc) + y_bcs = CoordinateBoundaryConditions(isnothing(rank_south) ? field_bcs.south : south_comm_bc, + isnothing(rank_north) ? field_bcs.north : north_comm_bc) - z_bcs = CoordinateBoundaryConditions(isnothing(rank_bottom) ? field_bcs.z.bottom : bottom_comm_bc, - isnothing(rank_top) ? field_bcs.z.top : top_comm_bc) + z_bcs = CoordinateBoundaryConditions(isnothing(rank_bottom) ? field_bcs.bottom : bottom_comm_bc, + isnothing(rank_top) ? field_bcs.top : top_comm_bc) return FieldBoundaryConditions(x_bcs, y_bcs, z_bcs) end @@ -235,10 +236,10 @@ end ##### struct DistributedModel{I, A, R, G} - index :: I - ranks :: R - model :: A - connectivity :: G + index :: I + ranks :: R + model :: A + connectivity :: G end """ @@ -249,7 +250,7 @@ x, y, z: Left and right endpoints for each dimension. ranks: Number of ranks in each dimension. model_kwargs: Passed to `Model` constructor. """ -function DistributedModel(; grid, ranks, model_kwargs...) +function DistributedModel(; grid, ranks, boundary_conditions=nothing, model_kwargs...) validate_tupled_argument(ranks, Int, "ranks") Nx, Ny, Nz = size(grid) @@ -276,7 +277,7 @@ function DistributedModel(; grid, ranks, model_kwargs...) end i, j, k = index = rank2index(my_rank, Rx, Ry, Rz) - @debug "My rank: $my_rank, my index: $index" + @info "My rank: $my_rank, my index: $index" ##### ##### Construct local grid @@ -294,7 +295,7 @@ function DistributedModel(; grid, ranks, model_kwargs...) 
y₁, y₂ = yL + (j-1)*ly, yL + j*ly z₁, z₂ = zL + (k-1)*lz, zL + k*lz - @debug "Constructing local grid: n=($nx, $ny, $nz), x ∈ [$x₁, $x₂], y ∈ [$y₁, $y₂], z ∈ [$z₁, $z₂]" + @info "Constructing local grid: n=($nx, $ny, $nz), x ∈ [$x₁, $x₂], y ∈ [$y₁, $y₂], z ∈ [$z₁, $z₂]" my_grid = RegularCartesianGrid(topology=topology(grid), size=(nx, ny, nz), x=(x₁, x₂), y=(y₁, y₂), z=(z₁, z₂)) ##### @@ -302,20 +303,43 @@ function DistributedModel(; grid, ranks, model_kwargs...) ##### my_connectivity = RankConnectivity(index, ranks, topology(grid)) - @debug "Local connectivity: $my_connectivity" + @info "Local connectivity: $my_connectivity" ##### ##### Change appropriate boundary conditions to halo communication BCs ##### - # @debug "Injecting halo communication boundary conditions..." - # boundary_conditions_with_communication = inject_halo_communication_boundary_conditions(boundary_conditions, my_rank, my_connectivity) + # FIXME: Stop assuming (u, v, w, T, S). + + bcs = isnothing(boundary_conditions) ? NamedTuple() : boundary_conditions + + bcs = ( + u = haskey(bcs, :u) ? bcs.u : UVelocityBoundaryConditions(grid), + v = haskey(bcs, :v) ? bcs.v : VVelocityBoundaryConditions(grid), + w = haskey(bcs, :w) ? bcs.w : WVelocityBoundaryConditions(grid), + T = haskey(bcs, :T) ? bcs.T : TracerBoundaryConditions(grid), + S = haskey(bcs, :S) ? bcs.S : TracerBoundaryConditions(grid) + ) + + @debug "Injecting halo communication boundary conditions..." + + communicative_bcs = ( + u = inject_halo_communication_boundary_conditions(bcs.u, my_rank, my_connectivity), + v = inject_halo_communication_boundary_conditions(bcs.v, my_rank, my_connectivity), + w = inject_halo_communication_boundary_conditions(bcs.w, my_rank, my_connectivity), + T = inject_halo_communication_boundary_conditions(bcs.T, my_rank, my_connectivity), + S = inject_halo_communication_boundary_conditions(bcs.S, my_rank, my_connectivity) + ) ##### ##### Construct local model ##### - my_model = IncompressibleModel(; grid=my_grid, model_kwargs...) + my_model = IncompressibleModel(; grid=my_grid, boundary_conditions=communicative_bcs, model_kwargs...) 
return DistributedModel(index, ranks, my_model, my_connectivity) end + +function Base.show(io::IO, dm::DistributedModel) + print(io, "DistributedModel with $(dm.ranks) ranks") +end diff --git a/src/Distributed/test_distributed_model.jl b/src/Distributed/test_distributed_model.jl index 0e08689a9e..b9561e685e 100644 --- a/src/Distributed/test_distributed_model.jl +++ b/src/Distributed/test_distributed_model.jl @@ -47,6 +47,15 @@ function test_triply_periodic_connectivity_with_411_ranks() @test model.grid.yF[ny+1] == 2 @test model.grid.zF[1] == -3 @test model.grid.zF[nz+1] == 0 + + for field in fields(model) + @test field.boundary_conditions.east isa HaloCommunicationBC + @test field.boundary_conditions.west isa HaloCommunicationBC + @test !isa(field.boundary_conditions.north, HaloCommunicationBC) + @test !isa(field.boundary_conditions.south, HaloCommunicationBC) + @test !isa(field.boundary_conditions.top, HaloCommunicationBC) + @test !isa(field.boundary_conditions.bottom, HaloCommunicationBC) + end end function test_triply_periodic_connectivity_with_141_ranks() @@ -87,6 +96,15 @@ function test_triply_periodic_connectivity_with_141_ranks() @test model.grid.yF[ny+1] == 0.5*(my_rank+1) @test model.grid.zF[1] == -3 @test model.grid.zF[nz+1] == 0 + + for field in fields(model) + @test !isa(field.boundary_conditions.east, HaloCommunicationBC) + @test !isa(field.boundary_conditions.west, HaloCommunicationBC) + @test field.boundary_conditions.north isa HaloCommunicationBC + @test field.boundary_conditions.south isa HaloCommunicationBC + @test !isa(field.boundary_conditions.top, HaloCommunicationBC) + @test !isa(field.boundary_conditions.bottom, HaloCommunicationBC) + end end function test_triply_periodic_connectivity_with_114_ranks() @@ -127,14 +145,24 @@ function test_triply_periodic_connectivity_with_114_ranks() @test model.grid.yF[ny+1] == 2 @test model.grid.zF[1] == -3 + 0.75*my_rank @test model.grid.zF[nz+1] == -3 + 0.75*(my_rank+1) + + for field in fields(model) + @test !isa(field.boundary_conditions.east, HaloCommunicationBC) + @test !isa(field.boundary_conditions.west, HaloCommunicationBC) + @test !isa(field.boundary_conditions.north, HaloCommunicationBC) + @test !isa(field.boundary_conditions.south, HaloCommunicationBC) + @test field.boundary_conditions.top isa HaloCommunicationBC + @test field.boundary_conditions.bottom isa HaloCommunicationBC + end end @testset "Distributed MPI Oceananigans" begin test_triply_periodic_connectivity_with_411_ranks() test_triply_periodic_connectivity_with_141_ranks() test_triply_periodic_connectivity_with_114_ranks() + # TODO: 221 ranks + # TODO: triply bounded end # MPI.Finalize() # @test MPI.Finalized() - From ea72c5d9508b5e5b1aef0086ac293b5586476af4 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Fri, 5 Feb 2021 17:34:39 -0500 Subject: [PATCH 046/100] New multi-architectures --- src/Distributed/distributed_model.jl | 40 ++++++++++++++++------- src/Distributed/test_distributed_model.jl | 11 +++++-- 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/src/Distributed/distributed_model.jl b/src/Distributed/distributed_model.jl index 5c426e98f9..179eb69949 100644 --- a/src/Distributed/distributed_model.jl +++ b/src/Distributed/distributed_model.jl @@ -1,6 +1,7 @@ import MPI using Oceananigans +using Oceananigans.Architectures using Oceananigans.Grids using Oceananigans.Grids: validate_tupled_argument @@ -11,6 +12,21 @@ import Oceananigans.BoundaryConditions: fill_west_halo!, fill_east_halo!, fill_south_halo!, fill_north_halo!, fill_bottom_halo!, 
fill_top_halo! +##### +##### Architecture stuff +##### + +# TODO: Put connectivity inside architecture? MPI should be initialize so you can construct it in there. +# Might have to make it MultiCPU(; grid, ranks) + +struct MultiCPU{R} <: AbstractArchitecture + ranks :: R +end + +MultiCPU(; ranks) = MultiCPU(ranks) + +child_architecture(::MultiCPU) = CPU() + ##### ##### Converting between index and MPI rank taking k as the fast index ##### @@ -235,22 +251,17 @@ end ##### Distributed model struct and constructor ##### -struct DistributedModel{I, A, R, G} +struct DistributedModel{A, I, M, R, G} + architecture :: A index :: I ranks :: R - model :: A + model :: M connectivity :: G end -""" - DistributedModel(size, x, y, z, ranks, model_kwargs...) +function DistributedModel(; architecture, grid, boundary_conditions=nothing, model_kwargs...) + ranks = architecture.ranks -size: Number of total grid points. -x, y, z: Left and right endpoints for each dimension. -ranks: Number of ranks in each dimension. -model_kwargs: Passed to `Model` constructor. -""" -function DistributedModel(; grid, ranks, boundary_conditions=nothing, model_kwargs...) validate_tupled_argument(ranks, Int, "ranks") Nx, Ny, Nz = size(grid) @@ -335,9 +346,14 @@ function DistributedModel(; grid, ranks, boundary_conditions=nothing, model_kwar ##### Construct local model ##### - my_model = IncompressibleModel(; grid=my_grid, boundary_conditions=communicative_bcs, model_kwargs...) + my_model = IncompressibleModel(; + architecture = child_architecture(architecture), + grid = my_grid, + boundary_conditions = communicative_bcs, + model_kwargs... + ) - return DistributedModel(index, ranks, my_model, my_connectivity) + return DistributedModel(architecture, index, ranks, my_model, my_connectivity) end function Base.show(io::IO, dm::DistributedModel) diff --git a/src/Distributed/test_distributed_model.jl b/src/Distributed/test_distributed_model.jl index b9561e685e..4e515b6631 100644 --- a/src/Distributed/test_distributed_model.jl +++ b/src/Distributed/test_distributed_model.jl @@ -2,6 +2,8 @@ using Test using MPI using Oceananigans +using Oceananigans.BoundaryConditions: fill_halo_regions! + MPI.Initialized() || MPI.Init() comm = MPI.COMM_WORLD @@ -12,7 +14,8 @@ mpi_ranks = MPI.Comm_size(comm) function test_triply_periodic_connectivity_with_411_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) - dm = DistributedModel(grid=full_grid, ranks=(4, 1, 1)) + arch = MultiCPU(ranks=(4, 1, 1)) + dm = DistributedModel(architecture=arch, grid=full_grid) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) @test my_rank == index2rank(dm.index..., dm.ranks...) @@ -61,7 +64,8 @@ end function test_triply_periodic_connectivity_with_141_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) - dm = DistributedModel(grid=full_grid, ranks=(1, 4, 1)) + arch = MultiCPU(ranks=(1, 4, 1)) + dm = DistributedModel(architecture=arch, grid=full_grid) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) @test my_rank == index2rank(dm.index..., dm.ranks...) 
@@ -110,7 +114,8 @@ end function test_triply_periodic_connectivity_with_114_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) - dm = DistributedModel(grid=full_grid, ranks=(1, 1, 4)) + arch = MultiCPU(ranks=(1, 1, 4)) + dm = DistributedModel(architecture=arch, grid=full_grid) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) @test my_rank == index2rank(dm.index..., dm.ranks...) From c92dbb8f7a9b762de920ba33a88ac795d089faf3 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Fri, 5 Feb 2021 18:35:32 -0500 Subject: [PATCH 047/100] Success communicating east/west halos! --- src/Distributed/distributed_model.jl | 231 +++++++++++++++++---------- 1 file changed, 148 insertions(+), 83 deletions(-) diff --git a/src/Distributed/distributed_model.jl b/src/Distributed/distributed_model.jl index 179eb69949..21f7b5159b 100644 --- a/src/Distributed/distributed_model.jl +++ b/src/Distributed/distributed_model.jl @@ -4,11 +4,13 @@ using Oceananigans using Oceananigans.Architectures using Oceananigans.Grids +using KernelAbstractions: @kernel, @index, Event, MultiEvent using Oceananigans.Grids: validate_tupled_argument using Oceananigans.BoundaryConditions: BCType import Oceananigans.BoundaryConditions: bctype_str, print_condition, + fill_halo_regions!, fill_west_halo!, fill_east_halo!, fill_south_halo!, fill_north_halo!, fill_bottom_halo!, fill_top_halo! @@ -19,7 +21,9 @@ import Oceananigans.BoundaryConditions: # TODO: Put connectivity inside architecture? MPI should be initialize so you can construct it in there. # Might have to make it MultiCPU(; grid, ranks) -struct MultiCPU{R} <: AbstractArchitecture +abstract type AbstractMultiArchitecture <: AbstractArchitecture end + +struct MultiCPU{R} <: AbstractMultiArchitecture ranks :: R end @@ -112,7 +116,7 @@ end struct HaloCommunication <: BCType end -# const HaloCommunicationBC = BoundaryCondition{<:HaloCommunication} +HaloCommunicationBC = BoundaryCondition{<:HaloCommunication} bctype_str(::HaloCommunicationBC) ="HaloCommunication" @@ -161,91 +165,152 @@ function inject_halo_communication_boundary_conditions(field_bcs, my_rank, conne return FieldBoundaryConditions(x_bcs, y_bcs, z_bcs) end +##### +##### MPI tags for halo communication BCs +##### + +sides = (:west, :east, :south, :north, :top, :bottom) + +side_id = Dict( + :east => 1, :west => 2, + :north => 3, :south => 4, + :top => 5, :bottom => 6 +) + +opposite_side = Dict( + :east => :west, :west => :east, + :north => :south, :south => :north, + :top => :bottom, :bottom => :top +) + +# Unfortunately can't call MPI.Comm_size(MPI.COMM_WORLD) before MPI.Init(). +const MAX_RANKS = 10^3 +RANK_DIGITS = 3 + +# Define functions that return unique send and recv MPI tags for each side. 
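+# For example, with RANK_DIGITS=3 a message sent from rank 1 to rank 2 through
+# the east side (side digit 1) gets send tag parse(Int, "001" * "002" * "1") = 10021.
+# The eastern neighbour computes its west recv tag by swapping the rank digits
+# and using the opposite side, which yields the same 10021, so the two match up.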
+for side in sides + side_str = string(side) + send_tag_fn_name = Symbol(side, :_send_tag) + recv_tag_fn_name = Symbol(side, :_recv_tag) + @eval begin + function $send_tag_fn_name(bc) + from_digits = string(bc.condition.from, pad=RANK_DIGITS) + to_digits = string(bc.condition.to, pad=RANK_DIGITS) + side_digit = string(side_id[Symbol($side_str)]) + return parse(Int, from_digits * to_digits * side_digit) + end + + function $recv_tag_fn_name(bc) + from_digits = string(bc.condition.from, pad=RANK_DIGITS) + to_digits = string(bc.condition.to, pad=RANK_DIGITS) + side_digit = string(side_id[opposite_side[Symbol($side_str)]]) + return parse(Int, to_digits * from_digits * side_digit) + end + end +end + ##### ##### Filling halos for halo communication boundary conditions ##### -# sides = (:west, :east, :south, :north, :top, :bottom) -# coords = (:x, :x, :y, :y, :z, :z) - -# # Unfortunately can't call MPI.Comm_size(MPI.COMM_WORLD) before MPI.Init(). -# const MAX_RANKS = 10^3 - -# # Define functions that return unique send and recv MPI tags for each side. -# for (i, side) in enumerate(sides) -# send_tag_fn_name = Symbol(side, :_halo_comm_bc_send_tag) -# recv_tag_fn_name = Symbol(side, :_halo_comm_bc_recv_tag) -# @eval begin -# @inline $send_tag_fn_name(bc) = 6 * (MAX_RANKS * bc.condition.rank_from + bc.condition.rank_to) + $i -# @inline $recv_tag_fn_name(bc) = 6 * (MAX_RANKS * bc.condition.rank_to + bc.condition.rank_from) + $i -# end -# end - -# @inline west_send_buffer(c, N, H) = c.parent[N+1:N+H, :, :] -# @inline east_send_buffer(c, N, H) = c.parent[1+H:2H, :, :] -# @inline south_send_buffer(c, N, H) = c.parent[:, N+1:N+H, :] -# @inline north_send_buffer(c, N, H) = c.parent[:, 1+H:2H, :] -# @inline top_send_buffer(c, N, H) = c.parent[:, :, 1+H:2H] -# @inline bottom_send_buffer(c, N, H) = c.parent[:, :, N+1:N+H] - -# @inline west_recv_buffer(grid) = zeros(grid.Hx, grid.Ty, grid.Tz) -# @inline south_recv_buffer(grid) = zeros(grid.Tx, grid.Hy, grid.Tz) -# @inline top_recv_buffer(grid) = zeros(grid.Tx, grid.Ty, grid.Hz) - -# const east_recv_buffer = west_recv_buffer -# const north_recv_buffer = south_recv_buffer -# const bottom_recv_buffer = top_recv_buffer - -# @inline copy_recv_buffer_into_west_halo!(c, N, H, buf) = (c.parent[ 1:H, :, :] .= buf) -# @inline copy_recv_buffer_into_east_halo!(c, N, H, buf) = (c.parent[N+H+1:N+2H, :, :] .= buf) -# @inline copy_recv_buffer_into_south_halo!(c, N, H, buf) = (c.parent[:, 1:H, :] .= buf) -# @inline copy_recv_buffer_into_north_halo!(c, N, H, buf) = (c.parent[:, N+H+1:N+2H, :] .= buf) -# @inline copy_recv_buffer_into_bottom_halo!(c, N, H, buf) = (c.parent[:, :, 1:H ] .= buf) -# @inline copy_recv_buffer_into_top_halo!(c, N, H, buf) = (c.parent[:, :, N+H+1:N+2H] .= buf) - -# for (x, side) in zip(coords, sides) -# H = Symbol(:H, x) -# N = Symbol(:N, x) - -# fill_fn_name = Symbol(:fill_, side, :_halo!) -# send_buf_fn_name = Symbol(side, :_send_buffer) -# recv_buf_fn_name = Symbol(side, :_recv_buffer) -# send_tag_fn_name = Symbol(side, :_halo_comm_bc_send_tag) -# recv_tag_fn_name = Symbol(side, :_halo_comm_bc_recv_tag) -# copy_buf_fn_name = Symbol(:copy_recv_buffer_into_, side, :_halo!) - -# @eval begin -# function $fill_fn_name(c, bc::HaloCommunicationBC, arch, grid, args...) 
-# send_buffer = $send_buf_fn_name(c, grid.$(N), grid.$(H)) -# recv_buffer = $recv_buf_fn_name(grid) - -# send_tag = $send_tag_fn_name(bc) -# recv_tag = $recv_tag_fn_name(bc) - -# my_rank = bc.condition.rank_from -# rank_send_to = rank_recv_from = bc.condition.rank_to - -# @info "MPI.Isend: my_rank=$my_rank, rank_send_to=$rank_send_to, send_tag=$send_tag" -# MPI.Isend(send_buffer, rank_send_to, send_tag, MPI.COMM_WORLD) -# @info "MPI.Isend: done!" - -# @info "MPI.Recv!: my_rank=$my_rank, rank_recv_from=$rank_recv_from, recv_tag=$recv_tag" -# MPI.Recv!(recv_buffer, rank_recv_from, recv_tag, MPI.COMM_WORLD) -# @info "MPI.Recv! done!" - -# # @info "Sendrecv!: my_rank=$my_rank, rank_send_to=rank_recv_from=$rank_send_to, " * -# # "send_tag=$send_tag, recv_tag=$recv_tag" -# # -# # MPI.Sendrecv!(send_buffer, rank_send_to, send_tag, -# # recv_buffer, rank_recv_from, recv_tag, -# # MPI.COMM_WORLD) -# # -# # @info "Sendrecv!: my_rank=$my_rank done!" - -# $copy_buf_fn_name(c, grid.$(N), grid.$(H), recv_buffer) -# end -# end -# end +@inline west_send_buffer(c, N, H) = c.parent[N+1:N+H, :, :] +@inline east_send_buffer(c, N, H) = c.parent[1+H:2H, :, :] +@inline south_send_buffer(c, N, H) = c.parent[:, N+1:N+H, :] +@inline north_send_buffer(c, N, H) = c.parent[:, 1+H:2H, :] +@inline top_send_buffer(c, N, H) = c.parent[:, :, 1+H:2H] +@inline bottom_send_buffer(c, N, H) = c.parent[:, :, N+1:N+H] + +@inline west_recv_buffer(grid) = zeros(grid.Hx, grid.Ny + 2grid.Hy, grid.Nz + 2grid.Hz) +@inline south_recv_buffer(grid) = zeros(grid.Nx + 2grid.Hx, grid.Hy, grid.Nz + 2grid.Hz) +@inline top_recv_buffer(grid) = zeros(grid.Nx + 2grid.Hx, grid.Ny + 2grid.Hy, grid.Hz) + +const east_recv_buffer = west_recv_buffer +const north_recv_buffer = south_recv_buffer +const bottom_recv_buffer = top_recv_buffer + +@inline copy_recv_buffer_into_west_halo!(c, N, H, buf) = (c.parent[ 1:H, :, :] .= buf) +@inline copy_recv_buffer_into_east_halo!(c, N, H, buf) = (c.parent[N+H+1:N+2H, :, :] .= buf) +@inline copy_recv_buffer_into_south_halo!(c, N, H, buf) = (c.parent[:, 1:H, :] .= buf) +@inline copy_recv_buffer_into_north_halo!(c, N, H, buf) = (c.parent[:, N+H+1:N+2H, :] .= buf) +@inline copy_recv_buffer_into_bottom_halo!(c, N, H, buf) = (c.parent[:, :, 1:H ] .= buf) +@inline copy_recv_buffer_into_top_halo!(c, N, H, buf) = (c.parent[:, :, N+H+1:N+2H] .= buf) + +function fill_halo_regions!(c::AbstractArray, bcs, arch::AbstractMultiArchitecture, grid, args...) + + barrier = Event(device(child_architecture(arch))) + + east_event, west_event = fill_east_and_west_halos!(c, bcs.east, bcs.west, arch, barrier, grid, args...) + # north_event, south_event = fill_north_and_south_halos!(c, bcs.north, bcs.south, arch, barrier, grid, args...) + # top_event, bottom_event = fill_top_and_bottom_halos!(c, bcs.east, bcs.west, arch, barrier, grid, args...) + + events = [east_event, west_event] # , north_event, south_event, top_event, bottom_event] + events = filter(e -> e isa Event, events) + wait(device(child_architecture(arch)), MultiEvent(Tuple(events))) + + return nothing +end + +function fill_east_and_west_halos!(c, east_bc, west_bc, arch, barrier, grid, args...) + east_event = fill_east_halo!(c, east_bc, child_architecture(arch), barrier, grid, args...) + west_event = fill_west_halo!(c, west_bc, child_architecture(arch), barrier, grid, args...) + return east_event, west_event +end + +function fill_east_and_west_halos!(c, east_bc::HaloCommunicationBC, west_bc::HaloCommunicationBC, arch, barrier, grid, args...) 
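+    # Note: the two Isend requests below are never explicitly waited on; the
+    # blocking MPI.Recv! calls further down are what complete the exchange.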
+ # 1 -> send east halo to eastern rank and fill east halo from eastern rank's west halo. + # 2 -> send west halo to western rank and fill west halo from western rank's east halo. + + @assert east_bc.condition.from == west_bc.condition.from + my_rank = east_bc.condition.from + + rank_to_send_to1 = east_bc.condition.to + rank_to_send_to2 = west_bc.condition.to + + send_buffer1 = east_send_buffer(c, grid.Nx, grid.Hx) + send_buffer2 = west_send_buffer(c, grid.Nx, grid.Hx) + + send_tag1 = east_send_tag(east_bc) + send_tag2 = west_send_tag(west_bc) + + @info "MPI.Isend: my_rank=$my_rank, rank_to_send_to=$rank_to_send_to1, send_tag=$send_tag1" + @info "MPI.Isend: my_rank=$my_rank, rank_to_send_to=$rank_to_send_to2, send_tag=$send_tag2" + + send_req1 = MPI.Isend(send_buffer1, rank_to_send_to1, send_tag1, MPI.COMM_WORLD) + send_req2 = MPI.Isend(send_buffer2, rank_to_send_to2, send_tag2, MPI.COMM_WORLD) + + ### + + rank_to_recv_from1 = east_bc.condition.to + rank_to_recv_from2 = west_bc.condition.to + + recv_buffer1 = east_recv_buffer(grid) + recv_buffer2 = west_recv_buffer(grid) + + recv_tag1 = east_recv_tag(east_bc) + recv_tag2 = west_recv_tag(west_bc) + + @info "MPI.Recv!: my_rank=$my_rank, rank_to_recv_from=$rank_to_recv_from1, recv_tag=$recv_tag1" + @info "MPI.Recv!: my_rank=$my_rank, rank_to_recv_from=$rank_to_recv_from2, recv_tag=$recv_tag2" + + MPI.Recv!(recv_buffer1, rank_to_recv_from1, recv_tag1, MPI.COMM_WORLD) + MPI.Recv!(recv_buffer2, rank_to_recv_from2, recv_tag2, MPI.COMM_WORLD) + + @info "Communication done!" + + copy_recv_buffer_into_east_halo!(c, grid.Nx, grid.Hx, recv_buffer1) + copy_recv_buffer_into_west_halo!(c, grid.Nx, grid.Hx, recv_buffer2) + + # @info "Sendrecv!: my_rank=$my_rank, rank_send_to=rank_recv_from=$rank_send_to, " * + # "send_tag=$send_tag, recv_tag=$recv_tag" + # + # MPI.Sendrecv!(send_buffer, rank_send_to, send_tag, + # recv_buffer, rank_recv_from, recv_tag, + # MPI.COMM_WORLD) + # + # @info "Sendrecv!: my_rank=$my_rank done!" + + return nothing, nothing +end ##### ##### Distributed model struct and constructor From 50fc38793f85e1559a69bb499d714f9420a6907c Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Fri, 5 Feb 2021 19:33:14 -0500 Subject: [PATCH 048/100] More responsibilities for `MultiCPU` --- src/Distributed/distributed_architectures.jl | 143 +++++++++++++++++ src/Distributed/distributed_model.jl | 158 ++----------------- 2 files changed, 155 insertions(+), 146 deletions(-) create mode 100644 src/Distributed/distributed_architectures.jl diff --git a/src/Distributed/distributed_architectures.jl b/src/Distributed/distributed_architectures.jl new file mode 100644 index 0000000000..400cab3908 --- /dev/null +++ b/src/Distributed/distributed_architectures.jl @@ -0,0 +1,143 @@ +using Oceananigans.Grids: validate_tupled_argument + +# TODO: Put connectivity inside architecture? MPI should be initialize so you can construct it in there. 
+# Might have to make it MultiCPU(; grid, ranks) + +abstract type AbstractMultiArchitecture <: AbstractArchitecture end + +struct MultiCPU{R, I, ρ, C} <: AbstractMultiArchitecture + my_rank :: R + my_index :: I + ranks :: ρ + connectivity :: C +end + +child_architecture(::MultiCPU) = CPU() + +##### +##### Converting between index and MPI rank taking k as the fast index +##### + +@inline index2rank(i, j, k, Rx, Ry, Rz) = (i-1)*Ry*Rz + (j-1)*Rz + (k-1) + +@inline function rank2index(r, Rx, Ry, Rz) + i = div(r, Ry*Rz) + r -= i*Ry*Rz + j = div(r, Rz) + k = mod(r, Rz) + return i+1, j+1, k+1 # 1-based Julia +end + +##### +##### Rank connectivity graph +##### + +struct RankConnectivity{E, W, N, S, T, B} + east :: E + west :: W + north :: N + south :: S + top :: T + bottom :: B +end + +RankConnectivity(; east, west, north, south, top, bottom) = + RankConnectivity(east, west, north, south, top, bottom) + +function increment_index(i, R, topo) + R == 1 && return nothing + if i+1 > R + if topo == Periodic + return 1 + else + return nothing + end + else + return i+1 + end +end + +function decrement_index(i, R, topo) + R == 1 && return nothing + if i-1 < 1 + if topo == Periodic + return R + else + return nothing + end + else + return i-1 + end +end + +function RankConnectivity(model_index, ranks, topology) + i, j, k = model_index + Rx, Ry, Rz = ranks + TX, TY, TZ = topology + + i_east = increment_index(i, Rx, TX) + i_west = decrement_index(i, Rx, TX) + j_north = increment_index(j, Ry, TY) + j_south = decrement_index(j, Ry, TY) + k_top = increment_index(k, Rz, TZ) + k_bot = decrement_index(k, Rz, TZ) + + r_east = isnothing(i_east) ? nothing : index2rank(i_east, j, k, Rx, Ry, Rz) + r_west = isnothing(i_west) ? nothing : index2rank(i_west, j, k, Rx, Ry, Rz) + r_north = isnothing(j_north) ? nothing : index2rank(i, j_north, k, Rx, Ry, Rz) + r_south = isnothing(j_south) ? nothing : index2rank(i, j_south, k, Rx, Ry, Rz) + r_top = isnothing(k_top) ? nothing : index2rank(i, j, k_top, Rx, Ry, Rz) + r_bot = isnothing(k_bot) ? nothing : index2rank(i, j, k_bot, Rx, Ry, Rz) + + return RankConnectivity(east=r_east, west=r_west, north=r_north, + south=r_south, top=r_top, bottom=r_bot) +end + +##### +##### Constructors +##### + +function MultiCPU(; grid, ranks) + MPI.Initialized() || error("Must call MPI.Init() before constructing a MultiCPU.") + + validate_tupled_argument(ranks, Int, "ranks") + + Rx, Ry, Rz = ranks + total_ranks = Rx*Ry*Rz + + comm = MPI.COMM_WORLD + + mpi_ranks = MPI.Comm_size(comm) + my_rank = MPI.Comm_rank(comm) + + i, j, k = my_index = rank2index(my_rank, Rx, Ry, Rz) + + if total_ranks != mpi_ranks + throw(ArgumentError("ranks=($Rx, $Ry, $Rz) [$total_ranks total] inconsistent " * + "with number of MPI ranks: $mpi_ranks. Exiting with return code 1.")) + MPI.Finalize() + exit(code=1) + end + + comm = MPI.COMM_WORLD + + my_connectivity = RankConnectivity(my_index, ranks, topology(grid)) + + return MultiCPU(my_rank, my_index, ranks, my_connectivity) +end + +##### +##### Pretty printing +##### + +function Base.show(io::IO, arch::MultiCPU) + c = arch.connectivity + print(io, "MultiCPU architecture (rank $(arch.my_rank)/$(prod(arch.ranks))) [index $(arch.my_index) / $(arch.ranks)]\n", + "└── connectivity:", + isnothing(c.east) ? "" : " east=$(c.east)", + isnothing(c.west) ? "" : " west=$(c.west)", + isnothing(c.north) ? "" : " north=$(c.north)", + isnothing(c.south) ? "" : " south=$(c.south)", + isnothing(c.top) ? "" : " top=$(c.top)", + isnothing(c.bottom) ? 
"" : " bottom=$(c.bottom)") +end diff --git a/src/Distributed/distributed_model.jl b/src/Distributed/distributed_model.jl index 21f7b5159b..24ff6b5f8a 100644 --- a/src/Distributed/distributed_model.jl +++ b/src/Distributed/distributed_model.jl @@ -5,7 +5,6 @@ using Oceananigans.Architectures using Oceananigans.Grids using KernelAbstractions: @kernel, @index, Event, MultiEvent -using Oceananigans.Grids: validate_tupled_argument using Oceananigans.BoundaryConditions: BCType import Oceananigans.BoundaryConditions: @@ -14,101 +13,8 @@ import Oceananigans.BoundaryConditions: fill_west_halo!, fill_east_halo!, fill_south_halo!, fill_north_halo!, fill_bottom_halo!, fill_top_halo! -##### -##### Architecture stuff -##### - -# TODO: Put connectivity inside architecture? MPI should be initialize so you can construct it in there. -# Might have to make it MultiCPU(; grid, ranks) - -abstract type AbstractMultiArchitecture <: AbstractArchitecture end - -struct MultiCPU{R} <: AbstractMultiArchitecture - ranks :: R -end - -MultiCPU(; ranks) = MultiCPU(ranks) - -child_architecture(::MultiCPU) = CPU() - -##### -##### Converting between index and MPI rank taking k as the fast index -##### +include("distributed_architectures.jl") -@inline index2rank(i, j, k, Rx, Ry, Rz) = (i-1)*Ry*Rz + (j-1)*Rz + (k-1) - -@inline function rank2index(r, Rx, Ry, Rz) - i = div(r, Ry*Rz) - r -= i*Ry*Rz - j = div(r, Rz) - k = mod(r, Rz) - return i+1, j+1, k+1 -end - -##### -##### Rank connectivity graph -##### - -struct RankConnectivity{E, W, N, S, T, B} - east :: E - west :: W - north :: N - south :: S - top :: T - bottom :: B -end - -RankConnectivity(; east, west, north, south, top, bottom) = - RankConnectivity(east, west, north, south, top, bottom) - -function increment_index(i, R, topo) - R == 1 && return nothing - if i+1 > R - if topo == Periodic - return 1 - else - return nothing - end - else - return i+1 - end -end - -function decrement_index(i, R, topo) - R == 1 && return nothing - if i-1 < 1 - if topo == Periodic - return R - else - return nothing - end - else - return i-1 - end -end - -function RankConnectivity(model_index, ranks, topology) - i, j, k = model_index - Rx, Ry, Rz = ranks - TX, TY, TZ = topology - - i_east = increment_index(i, Rx, TX) - i_west = decrement_index(i, Rx, TX) - j_north = increment_index(j, Ry, TY) - j_south = decrement_index(j, Ry, TY) - k_top = increment_index(k, Rz, TZ) - k_bot = decrement_index(k, Rz, TZ) - - r_east = isnothing(i_east) ? nothing : index2rank(i_east, j, k, Rx, Ry, Rz) - r_west = isnothing(i_west) ? nothing : index2rank(i_west, j, k, Rx, Ry, Rz) - r_north = isnothing(j_north) ? nothing : index2rank(i, j_north, k, Rx, Ry, Rz) - r_south = isnothing(j_south) ? nothing : index2rank(i, j_south, k, Rx, Ry, Rz) - r_top = isnothing(k_top) ? nothing : index2rank(i, j, k_top, Rx, Ry, Rz) - r_bot = isnothing(k_bot) ? 
nothing : index2rank(i, j, k_bot, Rx, Ry, Rz) - - return RankConnectivity(east=r_east, west=r_west, north=r_north, - south=r_south, top=r_top, bottom=r_bot) -end ##### ##### Halo communication boundary condition @@ -278,8 +184,6 @@ function fill_east_and_west_halos!(c, east_bc::HaloCommunicationBC, west_bc::Hal send_req1 = MPI.Isend(send_buffer1, rank_to_send_to1, send_tag1, MPI.COMM_WORLD) send_req2 = MPI.Isend(send_buffer2, rank_to_send_to2, send_tag2, MPI.COMM_WORLD) - ### - rank_to_recv_from1 = east_bc.condition.to rank_to_recv_from2 = west_bc.condition.to @@ -316,18 +220,20 @@ end ##### Distributed model struct and constructor ##### -struct DistributedModel{A, I, M, R, G} +# TODO: add the full grid! + +struct DistributedModel{A, M} architecture :: A - index :: I - ranks :: R model :: M - connectivity :: G end function DistributedModel(; architecture, grid, boundary_conditions=nothing, model_kwargs...) - ranks = architecture.ranks + my_rank = architecture.my_rank + i, j, k = architecture.my_index + Rx, Ry, Rz = architecture.ranks + my_connectivity = architecture.connectivity - validate_tupled_argument(ranks, Int, "ranks") + ## Construct local grid Nx, Ny, Nz = size(grid) @@ -337,28 +243,6 @@ function DistributedModel(; architecture, grid, boundary_conditions=nothing, mod zL, zR = grid.zF[1], grid.zF[Nz+1] Lx, Ly, Lz = length(grid) - Rx, Ry, Rz = ranks - total_ranks = Rx*Ry*Rz - - comm = MPI.COMM_WORLD - - mpi_ranks = MPI.Comm_size(comm) - my_rank = MPI.Comm_rank(comm) - - if total_ranks != mpi_ranks - throw(ArgumentError("ranks=($Rx, $Ry, $Rz) [$total_ranks total] inconsistent " * - "with number of MPI ranks: $mpi_ranks. Exiting with return code 1.")) - MPI.Finalize() - exit(code=1) - end - - i, j, k = index = rank2index(my_rank, Rx, Ry, Rz) - @info "My rank: $my_rank, my index: $index" - - ##### - ##### Construct local grid - ##### - # Make sure we can put an integer number of grid points in each rank. @assert isinteger(Nx / Rx) @assert isinteger(Ny / Ry) @@ -371,19 +255,9 @@ function DistributedModel(; architecture, grid, boundary_conditions=nothing, mod y₁, y₂ = yL + (j-1)*ly, yL + j*ly z₁, z₂ = zL + (k-1)*lz, zL + k*lz - @info "Constructing local grid: n=($nx, $ny, $nz), x ∈ [$x₁, $x₂], y ∈ [$y₁, $y₂], z ∈ [$z₁, $z₂]" my_grid = RegularCartesianGrid(topology=topology(grid), size=(nx, ny, nz), x=(x₁, x₂), y=(y₁, y₂), z=(z₁, z₂)) - ##### - ##### Construct local connectivity - ##### - - my_connectivity = RankConnectivity(index, ranks, topology(grid)) - @info "Local connectivity: $my_connectivity" - - ##### - ##### Change appropriate boundary conditions to halo communication BCs - ##### + ## Change appropriate boundary conditions to halo communication BCs # FIXME: Stop assuming (u, v, w, T, S). @@ -397,8 +271,6 @@ function DistributedModel(; architecture, grid, boundary_conditions=nothing, mod S = haskey(bcs, :S) ? bcs.S : TracerBoundaryConditions(grid) ) - @debug "Injecting halo communication boundary conditions..." 
- communicative_bcs = ( u = inject_halo_communication_boundary_conditions(bcs.u, my_rank, my_connectivity), v = inject_halo_communication_boundary_conditions(bcs.v, my_rank, my_connectivity), @@ -407,9 +279,7 @@ function DistributedModel(; architecture, grid, boundary_conditions=nothing, mod S = inject_halo_communication_boundary_conditions(bcs.S, my_rank, my_connectivity) ) - ##### - ##### Construct local model - ##### + ## Construct local model my_model = IncompressibleModel(; architecture = child_architecture(architecture), @@ -418,9 +288,5 @@ function DistributedModel(; architecture, grid, boundary_conditions=nothing, mod model_kwargs... ) - return DistributedModel(architecture, index, ranks, my_model, my_connectivity) -end - -function Base.show(io::IO, dm::DistributedModel) - print(io, "DistributedModel with $(dm.ranks) ranks") + return DistributedModel(architecture, my_model) end From a04c583baba25f719cae4bbf4f5a7fcbe53d781a Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Fri, 5 Feb 2021 19:33:48 -0500 Subject: [PATCH 049/100] Modular tests --- src/Distributed/test_distributed_model.jl | 197 ++++++++++++++++------ 1 file changed, 143 insertions(+), 54 deletions(-) diff --git a/src/Distributed/test_distributed_model.jl b/src/Distributed/test_distributed_model.jl index 4e515b6631..1653b284b5 100644 --- a/src/Distributed/test_distributed_model.jl +++ b/src/Distributed/test_distributed_model.jl @@ -11,17 +11,19 @@ comm = MPI.COMM_WORLD mpi_ranks = MPI.Comm_size(comm) @assert mpi_ranks == 4 -function test_triply_periodic_connectivity_with_411_ranks() +##### +##### Multi architectures and rank connectivity +##### + +function run_triply_periodic_rank_connectivity_tests_with_411_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) - arch = MultiCPU(ranks=(4, 1, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid) + arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) - @test my_rank == index2rank(dm.index..., dm.ranks...) + @test my_rank == index2rank(arch.my_index..., arch.ranks...) - model = dm.model - connectivity = dm.connectivity + connectivity = arch.connectivity # No communication in y and z. 
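+    # (ranks=(4, 1, 1): only east/west neighbours exist)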
@test isnothing(connectivity.south) @@ -43,35 +45,18 @@ function test_triply_periodic_connectivity_with_411_ranks() @test connectivity.west == 2 end - nx, ny, nz = size(model.grid) - @test model.grid.xF[1] == 0.25*my_rank - @test model.grid.xF[nx+1] == 0.25*(my_rank+1) - @test model.grid.yF[1] == 0 - @test model.grid.yF[ny+1] == 2 - @test model.grid.zF[1] == -3 - @test model.grid.zF[nz+1] == 0 - - for field in fields(model) - @test field.boundary_conditions.east isa HaloCommunicationBC - @test field.boundary_conditions.west isa HaloCommunicationBC - @test !isa(field.boundary_conditions.north, HaloCommunicationBC) - @test !isa(field.boundary_conditions.south, HaloCommunicationBC) - @test !isa(field.boundary_conditions.top, HaloCommunicationBC) - @test !isa(field.boundary_conditions.bottom, HaloCommunicationBC) - end + return nothing end -function test_triply_periodic_connectivity_with_141_ranks() +function run_triply_periodic_rank_connectivity_tests_with_141_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) - arch = MultiCPU(ranks=(1, 4, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid) + arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) - @test my_rank == index2rank(dm.index..., dm.ranks...) + @test my_rank == index2rank(arch.my_index..., arch.ranks...) - model = dm.model - connectivity = dm.connectivity + connectivity = arch.connectivity # No communication in x and z. @test isnothing(connectivity.east) @@ -93,35 +78,18 @@ function test_triply_periodic_connectivity_with_141_ranks() @test connectivity.south == 2 end - nx, ny, nz = size(model.grid) - @test model.grid.xF[1] == 0 - @test model.grid.xF[nx+1] == 1 - @test model.grid.yF[1] == 0.5*my_rank - @test model.grid.yF[ny+1] == 0.5*(my_rank+1) - @test model.grid.zF[1] == -3 - @test model.grid.zF[nz+1] == 0 - - for field in fields(model) - @test !isa(field.boundary_conditions.east, HaloCommunicationBC) - @test !isa(field.boundary_conditions.west, HaloCommunicationBC) - @test field.boundary_conditions.north isa HaloCommunicationBC - @test field.boundary_conditions.south isa HaloCommunicationBC - @test !isa(field.boundary_conditions.top, HaloCommunicationBC) - @test !isa(field.boundary_conditions.bottom, HaloCommunicationBC) - end + return nothing end -function test_triply_periodic_connectivity_with_114_ranks() +function run_triply_periodic_rank_connectivity_tests_with_114_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) - arch = MultiCPU(ranks=(1, 1, 4)) - dm = DistributedModel(architecture=arch, grid=full_grid) + arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) - @test my_rank == index2rank(dm.index..., dm.ranks...) + @test my_rank == index2rank(arch.my_index..., arch.ranks...) - model = dm.model - connectivity = dm.connectivity + connectivity = arch.connectivity # No communication in x and y. 
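+    # (ranks=(1, 1, 4): only top/bottom neighbours exist)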
@test isnothing(connectivity.east) @@ -143,6 +111,62 @@ function test_triply_periodic_connectivity_with_114_ranks() @test connectivity.bottom == 2 end + return nothing +end + +##### +##### Local grids for distributed models +##### + +function run_triply_periodic_local_grid_tests_with_411_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) + dm = DistributedModel(architecture=arch, grid=full_grid) + + my_rank = MPI.Comm_rank(MPI.COMM_WORLD) + + model = dm.model + nx, ny, nz = size(model.grid) + @test model.grid.xF[1] == 0.25*my_rank + @test model.grid.xF[nx+1] == 0.25*(my_rank+1) + @test model.grid.yF[1] == 0 + @test model.grid.yF[ny+1] == 2 + @test model.grid.zF[1] == -3 + @test model.grid.zF[nz+1] == 0 + + return nothing +end + +function run_triply_periodic_local_grid_tests_with_141_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) + dm = DistributedModel(architecture=arch, grid=full_grid) + + my_rank = MPI.Comm_rank(MPI.COMM_WORLD) + + model = dm.model + nx, ny, nz = size(model.grid) + @test model.grid.xF[1] == 0 + @test model.grid.xF[nx+1] == 1 + @test model.grid.yF[1] == 0.5*my_rank + @test model.grid.yF[ny+1] == 0.5*(my_rank+1) + @test model.grid.zF[1] == -3 + @test model.grid.zF[nz+1] == 0 + + return nothing +end + +function run_triply_periodic_local_grid_tests_with_114_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) + dm = DistributedModel(architecture=arch, grid=full_grid) + + my_rank = MPI.Comm_rank(MPI.COMM_WORLD) + + model = dm.model nx, ny, nz = size(model.grid) @test model.grid.xF[1] == 0 @test model.grid.xF[nx+1] == 1 @@ -151,7 +175,52 @@ function test_triply_periodic_connectivity_with_114_ranks() @test model.grid.zF[1] == -3 + 0.75*my_rank @test model.grid.zF[nz+1] == -3 + 0.75*(my_rank+1) - for field in fields(model) + return nothing +end + +##### +##### +##### + +function run_triply_periodic_bc_injection_tests_with_411_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) + dm = DistributedModel(architecture=arch, grid=full_grid) + + for field in fields(dm.model) + @test field.boundary_conditions.east isa HaloCommunicationBC + @test field.boundary_conditions.west isa HaloCommunicationBC + @test !isa(field.boundary_conditions.north, HaloCommunicationBC) + @test !isa(field.boundary_conditions.south, HaloCommunicationBC) + @test !isa(field.boundary_conditions.top, HaloCommunicationBC) + @test !isa(field.boundary_conditions.bottom, HaloCommunicationBC) + end +end + +function run_triply_periodic_bc_injection_tests_with_141_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) + dm = DistributedModel(architecture=arch, grid=full_grid) + + for field in fields(dm.model) + @test !isa(field.boundary_conditions.east, HaloCommunicationBC) + @test !isa(field.boundary_conditions.west, HaloCommunicationBC) + @test field.boundary_conditions.north isa HaloCommunicationBC + @test field.boundary_conditions.south isa HaloCommunicationBC + @test 
!isa(field.boundary_conditions.top, HaloCommunicationBC) + @test !isa(field.boundary_conditions.bottom, HaloCommunicationBC) + end +end + +function run_triply_periodic_bc_injection_tests_with_114_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) + dm = DistributedModel(architecture=arch, grid=full_grid) + + for field in fields(dm.model) @test !isa(field.boundary_conditions.east, HaloCommunicationBC) @test !isa(field.boundary_conditions.west, HaloCommunicationBC) @test !isa(field.boundary_conditions.north, HaloCommunicationBC) @@ -162,9 +231,29 @@ function test_triply_periodic_connectivity_with_114_ranks() end @testset "Distributed MPI Oceananigans" begin - test_triply_periodic_connectivity_with_411_ranks() - test_triply_periodic_connectivity_with_141_ranks() - test_triply_periodic_connectivity_with_114_ranks() + @info "Testing distributed MPI Oceananigans..." + + @testset "Multi architectures rank connectivity" begin + @info " Testing multi architecture rank connectivity..." + run_triply_periodic_rank_connectivity_tests_with_411_ranks() + run_triply_periodic_rank_connectivity_tests_with_141_ranks() + run_triply_periodic_rank_connectivity_tests_with_114_ranks() + end + + @testset "Local grids for distributed models" begin + @info " Testing local grids for distributed models..." + run_triply_periodic_local_grid_tests_with_411_ranks() + run_triply_periodic_local_grid_tests_with_141_ranks() + run_triply_periodic_local_grid_tests_with_114_ranks() + end + + @testset "Injection of halo communication BCs" begin + @info " Testing injection of halo communication BCs..." + run_triply_periodic_bc_injection_tests_with_411_ranks() + run_triply_periodic_bc_injection_tests_with_141_ranks() + run_triply_periodic_bc_injection_tests_with_114_ranks() + end + # TODO: 221 ranks # TODO: triply bounded end From bb9ab679fbef9b4a2e458c8e4a8d146f751a5f82 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Fri, 5 Feb 2021 19:48:36 -0500 Subject: [PATCH 050/100] Need more files --- src/Distributed/distributed_architectures.jl | 2 + src/Distributed/distributed_model.jl | 223 +------------------ src/Distributed/halo_communication.jl | 151 +++++++++++++ src/Distributed/halo_communication_bcs.jl | 54 +++++ 4 files changed, 214 insertions(+), 216 deletions(-) create mode 100644 src/Distributed/halo_communication.jl create mode 100644 src/Distributed/halo_communication_bcs.jl diff --git a/src/Distributed/distributed_architectures.jl b/src/Distributed/distributed_architectures.jl index 400cab3908..796ef3a708 100644 --- a/src/Distributed/distributed_architectures.jl +++ b/src/Distributed/distributed_architectures.jl @@ -1,3 +1,5 @@ +using Oceananigans.Architectures + using Oceananigans.Grids: validate_tupled_argument # TODO: Put connectivity inside architecture? MPI should be initialize so you can construct it in there. 
diff --git a/src/Distributed/distributed_model.jl b/src/Distributed/distributed_model.jl index 24ff6b5f8a..2a5c06593c 100644 --- a/src/Distributed/distributed_model.jl +++ b/src/Distributed/distributed_model.jl @@ -1,229 +1,19 @@ -import MPI +using MPI using Oceananigans -using Oceananigans.Architectures using Oceananigans.Grids -using KernelAbstractions: @kernel, @index, Event, MultiEvent -using Oceananigans.BoundaryConditions: BCType - -import Oceananigans.BoundaryConditions: - bctype_str, print_condition, - fill_halo_regions!, - fill_west_halo!, fill_east_halo!, fill_south_halo!, - fill_north_halo!, fill_bottom_halo!, fill_top_halo! - include("distributed_architectures.jl") - - -##### -##### Halo communication boundary condition -##### - -struct HaloCommunication <: BCType end - -HaloCommunicationBC = BoundaryCondition{<:HaloCommunication} - -bctype_str(::HaloCommunicationBC) ="HaloCommunication" - -HaloCommunicationBoundaryCondition(val; kwargs...) = BoundaryCondition(HaloCommunication, val; kwargs...) - -struct HaloCommunicationRanks{F, T} - from :: F - to :: T -end - -HaloCommunicationRanks(; from, to) = HaloCommunicationRanks(from, to) - -print_condition(hcr::HaloCommunicationRanks) = "(from rank $(hcr.from), to rank $(hcr.to))" - -function inject_halo_communication_boundary_conditions(field_bcs, my_rank, connectivity) - rank_east = connectivity.east - rank_west = connectivity.west - rank_north = connectivity.north - rank_south = connectivity.south - rank_top = connectivity.top - rank_bottom = connectivity.bottom - - east_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_east) - west_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_west) - north_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_north) - south_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_south) - top_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_top) - bottom_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_bottom) - - east_comm_bc = HaloCommunicationBoundaryCondition(east_comm_ranks) - west_comm_bc = HaloCommunicationBoundaryCondition(west_comm_ranks) - north_comm_bc = HaloCommunicationBoundaryCondition(north_comm_ranks) - south_comm_bc = HaloCommunicationBoundaryCondition(south_comm_ranks) - top_comm_bc = HaloCommunicationBoundaryCondition(top_comm_ranks) - bottom_comm_bc = HaloCommunicationBoundaryCondition(bottom_comm_ranks) - - x_bcs = CoordinateBoundaryConditions(isnothing(rank_west) ? field_bcs.west : west_comm_bc, - isnothing(rank_east) ? field_bcs.east : east_comm_bc) - - y_bcs = CoordinateBoundaryConditions(isnothing(rank_south) ? field_bcs.south : south_comm_bc, - isnothing(rank_north) ? field_bcs.north : north_comm_bc) - - z_bcs = CoordinateBoundaryConditions(isnothing(rank_bottom) ? field_bcs.bottom : bottom_comm_bc, - isnothing(rank_top) ? field_bcs.top : top_comm_bc) - - return FieldBoundaryConditions(x_bcs, y_bcs, z_bcs) -end - -##### -##### MPI tags for halo communication BCs -##### - -sides = (:west, :east, :south, :north, :top, :bottom) - -side_id = Dict( - :east => 1, :west => 2, - :north => 3, :south => 4, - :top => 5, :bottom => 6 -) - -opposite_side = Dict( - :east => :west, :west => :east, - :north => :south, :south => :north, - :top => :bottom, :bottom => :top -) - -# Unfortunately can't call MPI.Comm_size(MPI.COMM_WORLD) before MPI.Init(). -const MAX_RANKS = 10^3 -RANK_DIGITS = 3 - -# Define functions that return unique send and recv MPI tags for each side. 
-for side in sides - side_str = string(side) - send_tag_fn_name = Symbol(side, :_send_tag) - recv_tag_fn_name = Symbol(side, :_recv_tag) - @eval begin - function $send_tag_fn_name(bc) - from_digits = string(bc.condition.from, pad=RANK_DIGITS) - to_digits = string(bc.condition.to, pad=RANK_DIGITS) - side_digit = string(side_id[Symbol($side_str)]) - return parse(Int, from_digits * to_digits * side_digit) - end - - function $recv_tag_fn_name(bc) - from_digits = string(bc.condition.from, pad=RANK_DIGITS) - to_digits = string(bc.condition.to, pad=RANK_DIGITS) - side_digit = string(side_id[opposite_side[Symbol($side_str)]]) - return parse(Int, to_digits * from_digits * side_digit) - end - end -end - -##### -##### Filling halos for halo communication boundary conditions -##### - -@inline west_send_buffer(c, N, H) = c.parent[N+1:N+H, :, :] -@inline east_send_buffer(c, N, H) = c.parent[1+H:2H, :, :] -@inline south_send_buffer(c, N, H) = c.parent[:, N+1:N+H, :] -@inline north_send_buffer(c, N, H) = c.parent[:, 1+H:2H, :] -@inline top_send_buffer(c, N, H) = c.parent[:, :, 1+H:2H] -@inline bottom_send_buffer(c, N, H) = c.parent[:, :, N+1:N+H] - -@inline west_recv_buffer(grid) = zeros(grid.Hx, grid.Ny + 2grid.Hy, grid.Nz + 2grid.Hz) -@inline south_recv_buffer(grid) = zeros(grid.Nx + 2grid.Hx, grid.Hy, grid.Nz + 2grid.Hz) -@inline top_recv_buffer(grid) = zeros(grid.Nx + 2grid.Hx, grid.Ny + 2grid.Hy, grid.Hz) - -const east_recv_buffer = west_recv_buffer -const north_recv_buffer = south_recv_buffer -const bottom_recv_buffer = top_recv_buffer - -@inline copy_recv_buffer_into_west_halo!(c, N, H, buf) = (c.parent[ 1:H, :, :] .= buf) -@inline copy_recv_buffer_into_east_halo!(c, N, H, buf) = (c.parent[N+H+1:N+2H, :, :] .= buf) -@inline copy_recv_buffer_into_south_halo!(c, N, H, buf) = (c.parent[:, 1:H, :] .= buf) -@inline copy_recv_buffer_into_north_halo!(c, N, H, buf) = (c.parent[:, N+H+1:N+2H, :] .= buf) -@inline copy_recv_buffer_into_bottom_halo!(c, N, H, buf) = (c.parent[:, :, 1:H ] .= buf) -@inline copy_recv_buffer_into_top_halo!(c, N, H, buf) = (c.parent[:, :, N+H+1:N+2H] .= buf) - -function fill_halo_regions!(c::AbstractArray, bcs, arch::AbstractMultiArchitecture, grid, args...) - - barrier = Event(device(child_architecture(arch))) - - east_event, west_event = fill_east_and_west_halos!(c, bcs.east, bcs.west, arch, barrier, grid, args...) - # north_event, south_event = fill_north_and_south_halos!(c, bcs.north, bcs.south, arch, barrier, grid, args...) - # top_event, bottom_event = fill_top_and_bottom_halos!(c, bcs.east, bcs.west, arch, barrier, grid, args...) - - events = [east_event, west_event] # , north_event, south_event, top_event, bottom_event] - events = filter(e -> e isa Event, events) - wait(device(child_architecture(arch)), MultiEvent(Tuple(events))) - - return nothing -end - -function fill_east_and_west_halos!(c, east_bc, west_bc, arch, barrier, grid, args...) - east_event = fill_east_halo!(c, east_bc, child_architecture(arch), barrier, grid, args...) - west_event = fill_west_halo!(c, west_bc, child_architecture(arch), barrier, grid, args...) - return east_event, west_event -end - -function fill_east_and_west_halos!(c, east_bc::HaloCommunicationBC, west_bc::HaloCommunicationBC, arch, barrier, grid, args...) - # 1 -> send east halo to eastern rank and fill east halo from eastern rank's west halo. - # 2 -> send west halo to western rank and fill west halo from western rank's east halo. 
- - @assert east_bc.condition.from == west_bc.condition.from - my_rank = east_bc.condition.from - - rank_to_send_to1 = east_bc.condition.to - rank_to_send_to2 = west_bc.condition.to - - send_buffer1 = east_send_buffer(c, grid.Nx, grid.Hx) - send_buffer2 = west_send_buffer(c, grid.Nx, grid.Hx) - - send_tag1 = east_send_tag(east_bc) - send_tag2 = west_send_tag(west_bc) - - @info "MPI.Isend: my_rank=$my_rank, rank_to_send_to=$rank_to_send_to1, send_tag=$send_tag1" - @info "MPI.Isend: my_rank=$my_rank, rank_to_send_to=$rank_to_send_to2, send_tag=$send_tag2" - - send_req1 = MPI.Isend(send_buffer1, rank_to_send_to1, send_tag1, MPI.COMM_WORLD) - send_req2 = MPI.Isend(send_buffer2, rank_to_send_to2, send_tag2, MPI.COMM_WORLD) - - rank_to_recv_from1 = east_bc.condition.to - rank_to_recv_from2 = west_bc.condition.to - - recv_buffer1 = east_recv_buffer(grid) - recv_buffer2 = west_recv_buffer(grid) - - recv_tag1 = east_recv_tag(east_bc) - recv_tag2 = west_recv_tag(west_bc) - - @info "MPI.Recv!: my_rank=$my_rank, rank_to_recv_from=$rank_to_recv_from1, recv_tag=$recv_tag1" - @info "MPI.Recv!: my_rank=$my_rank, rank_to_recv_from=$rank_to_recv_from2, recv_tag=$recv_tag2" - - MPI.Recv!(recv_buffer1, rank_to_recv_from1, recv_tag1, MPI.COMM_WORLD) - MPI.Recv!(recv_buffer2, rank_to_recv_from2, recv_tag2, MPI.COMM_WORLD) - - @info "Communication done!" - - copy_recv_buffer_into_east_halo!(c, grid.Nx, grid.Hx, recv_buffer1) - copy_recv_buffer_into_west_halo!(c, grid.Nx, grid.Hx, recv_buffer2) - - # @info "Sendrecv!: my_rank=$my_rank, rank_send_to=rank_recv_from=$rank_send_to, " * - # "send_tag=$send_tag, recv_tag=$recv_tag" - # - # MPI.Sendrecv!(send_buffer, rank_send_to, send_tag, - # recv_buffer, rank_recv_from, recv_tag, - # MPI.COMM_WORLD) - # - # @info "Sendrecv!: my_rank=$my_rank done!" - - return nothing, nothing -end +include("halo_communication_bcs.jl") +include("halo_communication.jl") ##### ##### Distributed model struct and constructor ##### -# TODO: add the full grid! - -struct DistributedModel{A, M} +struct DistributedModel{A, G, M} architecture :: A + grid :: G model :: M end @@ -255,6 +45,7 @@ function DistributedModel(; architecture, grid, boundary_conditions=nothing, mod y₁, y₂ = yL + (j-1)*ly, yL + j*ly z₁, z₂ = zL + (k-1)*lz, zL + k*lz + # FIXME: local grid might have different topology! my_grid = RegularCartesianGrid(topology=topology(grid), size=(nx, ny, nz), x=(x₁, x₂), y=(y₁, y₂), z=(z₁, z₂)) ## Change appropriate boundary conditions to halo communication BCs @@ -288,5 +79,5 @@ function DistributedModel(; architecture, grid, boundary_conditions=nothing, mod model_kwargs... ) - return DistributedModel(architecture, my_model) + return DistributedModel(architecture, grid, my_model) end diff --git a/src/Distributed/halo_communication.jl b/src/Distributed/halo_communication.jl new file mode 100644 index 0000000000..68454c90dc --- /dev/null +++ b/src/Distributed/halo_communication.jl @@ -0,0 +1,151 @@ +using KernelAbstractions: @kernel, @index, Event, MultiEvent + +import Oceananigans.BoundaryConditions: + fill_halo_regions!, + fill_west_halo!, fill_east_halo!, fill_south_halo!, + fill_north_halo!, fill_bottom_halo!, fill_top_halo! 
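+
+# The methods below extend the halo-filling functions imported above so that a
+# HaloCommunicationBC triggers an MPI send/receive instead of a local halo fill.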
+ +##### +##### MPI tags for halo communication BCs +##### + +sides = (:west, :east, :south, :north, :top, :bottom) + +side_id = Dict( + :east => 1, :west => 2, + :north => 3, :south => 4, + :top => 5, :bottom => 6 +) + +opposite_side = Dict( + :east => :west, :west => :east, + :north => :south, :south => :north, + :top => :bottom, :bottom => :top +) + +# Unfortunately can't call MPI.Comm_size(MPI.COMM_WORLD) before MPI.Init(). +const MAX_RANKS = 10^3 +RANK_DIGITS = 3 + +# Define functions that return unique send and recv MPI tags for each side. +for side in sides + side_str = string(side) + send_tag_fn_name = Symbol(side, :_send_tag) + recv_tag_fn_name = Symbol(side, :_recv_tag) + @eval begin + function $send_tag_fn_name(bc) + from_digits = string(bc.condition.from, pad=RANK_DIGITS) + to_digits = string(bc.condition.to, pad=RANK_DIGITS) + side_digit = string(side_id[Symbol($side_str)]) + return parse(Int, from_digits * to_digits * side_digit) + end + + function $recv_tag_fn_name(bc) + from_digits = string(bc.condition.from, pad=RANK_DIGITS) + to_digits = string(bc.condition.to, pad=RANK_DIGITS) + side_digit = string(side_id[opposite_side[Symbol($side_str)]]) + return parse(Int, to_digits * from_digits * side_digit) + end + end +end + +##### +##### Filling halos for halo communication boundary conditions +##### + +@inline west_send_buffer(c, N, H) = c.parent[N+1:N+H, :, :] +@inline east_send_buffer(c, N, H) = c.parent[1+H:2H, :, :] +@inline south_send_buffer(c, N, H) = c.parent[:, N+1:N+H, :] +@inline north_send_buffer(c, N, H) = c.parent[:, 1+H:2H, :] +@inline top_send_buffer(c, N, H) = c.parent[:, :, 1+H:2H] +@inline bottom_send_buffer(c, N, H) = c.parent[:, :, N+1:N+H] + +@inline west_recv_buffer(grid) = zeros(grid.Hx, grid.Ny + 2grid.Hy, grid.Nz + 2grid.Hz) +@inline south_recv_buffer(grid) = zeros(grid.Nx + 2grid.Hx, grid.Hy, grid.Nz + 2grid.Hz) +@inline top_recv_buffer(grid) = zeros(grid.Nx + 2grid.Hx, grid.Ny + 2grid.Hy, grid.Hz) + +const east_recv_buffer = west_recv_buffer +const north_recv_buffer = south_recv_buffer +const bottom_recv_buffer = top_recv_buffer + +@inline copy_recv_buffer_into_west_halo!(c, N, H, buf) = (c.parent[ 1:H, :, :] .= buf) +@inline copy_recv_buffer_into_east_halo!(c, N, H, buf) = (c.parent[N+H+1:N+2H, :, :] .= buf) +@inline copy_recv_buffer_into_south_halo!(c, N, H, buf) = (c.parent[:, 1:H, :] .= buf) +@inline copy_recv_buffer_into_north_halo!(c, N, H, buf) = (c.parent[:, N+H+1:N+2H, :] .= buf) +@inline copy_recv_buffer_into_bottom_halo!(c, N, H, buf) = (c.parent[:, :, 1:H ] .= buf) +@inline copy_recv_buffer_into_top_halo!(c, N, H, buf) = (c.parent[:, :, N+H+1:N+2H] .= buf) + +function fill_halo_regions!(c::AbstractArray, bcs, arch::AbstractMultiArchitecture, grid, args...) + + barrier = Event(device(child_architecture(arch))) + + east_event, west_event = fill_east_and_west_halos!(c, bcs.east, bcs.west, arch, barrier, grid, args...) + # north_event, south_event = fill_north_and_south_halos!(c, bcs.north, bcs.south, arch, barrier, grid, args...) + # top_event, bottom_event = fill_top_and_bottom_halos!(c, bcs.east, bcs.west, arch, barrier, grid, args...) + + events = [east_event, west_event] # , north_event, south_event, top_event, bottom_event] + events = filter(e -> e isa Event, events) + wait(device(child_architecture(arch)), MultiEvent(Tuple(events))) + + return nothing +end + +function fill_east_and_west_halos!(c, east_bc, west_bc, arch, barrier, grid, args...) + east_event = fill_east_halo!(c, east_bc, child_architecture(arch), barrier, grid, args...) 
+ west_event = fill_west_halo!(c, west_bc, child_architecture(arch), barrier, grid, args...) + return east_event, west_event +end + +function fill_east_and_west_halos!(c, east_bc::HaloCommunicationBC, west_bc::HaloCommunicationBC, arch, barrier, grid, args...) + # 1 -> send east halo to eastern rank and fill east halo from eastern rank's west halo. + # 2 -> send west halo to western rank and fill west halo from western rank's east halo. + + @assert east_bc.condition.from == west_bc.condition.from + my_rank = east_bc.condition.from + + rank_to_send_to1 = east_bc.condition.to + rank_to_send_to2 = west_bc.condition.to + + send_buffer1 = east_send_buffer(c, grid.Nx, grid.Hx) + send_buffer2 = west_send_buffer(c, grid.Nx, grid.Hx) + + send_tag1 = east_send_tag(east_bc) + send_tag2 = west_send_tag(west_bc) + + @info "MPI.Isend: my_rank=$my_rank, rank_to_send_to=$rank_to_send_to1, send_tag=$send_tag1" + @info "MPI.Isend: my_rank=$my_rank, rank_to_send_to=$rank_to_send_to2, send_tag=$send_tag2" + + send_req1 = MPI.Isend(send_buffer1, rank_to_send_to1, send_tag1, MPI.COMM_WORLD) + send_req2 = MPI.Isend(send_buffer2, rank_to_send_to2, send_tag2, MPI.COMM_WORLD) + + rank_to_recv_from1 = east_bc.condition.to + rank_to_recv_from2 = west_bc.condition.to + + recv_buffer1 = east_recv_buffer(grid) + recv_buffer2 = west_recv_buffer(grid) + + recv_tag1 = east_recv_tag(east_bc) + recv_tag2 = west_recv_tag(west_bc) + + @info "MPI.Recv!: my_rank=$my_rank, rank_to_recv_from=$rank_to_recv_from1, recv_tag=$recv_tag1" + @info "MPI.Recv!: my_rank=$my_rank, rank_to_recv_from=$rank_to_recv_from2, recv_tag=$recv_tag2" + + MPI.Recv!(recv_buffer1, rank_to_recv_from1, recv_tag1, MPI.COMM_WORLD) + MPI.Recv!(recv_buffer2, rank_to_recv_from2, recv_tag2, MPI.COMM_WORLD) + + @info "Communication done!" + + copy_recv_buffer_into_east_halo!(c, grid.Nx, grid.Hx, recv_buffer1) + copy_recv_buffer_into_west_halo!(c, grid.Nx, grid.Hx, recv_buffer2) + + # @info "Sendrecv!: my_rank=$my_rank, rank_send_to=rank_recv_from=$rank_send_to, " * + # "send_tag=$send_tag, recv_tag=$recv_tag" + # + # MPI.Sendrecv!(send_buffer, rank_send_to, send_tag, + # recv_buffer, rank_recv_from, recv_tag, + # MPI.COMM_WORLD) + # + # @info "Sendrecv!: my_rank=$my_rank done!" + + return nothing, nothing +end \ No newline at end of file diff --git a/src/Distributed/halo_communication_bcs.jl b/src/Distributed/halo_communication_bcs.jl new file mode 100644 index 0000000000..aa21385545 --- /dev/null +++ b/src/Distributed/halo_communication_bcs.jl @@ -0,0 +1,54 @@ +using Oceananigans.BoundaryConditions: BCType + +import Oceananigans.BoundaryConditions: bctype_str, print_condition + +struct HaloCommunication <: BCType end + +HaloCommunicationBC = BoundaryCondition{<:HaloCommunication} + +bctype_str(::HaloCommunicationBC) ="HaloCommunication" + +HaloCommunicationBoundaryCondition(val; kwargs...) = BoundaryCondition(HaloCommunication, val; kwargs...) 
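+
+# Usage sketch (hypothetical ranks): a boundary condition describing a halo
+# exchange from rank 0 to rank 1 is built as
+#     HaloCommunicationBoundaryCondition(HaloCommunicationRanks(from=0, to=1))
+# with HaloCommunicationRanks defined below.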
+ +struct HaloCommunicationRanks{F, T} + from :: F + to :: T +end + +HaloCommunicationRanks(; from, to) = HaloCommunicationRanks(from, to) + +print_condition(hcr::HaloCommunicationRanks) = "(from rank $(hcr.from) to rank $(hcr.to))" + +function inject_halo_communication_boundary_conditions(field_bcs, my_rank, connectivity) + rank_east = connectivity.east + rank_west = connectivity.west + rank_north = connectivity.north + rank_south = connectivity.south + rank_top = connectivity.top + rank_bottom = connectivity.bottom + + east_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_east) + west_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_west) + north_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_north) + south_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_south) + top_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_top) + bottom_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_bottom) + + east_comm_bc = HaloCommunicationBoundaryCondition(east_comm_ranks) + west_comm_bc = HaloCommunicationBoundaryCondition(west_comm_ranks) + north_comm_bc = HaloCommunicationBoundaryCondition(north_comm_ranks) + south_comm_bc = HaloCommunicationBoundaryCondition(south_comm_ranks) + top_comm_bc = HaloCommunicationBoundaryCondition(top_comm_ranks) + bottom_comm_bc = HaloCommunicationBoundaryCondition(bottom_comm_ranks) + + x_bcs = CoordinateBoundaryConditions(isnothing(rank_west) ? field_bcs.west : west_comm_bc, + isnothing(rank_east) ? field_bcs.east : east_comm_bc) + + y_bcs = CoordinateBoundaryConditions(isnothing(rank_south) ? field_bcs.south : south_comm_bc, + isnothing(rank_north) ? field_bcs.north : north_comm_bc) + + z_bcs = CoordinateBoundaryConditions(isnothing(rank_bottom) ? field_bcs.bottom : bottom_comm_bc, + isnothing(rank_top) ? field_bcs.top : top_comm_bc) + + return FieldBoundaryConditions(x_bcs, y_bcs, z_bcs) +end \ No newline at end of file From 493e84e189fe5f53a55464a29cab7222c13f0ec0 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Fri, 5 Feb 2021 20:47:09 -0500 Subject: [PATCH 051/100] East/west halo communication passes tests! 
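
This commit adds halo-view helpers (west_halo, east_halo, ...) in
distributed_utils.jl, a Base.show method for DistributedModel, and a first
communication test: each rank sets its fields to its own rank number, calls
fill_halo_regions!, and checks that its east and west halos end up holding
the neighboring ranks' values.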
--- src/Distributed/distributed_model.jl | 6 +++++ src/Distributed/distributed_utils.jl | 20 +++++++++++++++ src/Distributed/test_distributed_model.jl | 30 +++++++++++++++++++++++ 3 files changed, 56 insertions(+) create mode 100644 src/Distributed/distributed_utils.jl diff --git a/src/Distributed/distributed_model.jl b/src/Distributed/distributed_model.jl index 2a5c06593c..c2d02c16d5 100644 --- a/src/Distributed/distributed_model.jl +++ b/src/Distributed/distributed_model.jl @@ -3,6 +3,7 @@ using MPI using Oceananigans using Oceananigans.Grids +include("distributed_utils.jl") include("distributed_architectures.jl") include("halo_communication_bcs.jl") include("halo_communication.jl") @@ -81,3 +82,8 @@ function DistributedModel(; architecture, grid, boundary_conditions=nothing, mod return DistributedModel(architecture, grid, my_model) end + +function Base.show(io::IO, dm::DistributedModel) + print(io, "DistributedModel with ") + print(io, dm.architecture) +end diff --git a/src/Distributed/distributed_utils.jl b/src/Distributed/distributed_utils.jl new file mode 100644 index 0000000000..ecd6879832 --- /dev/null +++ b/src/Distributed/distributed_utils.jl @@ -0,0 +1,20 @@ +using Oceananigans.Grids: left_halo_indices, right_halo_indices +using Oceananigans.Fields: AbstractField + +@inline west_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = + view(f.data, left_halo_indices(LX, topology(f, 1), f.grid.Nx, f.grid.Hx), :, :) + +@inline east_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = + view(f.data, right_halo_indices(LX, topology(f, 1), f.grid.Nx, f.grid.Hx), :, :) + +@inline south_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = + view(f.data, :, left_halo_indices(LY, topology(f, 2), f.grid.Ny, f.grid.Hy), :) + +@inline north_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = + view(f.data, :, right_halo_indices(LY, topology(f, 2), f.grid.Ny, f.grid.Hy), :) + +@inline bottom_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = + view(f.data, :, :, left_halo_indices(LZ, topology(f, 3), f.grid.Nz, f.grid.Hz), :) + +@inline bottom_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = + view(f.data, :, :, right_halo_indices(LZ, topology(f, 3), f.grid.Nz, f.grid.Hz), :) diff --git a/src/Distributed/test_distributed_model.jl b/src/Distributed/test_distributed_model.jl index 1653b284b5..bec9ba4578 100644 --- a/src/Distributed/test_distributed_model.jl +++ b/src/Distributed/test_distributed_model.jl @@ -230,6 +230,31 @@ function run_triply_periodic_bc_injection_tests_with_114_ranks() end end +##### +##### Halo communication +##### + +function run_triply_periodic_halo_communication_tests_with_411_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 1), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) + dm = DistributedModel(architecture=arch, grid=full_grid) + + for field in fields(dm.model) + set!(field, arch.my_rank) + fill_halo_regions!(field, arch) + + @test all(east_halo(u) .== arch.connectivity.east) + @test all(west_halo(u) .== arch.connectivity.west) + end + + return nothing +end + +##### +##### Run tests! +##### + @testset "Distributed MPI Oceananigans" begin @info "Testing distributed MPI Oceananigans..." @@ -254,6 +279,11 @@ end run_triply_periodic_bc_injection_tests_with_114_ranks() end + @testset "Halo communication" begin + @info " Testing halo communication..." 
+        run_triply_periodic_halo_communication_tests_with_411_ranks()
+    end
+
     # TODO: 221 ranks
     # TODO: triply bounded
 end

From e117b1384db2ec776e73e16b06a6301207f5cfb8 Mon Sep 17 00:00:00 2001
From: ali-ramadhan
Date: Fri, 5 Feb 2021 21:25:36 -0500
Subject: [PATCH 052/100] More modular halo communication

---
 src/Distributed/halo_communication.jl     | 94 ++++++++++++-----------
 src/Distributed/test_distributed_model.jl |  5 +-
 2 files changed, 53 insertions(+), 46 deletions(-)

diff --git a/src/Distributed/halo_communication.jl b/src/Distributed/halo_communication.jl
index 68454c90dc..521d02387a 100644
--- a/src/Distributed/halo_communication.jl
+++ b/src/Distributed/halo_communication.jl
@@ -28,23 +28,28 @@ const MAX_RANKS = 10^3
 RANK_DIGITS = 3
 
 # Define functions that return unique send and recv MPI tags for each side.
+# Each tag is an integer whose decimal digits encode, from the ones place:
+# digit 1: the side
+# digits 2-4: the rank being sent to
+# digits 5-7: the rank sending
+
 for side in sides
     side_str = string(side)
     send_tag_fn_name = Symbol(side, :_send_tag)
     recv_tag_fn_name = Symbol(side, :_recv_tag)
     @eval begin
-        function $send_tag_fn_name(bc)
-            from_digits = string(bc.condition.from, pad=RANK_DIGITS)
-            to_digits = string(bc.condition.to, pad=RANK_DIGITS)
+        function $send_tag_fn_name(my_rank, rank_to_send_to)
+            from_digits = string(my_rank, pad=RANK_DIGITS)
+            to_digits = string(rank_to_send_to, pad=RANK_DIGITS)
             side_digit = string(side_id[Symbol($side_str)])
             return parse(Int, from_digits * to_digits * side_digit)
         end
 
-        function $recv_tag_fn_name(bc)
-            from_digits = string(bc.condition.from, pad=RANK_DIGITS)
-            to_digits = string(bc.condition.to, pad=RANK_DIGITS)
+        function $recv_tag_fn_name(my_rank, rank_to_recv_from)
+            from_digits = string(rank_to_recv_from, pad=RANK_DIGITS)
+            to_digits = string(my_rank, pad=RANK_DIGITS)
             side_digit = string(side_id[opposite_side[Symbol($side_str)]])
-            return parse(Int, to_digits * from_digits * side_digit)
+            return parse(Int, from_digits * to_digits * side_digit)
         end
     end
 end
@@ -96,56 +101,57 @@ function fill_east_and_west_halos!(c, east_bc, west_bc, arch, barrier, grid, arg
     return east_event, west_event
 end
 
-function fill_east_and_west_halos!(c, east_bc::HaloCommunicationBC, west_bc::HaloCommunicationBC, arch, barrier, grid, args...)
-    # 1 -> send east halo to eastern rank and fill east halo from eastern rank's west halo.
-    # 2 -> send west halo to western rank and fill west halo from western rank's east halo.
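+
+# For example, with RANK_DIGITS = 3, rank 1 sending east to rank 2 uses
+# east_send_tag(1, 2) == parse(Int, "001" * "002" * "1") == 10021, and the
+# receiving rank computes west_recv_tag(2, 1) == 10021, so each Isend below
+# is matched with the corresponding Recv! on the neighboring rank.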
+function send_east_halo(c, grid, my_rank, rank_to_send_to) + send_buffer = east_send_buffer(c, grid.Nx, grid.Hx) + send_tag = east_send_tag(my_rank, rank_to_send_to) - @assert east_bc.condition.from == west_bc.condition.from - my_rank = east_bc.condition.from + @debug "Sending east halo: my_rank=$my_rank, rank_to_send_to=$rank_to_send_to, send_tag=$send_tag" + status = MPI.Isend(send_buffer, rank_to_send_to, send_tag, MPI.COMM_WORLD) - rank_to_send_to1 = east_bc.condition.to - rank_to_send_to2 = west_bc.condition.to + return status +end + +function send_west_halo(c, grid, my_rank, rank_to_send_to) + send_buffer = west_send_buffer(c, grid.Nx, grid.Hx) + send_tag = west_send_tag(my_rank, rank_to_send_to) - send_buffer1 = east_send_buffer(c, grid.Nx, grid.Hx) - send_buffer2 = west_send_buffer(c, grid.Nx, grid.Hx) + @debug "Sending west halo: my_rank=$my_rank, rank_to_send_to=$rank_to_send_to, send_tag=$send_tag" + status = MPI.Isend(send_buffer, rank_to_send_to, send_tag, MPI.COMM_WORLD) - send_tag1 = east_send_tag(east_bc) - send_tag2 = west_send_tag(west_bc) + return status +end - @info "MPI.Isend: my_rank=$my_rank, rank_to_send_to=$rank_to_send_to1, send_tag=$send_tag1" - @info "MPI.Isend: my_rank=$my_rank, rank_to_send_to=$rank_to_send_to2, send_tag=$send_tag2" +function recv_and_fill_east_halo!(c, grid, my_rank, rank_to_recv_from) + recv_buffer = east_recv_buffer(grid) + recv_tag = east_recv_tag(my_rank, rank_to_recv_from) - send_req1 = MPI.Isend(send_buffer1, rank_to_send_to1, send_tag1, MPI.COMM_WORLD) - send_req2 = MPI.Isend(send_buffer2, rank_to_send_to2, send_tag2, MPI.COMM_WORLD) + @debug "Receiving east halo: my_rank=$my_rank, rank_to_recv_from=$rank_to_recv_from, recv_tag=$recv_tag" + MPI.Recv!(recv_buffer, rank_to_recv_from, recv_tag, MPI.COMM_WORLD) - rank_to_recv_from1 = east_bc.condition.to - rank_to_recv_from2 = west_bc.condition.to + copy_recv_buffer_into_east_halo!(c, grid.Nx, grid.Hx, recv_buffer) - recv_buffer1 = east_recv_buffer(grid) - recv_buffer2 = west_recv_buffer(grid) + return nothing +end - recv_tag1 = east_recv_tag(east_bc) - recv_tag2 = west_recv_tag(west_bc) +function recv_and_fill_west_halo!(c, grid, my_rank, rank_to_recv_from) + recv_buffer = west_recv_buffer(grid) + recv_tag = west_recv_tag(my_rank, rank_to_recv_from) - @info "MPI.Recv!: my_rank=$my_rank, rank_to_recv_from=$rank_to_recv_from1, recv_tag=$recv_tag1" - @info "MPI.Recv!: my_rank=$my_rank, rank_to_recv_from=$rank_to_recv_from2, recv_tag=$recv_tag2" + @debug "Receiving west halo: my_rank=$my_rank, rank_to_recv_from=$rank_to_recv_from, recv_tag=$recv_tag" + MPI.Recv!(recv_buffer, rank_to_recv_from, recv_tag, MPI.COMM_WORLD) - MPI.Recv!(recv_buffer1, rank_to_recv_from1, recv_tag1, MPI.COMM_WORLD) - MPI.Recv!(recv_buffer2, rank_to_recv_from2, recv_tag2, MPI.COMM_WORLD) + copy_recv_buffer_into_west_halo!(c, grid.Nx, grid.Hx, recv_buffer) - @info "Communication done!" + return nothing +end - copy_recv_buffer_into_east_halo!(c, grid.Nx, grid.Hx, recv_buffer1) - copy_recv_buffer_into_west_halo!(c, grid.Nx, grid.Hx, recv_buffer2) +function fill_east_and_west_halos!(c, east_bc::HaloCommunicationBC, west_bc::HaloCommunicationBC, arch, barrier, grid, args...) 
+ my_rank = east_bc.condition.from + send_east_halo(c, grid, my_rank, east_bc.condition.to) + send_west_halo(c, grid, my_rank, west_bc.condition.to) - # @info "Sendrecv!: my_rank=$my_rank, rank_send_to=rank_recv_from=$rank_send_to, " * - # "send_tag=$send_tag, recv_tag=$recv_tag" - # - # MPI.Sendrecv!(send_buffer, rank_send_to, send_tag, - # recv_buffer, rank_recv_from, recv_tag, - # MPI.COMM_WORLD) - # - # @info "Sendrecv!: my_rank=$my_rank done!" + recv_and_fill_east_halo!(c, grid, my_rank, east_bc.condition.to) + recv_and_fill_west_halo!(c, grid, my_rank, west_bc.condition.to) return nothing, nothing -end \ No newline at end of file +end diff --git a/src/Distributed/test_distributed_model.jl b/src/Distributed/test_distributed_model.jl index bec9ba4578..77e4d22ae1 100644 --- a/src/Distributed/test_distributed_model.jl +++ b/src/Distributed/test_distributed_model.jl @@ -244,8 +244,8 @@ function run_triply_periodic_halo_communication_tests_with_411_ranks() set!(field, arch.my_rank) fill_halo_regions!(field, arch) - @test all(east_halo(u) .== arch.connectivity.east) - @test all(west_halo(u) .== arch.connectivity.west) + @test all(east_halo(field) .== arch.connectivity.east) + @test all(west_halo(field) .== arch.connectivity.west) end return nothing @@ -279,6 +279,7 @@ end run_triply_periodic_bc_injection_tests_with_114_ranks() end + # TODO: Larger halos! @testset "Halo communication" begin @info " Testing halo communication..." run_triply_periodic_halo_communication_tests_with_411_ranks() From 3cd719ce1c34142b7d3438e31c2efdef314a6258 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Fri, 5 Feb 2021 21:53:07 -0500 Subject: [PATCH 053/100] Send and receive views to avoid memory allocations --- src/Distributed/distributed_utils.jl | 32 +++++++++++--- src/Distributed/halo_communication.jl | 61 +++++++++------------------ 2 files changed, 44 insertions(+), 49 deletions(-) diff --git a/src/Distributed/distributed_utils.jl b/src/Distributed/distributed_utils.jl index ecd6879832..dfd6dc0fbc 100644 --- a/src/Distributed/distributed_utils.jl +++ b/src/Distributed/distributed_utils.jl @@ -1,20 +1,38 @@ -using Oceananigans.Grids: left_halo_indices, right_halo_indices +using Oceananigans.Grids: left_halo_indices, right_halo_indices, underlying_left_halo_indices, underlying_right_halo_indices using Oceananigans.Fields: AbstractField -@inline west_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = +west_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = view(f.data, left_halo_indices(LX, topology(f, 1), f.grid.Nx, f.grid.Hx), :, :) -@inline east_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = +east_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = view(f.data, right_halo_indices(LX, topology(f, 1), f.grid.Nx, f.grid.Hx), :, :) -@inline south_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = +south_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = view(f.data, :, left_halo_indices(LY, topology(f, 2), f.grid.Ny, f.grid.Hy), :) -@inline north_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = +north_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = view(f.data, :, right_halo_indices(LY, topology(f, 2), f.grid.Ny, f.grid.Hy), :) -@inline bottom_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = +bottom_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = view(f.data, :, :, left_halo_indices(LZ, topology(f, 3), f.grid.Nz, f.grid.Hz), :) -@inline bottom_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = +bottom_halo(f::AbstractField{LX, LY, LZ}) where 
{LX, LY, LZ} =
     view(f.data, :, :, right_halo_indices(LZ, topology(f, 3), f.grid.Nz, f.grid.Hz), :)
+
+underlying_west_halo(f, grid, location) =
+    view(f.parent, underlying_left_halo_indices(location, topology(grid, 1), grid.Nx, grid.Hx), :, :)
+
+underlying_east_halo(f, grid, location) =
+    view(f.parent, underlying_right_halo_indices(location, topology(grid, 1), grid.Nx, grid.Hx), :, :)
+
+underlying_south_halo(f, grid, location) =
+    view(f.parent, :, underlying_left_halo_indices(location, topology(grid, 2), grid.Ny, grid.Hy), :)
+
+underlying_north_halo(f, grid, location) =
+    view(f.parent, :, underlying_right_halo_indices(location, topology(grid, 2), grid.Ny, grid.Hy), :)
+
+underlying_bottom_halo(f, grid, location) =
+    view(f.parent, :, :, underlying_left_halo_indices(location, topology(grid, 3), grid.Nz, grid.Hz))
+
+underlying_top_halo(f, grid, location) =
+    view(f.parent, :, :, underlying_right_halo_indices(location, topology(grid, 3), grid.Nz, grid.Hz))
diff --git a/src/Distributed/halo_communication.jl b/src/Distributed/halo_communication.jl
index 521d02387a..e773fa9356 100644
--- a/src/Distributed/halo_communication.jl
+++ b/src/Distributed/halo_communication.jl
@@ -58,33 +58,14 @@ end
 ##### Filling halos for halo communication boundary conditions
 #####
 
-@inline west_send_buffer(c, N, H) = c.parent[N+1:N+H, :, :]
-@inline east_send_buffer(c, N, H) = c.parent[1+H:2H, :, :]
-@inline south_send_buffer(c, N, H) = c.parent[:, N+1:N+H, :]
-@inline north_send_buffer(c, N, H) = c.parent[:, 1+H:2H, :]
-@inline top_send_buffer(c, N, H) = c.parent[:, :, 1+H:2H]
-@inline bottom_send_buffer(c, N, H) = c.parent[:, :, N+1:N+H]
-
-@inline west_recv_buffer(grid) = zeros(grid.Hx, grid.Ny + 2grid.Hy, grid.Nz + 2grid.Hz)
-@inline south_recv_buffer(grid) = zeros(grid.Nx + 2grid.Hx, grid.Hy, grid.Nz + 2grid.Hz)
-@inline top_recv_buffer(grid) = zeros(grid.Nx + 2grid.Hx, grid.Ny + 2grid.Hy, grid.Hz)
-
-const east_recv_buffer = west_recv_buffer
-const north_recv_buffer = south_recv_buffer
-const bottom_recv_buffer = top_recv_buffer
-
-@inline copy_recv_buffer_into_west_halo!(c, N, H, buf) = (c.parent[ 1:H, :, :] .= buf)
-@inline copy_recv_buffer_into_east_halo!(c, N, H, buf) = (c.parent[N+H+1:N+2H, :, :] .= buf)
-@inline copy_recv_buffer_into_south_halo!(c, N, H, buf) = (c.parent[:, 1:H, :] .= buf)
-@inline copy_recv_buffer_into_north_halo!(c, N, H, buf) = (c.parent[:, N+H+1:N+2H, :] .= buf)
-@inline copy_recv_buffer_into_bottom_halo!(c, N, H, buf) = (c.parent[:, :, 1:H ] .= buf)
-@inline copy_recv_buffer_into_top_halo!(c, N, H, buf) = (c.parent[:, :, N+H+1:N+2H] .= buf)
-
-function fill_halo_regions!(c::AbstractArray, bcs, arch::AbstractMultiArchitecture, grid, args...)
+fill_halo_regions!(field::AbstractField{LX, LY, LZ}, arch::AbstractMultiArchitecture, args...) where {LX, LY, LZ} =
+    fill_halo_regions!(field.data, field.boundary_conditions, arch, field.grid, (LX, LY, LZ), args...)
+
+function fill_halo_regions!(c::AbstractArray, bcs, arch::AbstractMultiArchitecture, grid, c_location, args...)
 
     barrier = Event(device(child_architecture(arch)))
 
-    east_event, west_event = fill_east_and_west_halos!(c, bcs.east, bcs.west, arch, barrier, grid, args...)
+    east_event, west_event = fill_east_and_west_halos!(c, bcs.east, bcs.west, arch, barrier, grid, c_location, args...)
     # north_event, south_event = fill_north_and_south_halos!(c, bcs.north, bcs.south, arch, barrier, grid, args...)
     # top_event, bottom_event = fill_top_and_bottom_halos!(c, bcs.east, bcs.west, arch, barrier, grid, args...)
@@ -95,14 +76,14 @@ function fill_halo_regions!(c::AbstractArray, bcs, arch::AbstractMultiArchitectu return nothing end -function fill_east_and_west_halos!(c, east_bc, west_bc, arch, barrier, grid, args...) +function fill_east_and_west_halos!(c, east_bc, west_bc, arch, barrier, grid, location, args...) east_event = fill_east_halo!(c, east_bc, child_architecture(arch), barrier, grid, args...) west_event = fill_west_halo!(c, west_bc, child_architecture(arch), barrier, grid, args...) return east_event, west_event end -function send_east_halo(c, grid, my_rank, rank_to_send_to) - send_buffer = east_send_buffer(c, grid.Nx, grid.Hx) +function send_east_halo(c, grid, c_location, my_rank, rank_to_send_to) + send_buffer = underlying_east_halo(c, grid, c_location) send_tag = east_send_tag(my_rank, rank_to_send_to) @debug "Sending east halo: my_rank=$my_rank, rank_to_send_to=$rank_to_send_to, send_tag=$send_tag" @@ -111,8 +92,8 @@ function send_east_halo(c, grid, my_rank, rank_to_send_to) return status end -function send_west_halo(c, grid, my_rank, rank_to_send_to) - send_buffer = west_send_buffer(c, grid.Nx, grid.Hx) +function send_west_halo(c, grid, c_location, my_rank, rank_to_send_to) + send_buffer = underlying_west_halo(c, grid, c_location) send_tag = west_send_tag(my_rank, rank_to_send_to) @debug "Sending west halo: my_rank=$my_rank, rank_to_send_to=$rank_to_send_to, send_tag=$send_tag" @@ -121,37 +102,33 @@ function send_west_halo(c, grid, my_rank, rank_to_send_to) return status end -function recv_and_fill_east_halo!(c, grid, my_rank, rank_to_recv_from) - recv_buffer = east_recv_buffer(grid) +function recv_and_fill_east_halo!(c, grid, c_location, my_rank, rank_to_recv_from) + recv_buffer = underlying_east_halo(c, grid, c_location) recv_tag = east_recv_tag(my_rank, rank_to_recv_from) @debug "Receiving east halo: my_rank=$my_rank, rank_to_recv_from=$rank_to_recv_from, recv_tag=$recv_tag" MPI.Recv!(recv_buffer, rank_to_recv_from, recv_tag, MPI.COMM_WORLD) - copy_recv_buffer_into_east_halo!(c, grid.Nx, grid.Hx, recv_buffer) - return nothing end -function recv_and_fill_west_halo!(c, grid, my_rank, rank_to_recv_from) - recv_buffer = west_recv_buffer(grid) +function recv_and_fill_west_halo!(c, grid, c_location, my_rank, rank_to_recv_from) + recv_buffer = underlying_west_halo(c, grid, c_location) recv_tag = west_recv_tag(my_rank, rank_to_recv_from) @debug "Receiving west halo: my_rank=$my_rank, rank_to_recv_from=$rank_to_recv_from, recv_tag=$recv_tag" MPI.Recv!(recv_buffer, rank_to_recv_from, recv_tag, MPI.COMM_WORLD) - copy_recv_buffer_into_west_halo!(c, grid.Nx, grid.Hx, recv_buffer) - return nothing end -function fill_east_and_west_halos!(c, east_bc::HaloCommunicationBC, west_bc::HaloCommunicationBC, arch, barrier, grid, args...) +function fill_east_and_west_halos!(c, east_bc::HaloCommunicationBC, west_bc::HaloCommunicationBC, arch, barrier, grid, c_location, args...) 
my_rank = east_bc.condition.from - send_east_halo(c, grid, my_rank, east_bc.condition.to) - send_west_halo(c, grid, my_rank, west_bc.condition.to) + send_east_halo(c, grid, c_location, my_rank, east_bc.condition.to) + send_west_halo(c, grid, c_location, my_rank, west_bc.condition.to) - recv_and_fill_east_halo!(c, grid, my_rank, east_bc.condition.to) - recv_and_fill_west_halo!(c, grid, my_rank, west_bc.condition.to) + recv_and_fill_east_halo!(c, grid, c_location, my_rank, east_bc.condition.to) + recv_and_fill_west_halo!(c, grid, c_location, my_rank, west_bc.condition.to) return nothing, nothing end From 8fcfe01ee832ed7bc408d73030aaefe9f3c39fd0 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Fri, 5 Feb 2021 22:47:01 -0500 Subject: [PATCH 054/100] Nuke super ancient sandbox --- sandbox/tiled_halos.jl | 70 ------------ sandbox/tiled_halos_mpi.jl | 215 ------------------------------------- 2 files changed, 285 deletions(-) delete mode 100644 sandbox/tiled_halos.jl delete mode 100644 sandbox/tiled_halos_mpi.jl diff --git a/sandbox/tiled_halos.jl b/sandbox/tiled_halos.jl deleted file mode 100644 index e2f90babb4..0000000000 --- a/sandbox/tiled_halos.jl +++ /dev/null @@ -1,70 +0,0 @@ -using Oceananigans, Test - -@inline incmod1(a, n) = ifelse(a==n, 1, a + 1) -@inline decmod1(a, n) = ifelse(a==1, n, a - 1) -@inline index2rank(I, J, Mx, My) = J*My + I - -@inline north_halo(tile) = @views @inbounds tile.data[1-tile.grid.Hx:0, :, :] -@inline south_halo(tile) = @views @inbounds tile.data[tile.grid.Nx+1:tile.grid.Nx+tile.grid.Hx, :, :] -@inline west_halo(tile) = @views @inbounds tile.data[:, 1-tile.grid.Hy:0, :] -@inline east_halo(tile) = @views @inbounds tile.data[:, tile.grid.Ny+1:tile.grid.Ny+tile.grid.Hy, :] - -@inline north_data(tile) = @views @inbounds tile.data[1:tile.grid.Hx, :, :] -@inline south_data(tile) = @views @inbounds tile.data[tile.grid.Nx-tile.grid.Hx+1:tile.grid.Nx, :, :] -@inline west_data(tile) = @views @inbounds tile.data[:, 1:tile.grid.Hy, :] -@inline east_data(tile) = @views @inbounds tile.data[:, tile.grid.Ny-tile.grid.Hy+1:tile.grid.Ny, :] - -function fill_halo_regions_tiled!(tiles, Mx, My) - for J in 0:My-1, I in 0:Mx-1 - rank = index2rank(I, J, Mx, My) - - I⁻, I⁺ = mod(I-1, Mx), mod(I+1, Mx) - J⁻, J⁺ = mod(J-1, My), mod(J+1, My) - - north_rank = index2rank(I, J⁻, Mx, My) - south_rank = index2rank(I, J⁺, Mx, My) - east_rank = index2rank(I⁺, J, Mx, My) - west_rank = index2rank(I⁻, J, Mx, My) - - east_halo(tiles[rank+1]) .= west_data(tiles[east_rank+1]) - west_halo(tiles[rank+1]) .= east_data(tiles[west_rank+1]) - north_halo(tiles[rank+1]) .= south_data(tiles[north_rank+1]) - south_halo(tiles[rank+1]) .= north_data(tiles[south_rank+1]) - end -end - -FT, arch = Float64, CPU() - -Nx, Ny, Nz = 16, 16, 16 -Lx, Ly, Lz = 10, 10, 10 -N, L = (Nx, Ny, Nz), (Lx, Ly, Lz) - -grid = RegularCartesianGrid(N, L) - -# MPI ranks along each dimension -Mx, My = 2, 2 - -R = rand(Nx, Ny, Nz) - -tiles = [] -for I in 0:Mx-1, J in 0:My-1 - Nx′, Ny′, Nz′ = Int(Nx/Mx), Int(Ny/My), Nz - Lx′, Ly′, Lz′ = Lx/Mx, Ly/My, Lz - tile_grid = RegularCartesianGrid((Nx′, Ny′, Nz′), (Lx′, Ly′, Lz′)) - - tile = CellField(FT, arch, tile_grid) - - i1, i2 = I*Nx′+1, (I+1)*Nx′ - j1, j2 = J*Ny′+1, (J+1)*Ny′ - data(tile) .= R[i1:i2, j1:j2, :] - - push!(tiles, tile) -end - -fill_halo_regions_tiled!(tiles, Mx, My) -fill_halo_regions_tiled!(tiles, Mx, My) - -@test all(tiles[1].data[1:end, 1:end, :] .== R[1:9, 1:9, :]) -@test all(tiles[2].data[1:end, 0:end-1, :] .== R[1:9, 8:end, :]) -@test all(tiles[3].data[0:end-1, 1:end, :] 
.== R[8:end, 1:9, :]) -@test all(tiles[4].data[0:end-1, 0:end-1, :] .== R[8:end, 8:end, :]) diff --git a/sandbox/tiled_halos_mpi.jl b/sandbox/tiled_halos_mpi.jl deleted file mode 100644 index 4a01425482..0000000000 --- a/sandbox/tiled_halos_mpi.jl +++ /dev/null @@ -1,215 +0,0 @@ -using Printf - -using CuArrays -import MPI - -using Oceananigans - -# Source: https://github.com/JuliaCI/BenchmarkTools.jl/blob/master/src/trials.jl -function prettytime(t) - if t < 1e3 - value, units = t, "ns" - elseif t < 1e6 - value, units = t / 1e3, "μs" - elseif t < 1e9 - value, units = t / 1e6, "ms" - else - s = t / 1e9 - if s < 60 - value, units = s, "s" - else - value, units = (s / 60), "min" - end - end - return string(@sprintf("%.3f", value), " ", units) -end - -function prettybandwidth(b) - if b < 1024 - val, units = b, "B/s" - elseif b < 1024^2 - val, units = b / 1024, "KiB/s" - elseif b < 1024^3 - val, units = b / 1024^2, "MiB/s" - else - val, units = b / 1024^3, "GiB/s" - end - return string(@sprintf("%.3f", val), " ", units) -end - -@inline index2rank(I, J, Mx, My) = J*My + I -@inline rank2index(r, Mx, My) = mod(r, Mx), div(r, My) - -@inline north_halo(tile) = @views @inbounds tile.data.parent[1:tile.grid.Hx, :, :] -@inline south_halo(tile) = @views @inbounds tile.data.parent[tile.grid.Nx+tile.grid.Hx+1:tile.grid.Nx+2tile.grid.Hx, :, :] -@inline west_halo(tile) = @views @inbounds tile.data.parent[:, 1:tile.grid.Hy, :] -@inline east_halo(tile) = @views @inbounds tile.data.parent[:, tile.grid.Ny+tile.grid.Hy+1:tile.grid.Ny+2tile.grid.Hy, :] - -@inline north_data(tile) = @views @inbounds tile.data.parent[1+tile.grid.Hx:2tile.grid.Hx, :, :] -@inline south_data(tile) = @views @inbounds tile.data.parent[tile.grid.Nx+1:tile.grid.Nx+tile.grid.Hx, :, :] -@inline west_data(tile) = @views @inbounds tile.data.parent[:, 1+tile.grid.Hy:2tile.grid.Hy, :] -@inline east_data(tile) = @views @inbounds tile.data.parent[:, tile.grid.Ny+1:tile.grid.Ny+tile.grid.Hy, :] - -@inline distribute_tag(rank) = 100 + rank -@inline send_west_tag(rank) = 200 + rank -@inline send_east_tag(rank) = 300 + rank -@inline send_north_tag(rank) = 400 + rank -@inline send_south_tag(rank) = 500 + rank - -function send_halo_data(tile, Mx, My, comm) - rank = MPI.Comm_rank(comm) - - I, J = rank2index(rank, Mx, My) - I⁻, I⁺ = mod(I-1, Mx), mod(I+1, Mx) - J⁻, J⁺ = mod(J-1, My), mod(J+1, My) - - north_rank = index2rank(I, J⁻, Mx, My) - south_rank = index2rank(I, J⁺, Mx, My) - east_rank = index2rank(I⁺, J, Mx, My) - west_rank = index2rank(I⁻, J, Mx, My) - - # cuzeros doesn't exist anymore. Use similar! 
- west_data_buf = zeros(size(west_data(tile))) - east_data_buf = zeros(size(east_data(tile))) - north_data_buf = zeros(size(north_data(tile))) - south_data_buf = zeros(size(south_data(tile))) - - west_data_buf .= copy(west_data(tile)) - east_data_buf .= copy(east_data(tile)) - north_data_buf .= copy(north_data(tile)) - south_data_buf .= copy(south_data(tile)) - - se_req = MPI.Isend(east_data_buf, east_rank, send_east_tag(rank), comm) - sw_req = MPI.Isend(west_data_buf, west_rank, send_west_tag(rank), comm) - sn_req = MPI.Isend(north_data_buf, north_rank, send_north_tag(rank), comm) - ss_req = MPI.Isend(south_data_buf, south_rank, send_south_tag(rank), comm) - - @debug "[rank $rank] sending #$(send_east_tag(rank)) to rank $east_rank" - @debug "[rank $rank] sending #$(send_west_tag(rank)) to rank $west_rank" - @debug "[rank $rank] sending #$(send_north_tag(rank)) to rank $north_rank" - @debug "[rank $rank] sending #$(send_south_tag(rank)) to rank $south_rank" -end - -function receive_halo_data(tile, Mx, My, comm) - rank = MPI.Comm_rank(comm) - - I, J = rank2index(rank, Mx, My) - I⁻, I⁺ = mod(I-1, Mx), mod(I+1, Mx) - J⁻, J⁺ = mod(J-1, My), mod(J+1, My) - - north_rank = index2rank(I, J⁻, Mx, My) - south_rank = index2rank(I, J⁺, Mx, My) - east_rank = index2rank(I⁺, J, Mx, My) - west_rank = index2rank(I⁻, J, Mx, My) - - west_halo_buf = zeros(size(west_halo(tile))) - east_halo_buf = zeros(size(east_halo(tile))) - north_halo_buf = zeros(size(north_halo(tile))) - south_halo_buf = zeros(size(south_halo(tile))) - - re_req = MPI.Irecv!(west_halo_buf, west_rank, send_east_tag(west_rank), comm) - rw_req = MPI.Irecv!(east_halo_buf, east_rank, send_west_tag(east_rank), comm) - rn_req = MPI.Irecv!(south_halo_buf, south_rank, send_north_tag(south_rank), comm) - rs_req = MPI.Irecv!(north_halo_buf, north_rank, send_south_tag(north_rank), comm) - - @debug "[rank $rank] waiting for #$(send_east_tag(west_rank)) from rank $west_rank..." - @debug "[rank $rank] waiting for #$(send_west_tag(east_rank)) from rank $east_rank..." - @debug "[rank $rank] waiting for #$(send_north_tag(south_rank)) from rank $south_rank..." - @debug "[rank $rank] waiting for #$(send_south_tag(north_rank)) from rank $north_rank..." 
- - MPI.Waitall!([re_req, rw_req, rn_req, rs_req]) - - east_halo(tile) .= CuArray(east_halo_buf) - west_halo(tile) .= CuArray(west_halo_buf) - north_halo(tile) .= CuArray(north_halo_buf) - south_halo(tile) .= CuArray(south_halo_buf) -end - -function fill_halo_regions_mpi!(FT, arch, Nx, Ny, Nz, Mx, My) - Lx, Ly, Lz = 10, 10, 10 - - Nx′, Ny′, Nz′ = Int(Nx/Mx), Int(Ny/My), Nz - Lx′, Ly′, Lz′ = Lx/Mx, Ly/My, Lz - - comm = MPI.COMM_WORLD - - MPI.Barrier(comm) - - rank = MPI.Comm_rank(comm) - R = MPI.Comm_size(comm) - - I, J = rank2index(rank, Mx, My) - I⁻, I⁺ = mod(I-1, Mx), mod(I+1, Mx) - J⁻, J⁺ = mod(J-1, My), mod(J+1, My) - Nx′, Ny′, Nz′ = Int(Nx/Mx), Int(Ny/My), Nz - Lx′, Ly′, Lz′ = Lx/Mx, Ly/My, Lz - - north_rank = index2rank(I, J⁻, Mx, My) - south_rank = index2rank(I, J⁺, Mx, My) - east_rank = index2rank(I⁺, J, Mx, My) - west_rank = index2rank(I⁻, J, Mx, My) - - tile_grid = RegularCartesianGrid((Nx′, Ny′, Nz′), (Lx′, Ly′, Lz′)) - tile = CellField(FT, arch, tile_grid) - - send_reqs = MPI.Request[] - if rank == 0 - rands = rand(Nx, Ny, Nz) - - for r in 1:Mx*My-1 - I′, J′ = rank2index(r, Mx, My) - i1, i2 = I′*Nx′+1, (I′+1)*Nx′ - j1, j2 = J′*Ny′+1, (J′+1)*Ny′ - send_mesg = rands[i1:i2, j1:j2, :] - - println("[rank $rank] Sending rands[$i1:$i2, $j1:$j2, :] to rank $r...") - sreq = MPI.Isend(send_mesg, r, distribute_tag(r), comm) - push!(send_reqs, sreq) - end - - data(tile) .= rands[1:Nx′, 1:Ny′, :] - - MPI.Waitall!(send_reqs) - end - - if rank != 0 - println("[rank $rank] Receiving tile from rank 0...") - recv_mesg = zeros(FT, Nx′, Ny′, Nz′) - rreq = MPI.Irecv!(recv_mesg, 0, distribute_tag(rank), comm) - - stats = MPI.Wait!(rreq) - data(tile) .= recv_mesg - end - - println("[rank $rank] Sending halo data...") - send_halo_data(tile, Mx, My, comm) - - println("[rank $rank] Receiving halo data...") - receive_halo_data(tile, Mx, My, comm) - - MPI.Barrier(comm) - if rank == 0 - tic = time_ns() - end - - println("[rank $rank] Sending halo data...") - send_halo_data(tile, Mx, My, comm) - - println("[rank $rank] Receiving halo data...") - receive_halo_data(tile, Mx, My, comm) - - MPI.Barrier(comm) - if rank == 0 - t = (time_ns() - tic) - ts = t / 1e9 - @info "$R ranks halo communication time: $(prettytime(t))" - - Hx, Hy = 1, 1 - data_size = sizeof(FT) * 2Nz*(Hx*Nx + Hy*Ny) - @info "$R ranks halo communication bandwidth: $(prettybandwidth(data_size/ts))" - end -end - -MPI.Init() -fill_halo_regions_mpi!(Float64, GPU(), 192, 192, 192, 3, 3) -MPI.Finalize() From f816d546101b00c2ba09541b4b35d26f3e628d9b Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Fri, 5 Feb 2021 22:47:49 -0500 Subject: [PATCH 055/100] Beautiful metaprogramming for halo communication --- src/Distributed/distributed_utils.jl | 2 +- src/Distributed/halo_communication.jl | 130 ++++++++++++++-------- src/Distributed/test_distributed_model.jl | 38 ++++++- 3 files changed, 122 insertions(+), 48 deletions(-) diff --git a/src/Distributed/distributed_utils.jl b/src/Distributed/distributed_utils.jl index dfd6dc0fbc..117ae74f3c 100644 --- a/src/Distributed/distributed_utils.jl +++ b/src/Distributed/distributed_utils.jl @@ -16,7 +16,7 @@ north_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = bottom_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = view(f.data, :, :, left_halo_indices(LZ, topology(f, 3), f.grid.Nz, f.grid.Hz), :) -bottom_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = +top_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = view(f.data, :, :, right_halo_indices(LZ, topology(f, 3), f.grid.Nz, f.grid.Hz), 
:) underlying_west_halo(f, grid, location) = diff --git a/src/Distributed/halo_communication.jl b/src/Distributed/halo_communication.jl index e773fa9356..2dc48f4857 100644 --- a/src/Distributed/halo_communication.jl +++ b/src/Distributed/halo_communication.jl @@ -24,7 +24,7 @@ opposite_side = Dict( ) # Unfortunately can't call MPI.Comm_size(MPI.COMM_WORLD) before MPI.Init(). -const MAX_RANKS = 10^3 +MAX_RANKS = 10^3 RANK_DIGITS = 3 # Define functions that return unique send and recv MPI tags for each side. @@ -35,8 +35,8 @@ RANK_DIGITS = 3 for side in sides side_str = string(side) - send_tag_fn_name = Symbol(side, :_send_tag) - recv_tag_fn_name = Symbol(side, :_recv_tag) + send_tag_fn_name = Symbol("$(side)_send_tag") + recv_tag_fn_name = Symbol("$(side)_recv_tag") @eval begin function $send_tag_fn_name(my_rank, rank_to_send_to) from_digits = string(my_rank, pad=RANK_DIGITS) @@ -61,74 +61,112 @@ end fill_halo_regions!(field::AbstractField{LX, LY, LZ}, arch::AbstractMultiArchitecture, args...) where {LX, LY, LZ} = fill_halo_regions!(field.data, field.boundary_conditions, arch, field.grid, (LX, LY, LZ), args...) -function fill_halo_regions!(c::AbstractArray, bcs, arch::AbstractMultiArchitecture, grid, location, args...) +function fill_halo_regions!(c::AbstractArray, bcs, arch::AbstractMultiArchitecture, grid, c_location, args...) barrier = Event(device(child_architecture(arch))) - east_event, west_event = fill_east_and_west_halos!(c, bcs.east, bcs.west, arch, barrier, grid, location, args...) - # north_event, south_event = fill_north_and_south_halos!(c, bcs.north, bcs.south, arch, barrier, grid, args...) - # top_event, bottom_event = fill_top_and_bottom_halos!(c, bcs.east, bcs.west, arch, barrier, grid, args...) + east_event, west_event = fill_east_and_west_halos!(c, bcs.east, bcs.west, arch, barrier, grid, c_location, args...) + north_event, south_event = fill_north_and_south_halos!(c, bcs.north, bcs.south, arch, barrier, grid, c_location, args...) + top_event, bottom_event = fill_top_and_bottom_halos!(c, bcs.top, bcs.bottom, arch, barrier, grid, c_location, args...) - events = [east_event, west_event] # , north_event, south_event, top_event, bottom_event] + events = [east_event, west_event, north_event, south_event, top_event, bottom_event] events = filter(e -> e isa Event, events) wait(device(child_architecture(arch)), MultiEvent(Tuple(events))) return nothing end -function fill_east_and_west_halos!(c, east_bc, west_bc, arch, barrier, grid, location, args...) - east_event = fill_east_halo!(c, east_bc, child_architecture(arch), barrier, grid, args...) - west_event = fill_west_halo!(c, west_bc, child_architecture(arch), barrier, grid, args...) - return east_event, west_event -end - -function send_east_halo(c, grid, c_location, my_rank, rank_to_send_to) - send_buffer = underlying_east_halo(c, grid, c_location) - send_tag = east_send_tag(my_rank, rank_to_send_to) +##### +##### fill_east_and_west_halos! } +##### fill_north_and_south_halos! } for non-communicating boundary conditions (fallback) +##### fill_top_and_bottom_halos! } +##### - @debug "Sending east halo: my_rank=$my_rank, rank_to_send_to=$rank_to_send_to, send_tag=$send_tag" - status = MPI.Isend(send_buffer, rank_to_send_to, send_tag, MPI.COMM_WORLD) +for (side, opposite_side) in zip([:east, :north, :top], [:west, :south, :bottom]) + fill_both_halos! = Symbol("fill_$(side)_and_$(opposite_side)_halos!") + fill_side_halo! = Symbol("fill_$(side)_halo!") + fill_opposite_side_halo! 
= Symbol("fill_$(opposite_side)_halo!") - return status + @eval begin + function $fill_both_halos!(c, bc_side, bc_opposite_side, arch, barrier, grid, args...) + event_side = $fill_side_halo!(c, bc_side, child_architecture(arch), barrier, grid, args...) + event_opposite_side = $fill_opposite_side_halo!(c, bc_opposite_side, child_architecture(arch), barrier, grid, args...) + return event_side, event_opposite_side + end + end end -function send_west_halo(c, grid, c_location, my_rank, rank_to_send_to) - send_buffer = underlying_west_halo(c, grid, c_location) - send_tag = west_send_tag(my_rank, rank_to_send_to) +##### +##### fill_east_and_west_halos! } +##### fill_north_and_south_halos! } for when both halos are communicative +##### fill_top_and_bottom_halos! } +##### - @debug "Sending west halo: my_rank=$my_rank, rank_to_send_to=$rank_to_send_to, send_tag=$send_tag" - status = MPI.Isend(send_buffer, rank_to_send_to, send_tag, MPI.COMM_WORLD) +for (side, opposite_side) in zip([:east, :north, :top], [:west, :south, :bottom]) + fill_both_halos! = Symbol("fill_$(side)_and_$(opposite_side)_halos!") + send_side_halo = Symbol("send_$(side)_halo") + send_opposite_side_halo = Symbol("send_$(opposite_side)_halo") + recv_and_fill_side_halo! = Symbol("recv_and_fill_$(side)_halo!") + recv_and_fill_opposite_side_halo! = Symbol("recv_and_fill_$(opposite_side)_halo!") - return status -end + @eval begin + function $fill_both_halos!(c, bc_side::HaloCommunicationBC, bc_opposite_side::HaloCommunicationBC, arch, barrier, grid, c_location, args...) + @assert bc_side.condition.from == bc_opposite_side.condition.from # Extra protection in case of bugs + my_rank = bc_side.condition.from -function recv_and_fill_east_halo!(c, grid, c_location, my_rank, rank_to_recv_from) - recv_buffer = underlying_east_halo(c, grid, c_location) - recv_tag = east_recv_tag(my_rank, rank_to_recv_from) + $send_side_halo(c, grid, c_location, my_rank, bc_side.condition.to) + $send_opposite_side_halo(c, grid, c_location, my_rank, bc_opposite_side.condition.to) - @debug "Receiving east halo: my_rank=$my_rank, rank_to_recv_from=$rank_to_recv_from, recv_tag=$recv_tag" - MPI.Recv!(recv_buffer, rank_to_recv_from, recv_tag, MPI.COMM_WORLD) + $recv_and_fill_side_halo!(c, grid, c_location, my_rank, bc_side.condition.to) + $recv_and_fill_opposite_side_halo!(c, grid, c_location, my_rank, bc_opposite_side.condition.to) - return nothing + return nothing, nothing + end + end end -function recv_and_fill_west_halo!(c, grid, c_location, my_rank, rank_to_recv_from) - recv_buffer = underlying_west_halo(c, grid, c_location) - recv_tag = west_recv_tag(my_rank, rank_to_recv_from) +##### +##### Sending halos +##### - @debug "Receiving west halo: my_rank=$my_rank, rank_to_recv_from=$rank_to_recv_from, recv_tag=$recv_tag" - MPI.Recv!(recv_buffer, rank_to_recv_from, recv_tag, MPI.COMM_WORLD) +for side in sides + side_str = string(side) + send_side_halo = Symbol("send_$(side)_halo") + underlying_side_halo = Symbol("underlying_$(side)_halo") + side_send_tag = Symbol("$(side)_send_tag") - return nothing + @eval begin + function $send_side_halo(c, grid, c_location, my_rank, rank_to_send_to) + send_buffer = $underlying_side_halo(c, grid, c_location) + send_tag = $side_send_tag(my_rank, rank_to_send_to) + + @debug "Sending " * $side_str * " halo: my_rank=$my_rank, rank_to_send_to=$rank_to_send_to, send_tag=$send_tag" + status = MPI.Isend(send_buffer, rank_to_send_to, send_tag, MPI.COMM_WORLD) + + return status + end + end end -function fill_east_and_west_halos!(c, 
east_bc::HaloCommunicationBC, west_bc::HaloCommunicationBC, arch, barrier, grid, c_location, args...) - my_rank = east_bc.condition.from - send_east_halo(c, grid, c_location, my_rank, east_bc.condition.to) - send_west_halo(c, grid, c_location, my_rank, west_bc.condition.to) +##### +##### Receiving and filling halos (buffer is a view so should get filled upon receive) +##### + +for side in sides + side_str = string(side) + recv_and_fill_side_halo! = Symbol("recv_and_fill_$(side)_halo!") + underlying_side_halo = Symbol("underlying_$(side)_halo") + side_recv_tag = Symbol("$(side)_recv_tag") + + @eval begin + function $recv_and_fill_side_halo!(c, grid, c_location, my_rank, rank_to_recv_from) + recv_buffer = $underlying_side_halo(c, grid, c_location) + recv_tag = $side_recv_tag(my_rank, rank_to_recv_from) - recv_and_fill_east_halo!(c, grid, c_location, my_rank, east_bc.condition.to) - recv_and_fill_west_halo!(c, grid, c_location, my_rank, west_bc.condition.to) + @debug "Receiving " * $side_str * " halo: my_rank=$my_rank, rank_to_recv_from=$rank_to_recv_from, recv_tag=$recv_tag" + MPI.Recv!(recv_buffer, rank_to_recv_from, recv_tag, MPI.COMM_WORLD) - return nothing, nothing + return nothing + end + end end diff --git a/src/Distributed/test_distributed_model.jl b/src/Distributed/test_distributed_model.jl index 77e4d22ae1..0121b32d61 100644 --- a/src/Distributed/test_distributed_model.jl +++ b/src/Distributed/test_distributed_model.jl @@ -236,7 +236,7 @@ end function run_triply_periodic_halo_communication_tests_with_411_ranks() topo = (Periodic, Periodic, Periodic) - full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 1), extent=(1, 2, 3)) + full_grid = RegularCartesianGrid(topology=topo, size=(8, 6, 4), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) dm = DistributedModel(architecture=arch, grid=full_grid) @@ -251,6 +251,40 @@ function run_triply_periodic_halo_communication_tests_with_411_ranks() return nothing end +function run_triply_periodic_halo_communication_tests_with_141_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularCartesianGrid(topology=topo, size=(3, 8, 2), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) + dm = DistributedModel(architecture=arch, grid=full_grid) + + for field in fields(dm.model) + set!(field, arch.my_rank) + fill_halo_regions!(field, arch) + + @test all(north_halo(field) .== arch.connectivity.north) + @test all(south_halo(field) .== arch.connectivity.south) + end + + return nothing +end + +function run_triply_periodic_halo_communication_tests_with_114_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularCartesianGrid(topology=topo, size=(3, 5, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) + dm = DistributedModel(architecture=arch, grid=full_grid) + + for field in fields(dm.model) + set!(field, arch.my_rank) + fill_halo_regions!(field, arch) + + @test all(top_halo(field) .== arch.connectivity.top) + @test all(bottom_halo(field) .== arch.connectivity.bottom) + end + + return nothing +end + ##### ##### Run tests! ##### @@ -283,6 +317,8 @@ end @testset "Halo communication" begin @info " Testing halo communication..." 
run_triply_periodic_halo_communication_tests_with_411_ranks() + run_triply_periodic_halo_communication_tests_with_141_ranks() + run_triply_periodic_halo_communication_tests_with_114_ranks() end # TODO: 221 ranks From d18f6700e882344d72d9b29b42a364049ce446bb Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Fri, 5 Feb 2021 23:59:56 -0500 Subject: [PATCH 056/100] Testing xy decompositions --- ...buted_model.jl => test_distributed_mpi.jl} | 241 +++++++++++++----- 1 file changed, 178 insertions(+), 63 deletions(-) rename src/Distributed/{test_distributed_model.jl => test_distributed_mpi.jl} (56%) diff --git a/src/Distributed/test_distributed_model.jl b/src/Distributed/test_distributed_mpi.jl similarity index 56% rename from src/Distributed/test_distributed_model.jl rename to src/Distributed/test_distributed_mpi.jl index 0121b32d61..c76b45b482 100644 --- a/src/Distributed/test_distributed_model.jl +++ b/src/Distributed/test_distributed_mpi.jl @@ -31,6 +31,10 @@ function run_triply_periodic_rank_connectivity_tests_with_411_ranks() @test isnothing(connectivity.top) @test isnothing(connectivity.bottom) + # +---+---+---+---+ + # | 0 | 1 | 2 | 3 | + # +---+---+---+---+ + if my_rank == 0 @test connectivity.east == 1 @test connectivity.west == 3 @@ -64,6 +68,16 @@ function run_triply_periodic_rank_connectivity_tests_with_141_ranks() @test isnothing(connectivity.top) @test isnothing(connectivity.bottom) + # +---+ + # | 0 | + # +---+ + # | 1 | + # +---+ + # | 2 | + # +---+ + # | 3 | + # +---+ + if my_rank == 0 @test connectivity.north == 1 @test connectivity.south == 3 @@ -97,6 +111,19 @@ function run_triply_periodic_rank_connectivity_tests_with_114_ranks() @test isnothing(connectivity.north) @test isnothing(connectivity.south) + # /---/ + # / 3 / + # /---/ + # /---/ + # / 2 / + # /---/ + # /---/ + # / 1 / + # /---/ + # /---/ + # / 0 / + # /---/ + if my_rank == 0 @test connectivity.top == 1 @test connectivity.bottom == 3 @@ -114,6 +141,51 @@ function run_triply_periodic_rank_connectivity_tests_with_114_ranks() return nothing end +function run_triply_periodic_rank_connectivity_tests_with_221_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) + + my_rank = MPI.Comm_rank(MPI.COMM_WORLD) + @test my_rank == index2rank(arch.my_index..., arch.ranks...) + + connectivity = arch.connectivity + + # No communication in z. 
+ @test isnothing(connectivity.top) + @test isnothing(connectivity.bottom) + + # +---+---+ + # | 0 | 2 | + # +---+---+ + # | 1 | 3 | + # +---+---+ + + if my_rank == 0 + @test connectivity.east == 2 + @test connectivity.west == 2 + @test connectivity.north == 1 + @test connectivity.south == 1 + elseif my_rank == 1 + @test connectivity.east == 3 + @test connectivity.west == 3 + @test connectivity.north == 0 + @test connectivity.south == 0 + elseif my_rank == 2 + @test connectivity.east == 0 + @test connectivity.west == 0 + @test connectivity.north == 3 + @test connectivity.south == 3 + elseif my_rank == 3 + @test connectivity.east == 1 + @test connectivity.west == 1 + @test connectivity.north == 2 + @test connectivity.south == 2 + end + + return nothing +end + ##### ##### Local grids for distributed models ##### @@ -125,15 +197,15 @@ function run_triply_periodic_local_grid_tests_with_411_ranks() dm = DistributedModel(architecture=arch, grid=full_grid) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) + local_grid = dm.model.grid + nx, ny, nz = size(local_grid) - model = dm.model - nx, ny, nz = size(model.grid) - @test model.grid.xF[1] == 0.25*my_rank - @test model.grid.xF[nx+1] == 0.25*(my_rank+1) - @test model.grid.yF[1] == 0 - @test model.grid.yF[ny+1] == 2 - @test model.grid.zF[1] == -3 - @test model.grid.zF[nz+1] == 0 + @test local_grid.xF[1] == 0.25*my_rank + @test local_grid.xF[nx+1] == 0.25*(my_rank+1) + @test local_grid.yF[1] == 0 + @test local_grid.yF[ny+1] == 2 + @test local_grid.zF[1] == -3 + @test local_grid.zF[nz+1] == 0 return nothing end @@ -145,15 +217,15 @@ function run_triply_periodic_local_grid_tests_with_141_ranks() dm = DistributedModel(architecture=arch, grid=full_grid) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) + local_grid = dm.model.grid + nx, ny, nz = size(local_grid) - model = dm.model - nx, ny, nz = size(model.grid) - @test model.grid.xF[1] == 0 - @test model.grid.xF[nx+1] == 1 - @test model.grid.yF[1] == 0.5*my_rank - @test model.grid.yF[ny+1] == 0.5*(my_rank+1) - @test model.grid.zF[1] == -3 - @test model.grid.zF[nz+1] == 0 + @test local_grid.xF[1] == 0 + @test local_grid.xF[nx+1] == 1 + @test local_grid.yF[1] == 0.5*my_rank + @test local_grid.yF[ny+1] == 0.5*(my_rank+1) + @test local_grid.zF[1] == -3 + @test local_grid.zF[nz+1] == 0 return nothing end @@ -165,21 +237,41 @@ function run_triply_periodic_local_grid_tests_with_114_ranks() dm = DistributedModel(architecture=arch, grid=full_grid) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) + local_grid = dm.model.grid + nx, ny, nz = size(local_grid) - model = dm.model - nx, ny, nz = size(model.grid) - @test model.grid.xF[1] == 0 - @test model.grid.xF[nx+1] == 1 - @test model.grid.yF[1] == 0 - @test model.grid.yF[ny+1] == 2 - @test model.grid.zF[1] == -3 + 0.75*my_rank - @test model.grid.zF[nz+1] == -3 + 0.75*(my_rank+1) + @test local_grid.xF[1] == 0 + @test local_grid.xF[nx+1] == 1 + @test local_grid.yF[1] == 0 + @test local_grid.yF[ny+1] == 2 + @test local_grid.zF[1] == -3 + 0.75*my_rank + @test local_grid.zF[nz+1] == -3 + 0.75*(my_rank+1) + + return nothing +end + +function run_triply_periodic_local_grid_tests_with_221_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) + dm = DistributedModel(architecture=arch, grid=full_grid) + + i, j, k = arch.my_index + local_grid = dm.model.grid + nx, ny, nz = size(local_grid) + + @test local_grid.xF[1] == 0.5*(i-1) + @test local_grid.xF[nx+1] == 0.5*i + @test 
local_grid.yF[1] == j-1 + @test local_grid.yF[ny+1] == j + @test local_grid.zF[1] == -3 + @test local_grid.zF[nz+1] == 0 return nothing end ##### -##### +##### Injection of halo communication BCs ##### function run_triply_periodic_bc_injection_tests_with_411_ranks() @@ -189,12 +281,13 @@ function run_triply_periodic_bc_injection_tests_with_411_ranks() dm = DistributedModel(architecture=arch, grid=full_grid) for field in fields(dm.model) - @test field.boundary_conditions.east isa HaloCommunicationBC - @test field.boundary_conditions.west isa HaloCommunicationBC - @test !isa(field.boundary_conditions.north, HaloCommunicationBC) - @test !isa(field.boundary_conditions.south, HaloCommunicationBC) - @test !isa(field.boundary_conditions.top, HaloCommunicationBC) - @test !isa(field.boundary_conditions.bottom, HaloCommunicationBC) + fbcs = field.boundary_conditions + @test fbcs.east isa HaloCommunicationBC + @test fbcs.west isa HaloCommunicationBC + @test !isa(fbcs.north, HaloCommunicationBC) + @test !isa(fbcs.south, HaloCommunicationBC) + @test !isa(fbcs.top, HaloCommunicationBC) + @test !isa(fbcs.bottom, HaloCommunicationBC) end end @@ -205,12 +298,13 @@ function run_triply_periodic_bc_injection_tests_with_141_ranks() dm = DistributedModel(architecture=arch, grid=full_grid) for field in fields(dm.model) - @test !isa(field.boundary_conditions.east, HaloCommunicationBC) - @test !isa(field.boundary_conditions.west, HaloCommunicationBC) - @test field.boundary_conditions.north isa HaloCommunicationBC - @test field.boundary_conditions.south isa HaloCommunicationBC - @test !isa(field.boundary_conditions.top, HaloCommunicationBC) - @test !isa(field.boundary_conditions.bottom, HaloCommunicationBC) + fbcs = field.boundary_conditions + @test !isa(fbcs.east, HaloCommunicationBC) + @test !isa(fbcs.west, HaloCommunicationBC) + @test fbcs.north isa HaloCommunicationBC + @test fbcs.south isa HaloCommunicationBC + @test !isa(fbcs.top, HaloCommunicationBC) + @test !isa(fbcs.bottom, HaloCommunicationBC) end end @@ -221,12 +315,30 @@ function run_triply_periodic_bc_injection_tests_with_114_ranks() dm = DistributedModel(architecture=arch, grid=full_grid) for field in fields(dm.model) - @test !isa(field.boundary_conditions.east, HaloCommunicationBC) - @test !isa(field.boundary_conditions.west, HaloCommunicationBC) - @test !isa(field.boundary_conditions.north, HaloCommunicationBC) - @test !isa(field.boundary_conditions.south, HaloCommunicationBC) - @test field.boundary_conditions.top isa HaloCommunicationBC - @test field.boundary_conditions.bottom isa HaloCommunicationBC + fbcs = field.boundary_conditions + @test !isa(fbcs.east, HaloCommunicationBC) + @test !isa(fbcs.west, HaloCommunicationBC) + @test !isa(fbcs.north, HaloCommunicationBC) + @test !isa(fbcs.south, HaloCommunicationBC) + @test fbcs.top isa HaloCommunicationBC + @test fbcs.bottom isa HaloCommunicationBC + end +end + +function run_triply_periodic_bc_injection_tests_with_221_ranks() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) + dm = DistributedModel(architecture=arch, grid=full_grid) + + for field in fields(dm.model) + fbcs = field.boundary_conditions + @test fbcs.east isa HaloCommunicationBC + @test fbcs.west isa HaloCommunicationBC + @test fbcs.north isa HaloCommunicationBC + @test fbcs.south isa HaloCommunicationBC + @test !isa(fbcs.top, HaloCommunicationBC) + @test !isa(fbcs.bottom, HaloCommunicationBC) end end @@ -292,26 
+404,29 @@ end @testset "Distributed MPI Oceananigans" begin @info "Testing distributed MPI Oceananigans..." - @testset "Multi architectures rank connectivity" begin - @info " Testing multi architecture rank connectivity..." - run_triply_periodic_rank_connectivity_tests_with_411_ranks() - run_triply_periodic_rank_connectivity_tests_with_141_ranks() - run_triply_periodic_rank_connectivity_tests_with_114_ranks() - end - - @testset "Local grids for distributed models" begin - @info " Testing local grids for distributed models..." - run_triply_periodic_local_grid_tests_with_411_ranks() - run_triply_periodic_local_grid_tests_with_141_ranks() - run_triply_periodic_local_grid_tests_with_114_ranks() - end - - @testset "Injection of halo communication BCs" begin - @info " Testing injection of halo communication BCs..." - run_triply_periodic_bc_injection_tests_with_411_ranks() - run_triply_periodic_bc_injection_tests_with_141_ranks() - run_triply_periodic_bc_injection_tests_with_114_ranks() - end + # @testset "Multi architectures rank connectivity" begin + # @info " Testing multi architecture rank connectivity..." + # run_triply_periodic_rank_connectivity_tests_with_411_ranks() + # run_triply_periodic_rank_connectivity_tests_with_141_ranks() + # run_triply_periodic_rank_connectivity_tests_with_114_ranks() + # run_triply_periodic_rank_connectivity_tests_with_221_ranks() + # end + + # @testset "Local grids for distributed models" begin + # @info " Testing local grids for distributed models..." + # run_triply_periodic_local_grid_tests_with_411_ranks() + # run_triply_periodic_local_grid_tests_with_141_ranks() + # run_triply_periodic_local_grid_tests_with_114_ranks() + # run_triply_periodic_local_grid_tests_with_221_ranks() + # end + + # @testset "Injection of halo communication BCs" begin + # @info " Testing injection of halo communication BCs..." + # run_triply_periodic_bc_injection_tests_with_411_ranks() + # run_triply_periodic_bc_injection_tests_with_141_ranks() + # run_triply_periodic_bc_injection_tests_with_114_ranks() + # run_triply_periodic_bc_injection_tests_with_221_ranks() + # end # TODO: Larger halos! @testset "Halo communication" begin From fff4de7bcad283306b5e42baf4aa94d4e0b60d43 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sat, 6 Feb 2021 00:00:26 -0500 Subject: [PATCH 057/100] Add `include_corners` kwarg for halo functions --- src/Distributed/distributed_utils.jl | 47 ++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/src/Distributed/distributed_utils.jl b/src/Distributed/distributed_utils.jl index 117ae74f3c..51dcd77301 100644 --- a/src/Distributed/distributed_utils.jl +++ b/src/Distributed/distributed_utils.jl @@ -1,17 +1,46 @@ -using Oceananigans.Grids: left_halo_indices, right_halo_indices, underlying_left_halo_indices, underlying_right_halo_indices using Oceananigans.Fields: AbstractField +using Oceananigans.Grids: + interior_indices, + left_halo_indices, right_halo_indices, + underlying_left_halo_indices, underlying_right_halo_indices -west_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = - view(f.data, left_halo_indices(LX, topology(f, 1), f.grid.Nx, f.grid.Hx), :, :) +# TODO: Move to Grids/grid_utils.jl -east_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = - view(f.data, right_halo_indices(LX, topology(f, 1), f.grid.Nx, f.grid.Hx), :, :) +west_halo(f::AbstractField{LX, LY, LZ}; include_corners=true) where {LX, LY, LZ} = + include_corners ? 
view(f.data, left_halo_indices(LX, topology(f, 1), f.grid.Nx, f.grid.Hx), :, :) : + view(f.data, left_halo_indices(LX, topology(f, 1), f.grid.Nx, f.grid.Hx), + interior_indices(LY, topology(f, 2), f.grid.Ny), + interior_indices(LZ, topology(f, 3), f.grid.Nz)) -south_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = - view(f.data, :, left_halo_indices(LY, topology(f, 2), f.grid.Ny, f.grid.Hy), :) +east_halo(f::AbstractField{LX, LY, LZ}; include_corners=true) where {LX, LY, LZ} = + include_corners ? view(f.data, right_halo_indices(LX, topology(f, 1), f.grid.Nx, f.grid.Hx), :, :) : + view(f.data, right_halo_indices(LX, topology(f, 1), f.grid.Nx, f.grid.Hx), + interior_indices(LY, topology(f, 2), f.grid.Ny), + interior_indices(LZ, topology(f, 3), f.grid.Nz)) -north_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} = - view(f.data, :, right_halo_indices(LY, topology(f, 2), f.grid.Ny, f.grid.Hy), :) +south_halo(f::AbstractField{LX, LY, LZ}; include_corners=true) where {LX, LY, LZ} = + include_corners ? view(f.data, :, left_halo_indices(LY, topology(f, 2), f.grid.Ny, f.grid.Hy), :) : + view(f.data, interior_indices(LX, topology(f, 1), f.grid.Nx), + left_halo_indices(LY, topology(f, 2), f.grid.Ny, f.grid.Hy), + interior_indices(LZ, topology(f, 3), f.grid.Nz)) + +north_halo(f::AbstractField{LX, LY, LZ}; include_corners=true) where {LX, LY, LZ} = + include_corners ? view(f.data, :, right_halo_indices(LY, topology(f, 2), f.grid.Ny, f.grid.Hy), :) : + view(f.data, interior_indices(LX, topology(f, 1), f.grid.Nx), + right_halo_indices(LY, topology(f, 2), f.grid.Ny, f.grid.Hy), + interior_indices(LZ, topology(f, 3), f.grid.Nz)) + +bottom_halo(f::AbstractField{LX, LY, LZ}; include_corners=true) where {LX, LY, LZ} = + include_corners ? view(f.data, :, :, left_halo_indices(LZ, topology(f, 3), f.grid.Nz, f.grid.Hz)) : + view(f.data, interior_indices(LX, topology(f, 1), f.grid.Nx), + interior_indices(LY, topology(f, 2), f.grid.Ny), + left_halo_indices(LZ, topology(f, 3), f.grid.Nz, f.grid.Hz)) + +top_halo(f::AbstractField{LX, LY, LZ}; include_corners=true) where {LX, LY, LZ} = + include_corners ? 
view(f.data, :, :, right_halo_indices(LZ, topology(f, 3), f.grid.Nz, f.grid.Hz)) :
+                      view(f.data, interior_indices(LX, topology(f, 1), f.grid.Nx),
+                           interior_indices(LY, topology(f, 2), f.grid.Ny),
+                           right_halo_indices(LZ, topology(f, 3), f.grid.Nz, f.grid.Hz))
 
 bottom_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} =
     view(f.data, :, :, left_halo_indices(LZ, topology(f, 3), f.grid.Nz, f.grid.Hz), :)
 
 top_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} =
     view(f.data, :, :, right_halo_indices(LZ, topology(f, 3), f.grid.Nz, f.grid.Hz), :)

From 0dec95812ab0da75873a517f0d17517bb42eb4c6 Mon Sep 17 00:00:00 2001
From: ali-ramadhan
Date: Sat, 6 Feb 2021 00:07:20 -0500
Subject: [PATCH 058/100] Test that halo communication doesn't leak

---
 src/Distributed/test_distributed_mpi.jl | 91 ++++++++++++++++++-------
 1 file changed, 65 insertions(+), 26 deletions(-)

diff --git a/src/Distributed/test_distributed_mpi.jl b/src/Distributed/test_distributed_mpi.jl
index c76b45b482..f52a402ab0 100644
--- a/src/Distributed/test_distributed_mpi.jl
+++ b/src/Distributed/test_distributed_mpi.jl
@@ -358,6 +358,12 @@ function run_triply_periodic_halo_communication_tests_with_411_ranks()
 
         @test all(east_halo(field) .== arch.connectivity.east)
         @test all(west_halo(field) .== arch.connectivity.west)
+
+        @test all(interior(field) .== arch.my_rank)
+        @test all(north_halo(field, include_corners=false) .== arch.my_rank)
+        @test all(south_halo(field, include_corners=false) .== arch.my_rank)
+        @test all(top_halo(field, include_corners=false) .== arch.my_rank)
+        @test all(bottom_halo(field, include_corners=false) .== arch.my_rank)
     end
 
     return nothing
@@ -375,6 +381,12 @@ function run_triply_periodic_halo_communication_tests_with_141_ranks()
 
         @test all(north_halo(field) .== arch.connectivity.north)
         @test all(south_halo(field) .== arch.connectivity.south)
+
+        @test all(interior(field) .== arch.my_rank)
+        @test all(east_halo(field, include_corners=false) .== arch.my_rank)
+        @test all(west_halo(field, include_corners=false) .== arch.my_rank)
+        @test all(top_halo(field, include_corners=false) .== arch.my_rank)
+        @test all(bottom_halo(field, include_corners=false) .== arch.my_rank)
     end
 
     return nothing
@@ -392,6 +404,35 @@ function run_triply_periodic_halo_communication_tests_with_114_ranks()
 
         @test all(top_halo(field) .== arch.connectivity.top)
         @test all(bottom_halo(field) .== arch.connectivity.bottom)
+
+        @test all(interior(field) .== arch.my_rank)
+        @test all(east_halo(field, include_corners=false) .== arch.my_rank)
+        @test all(west_halo(field, include_corners=false) .== arch.my_rank)
+        @test all(north_halo(field, include_corners=false) .== arch.my_rank)
+        @test all(south_halo(field, include_corners=false) .== arch.my_rank)
+    end
+
+    return nothing
+end
+
+function run_triply_periodic_halo_communication_tests_with_221_ranks()
+    topo = (Periodic, Periodic, Periodic)
+    full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 3), extent=(1, 2, 3))
+    arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1))
+    dm = DistributedModel(architecture=arch, grid=full_grid)
+
+    for field in fields(dm.model)
+        set!(field, arch.my_rank)
+        fill_halo_regions!(field, arch)
+
+        @test all(east_halo(field) .== arch.connectivity.east)
+        @test all(west_halo(field) .== arch.connectivity.west)
+        @test all(north_halo(field) .== arch.connectivity.north)
+        @test all(south_halo(field) .== arch.connectivity.south)
+
+        @test all(interior(field) .== arch.my_rank)
+        @test all(top_halo(field, include_corners=false) .== arch.my_rank)
+        @test all(bottom_halo(field, include_corners=false) .== arch.my_rank)
     end
 
     return nothing
@@ -404,29 +445,29 @@ end
 
 @testset "Distributed MPI Oceananigans" begin
     @info "Testing 
distributed MPI Oceananigans..." - # @testset "Multi architectures rank connectivity" begin - # @info " Testing multi architecture rank connectivity..." - # run_triply_periodic_rank_connectivity_tests_with_411_ranks() - # run_triply_periodic_rank_connectivity_tests_with_141_ranks() - # run_triply_periodic_rank_connectivity_tests_with_114_ranks() - # run_triply_periodic_rank_connectivity_tests_with_221_ranks() - # end - - # @testset "Local grids for distributed models" begin - # @info " Testing local grids for distributed models..." - # run_triply_periodic_local_grid_tests_with_411_ranks() - # run_triply_periodic_local_grid_tests_with_141_ranks() - # run_triply_periodic_local_grid_tests_with_114_ranks() - # run_triply_periodic_local_grid_tests_with_221_ranks() - # end - - # @testset "Injection of halo communication BCs" begin - # @info " Testing injection of halo communication BCs..." - # run_triply_periodic_bc_injection_tests_with_411_ranks() - # run_triply_periodic_bc_injection_tests_with_141_ranks() - # run_triply_periodic_bc_injection_tests_with_114_ranks() - # run_triply_periodic_bc_injection_tests_with_221_ranks() - # end + @testset "Multi architectures rank connectivity" begin + @info " Testing multi architecture rank connectivity..." + run_triply_periodic_rank_connectivity_tests_with_411_ranks() + run_triply_periodic_rank_connectivity_tests_with_141_ranks() + run_triply_periodic_rank_connectivity_tests_with_114_ranks() + run_triply_periodic_rank_connectivity_tests_with_221_ranks() + end + + @testset "Local grids for distributed models" begin + @info " Testing local grids for distributed models..." + run_triply_periodic_local_grid_tests_with_411_ranks() + run_triply_periodic_local_grid_tests_with_141_ranks() + run_triply_periodic_local_grid_tests_with_114_ranks() + run_triply_periodic_local_grid_tests_with_221_ranks() + end + + @testset "Injection of halo communication BCs" begin + @info " Testing injection of halo communication BCs..." + run_triply_periodic_bc_injection_tests_with_411_ranks() + run_triply_periodic_bc_injection_tests_with_141_ranks() + run_triply_periodic_bc_injection_tests_with_114_ranks() + run_triply_periodic_bc_injection_tests_with_221_ranks() + end # TODO: Larger halos! 
@testset "Halo communication" begin @@ -434,10 +475,8 @@ end run_triply_periodic_halo_communication_tests_with_411_ranks() run_triply_periodic_halo_communication_tests_with_141_ranks() run_triply_periodic_halo_communication_tests_with_114_ranks() + # run_triply_periodic_halo_communication_tests_with_221_ranks() end - - # TODO: 221 ranks - # TODO: triply bounded end # MPI.Finalize() From 7dbe2bb137b5ec07791ac4496e2ff44f04f51beb Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sat, 6 Feb 2021 09:17:59 -0500 Subject: [PATCH 059/100] Distributed FFT based Poisson solver --- .../distributed_fft_based_poisson_solver.jl | 66 +++++++++++++++++++ src/Distributed/distributed_utils.jl | 2 +- src/Distributed/halo_communication.jl | 2 +- 3 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 src/Distributed/distributed_fft_based_poisson_solver.jl diff --git a/src/Distributed/distributed_fft_based_poisson_solver.jl b/src/Distributed/distributed_fft_based_poisson_solver.jl new file mode 100644 index 0000000000..df384b3fbd --- /dev/null +++ b/src/Distributed/distributed_fft_based_poisson_solver.jl @@ -0,0 +1,66 @@ +using PencilFFTs + +struct DistributedFFTBasedPoissonSolver{P, F, L, λ, S} + plan :: P + full_grid :: F + my_grid :: L + my_eigenvalues :: λ + storage :: S +end + +reshaped_size(N, dim) = dim == 1 ? (N, 1, 1) : + dim == 2 ? (1, N, 1) : + dim == 3 ? (1, 1, N) : nothing + +function poisson_eigenvalues(N, L, dim, ::Periodic) + inds = reshape(1:N, reshaped_size(N, dim)...) + return @. (2sin((inds - 1) * π / N) / (L / N))^2 +end + +function DistributedFFTBasedPoissonSolver(arch, full_grid, local_grid) + topo = (TX, TY, TZ) = topology(full_grid) + + λx = poisson_eigenvalues(full_grid.Nx, full_grid.Lx, 1, TX()) + λy = poisson_eigenvalues(full_grid.Ny, full_grid.Ly, 2, TY()) + λz = poisson_eigenvalues(full_grid.Nz, full_grid.Lz, 3, TZ()) + + I, J, K = arch.my_index + my_eigenvalues = ( + λx = λx[(I-1)*local_grid.Nx+1:I*local_grid.Nx, :, :], + λy = λy[:, (J-1)*local_grid.Ny+1:J*local_grid.Ny, :], + λz = λz[:, :, (K-1)*local_grid.Nz+1:K*local_grid.Nz] + ) + + transform = PencilFFTs.Transforms.FFT!() + proc_dims = (arch.ranks[2], arch.ranks[3]) + plan = PencilFFTPlan(size(full_grid), transform, proc_dims, MPI.COMM_WORLD) + + storage = allocate_input(plan) + + return DistributedFFTBasedPoissonSolver(plan, full_grid, local_grid, my_eigenvalues, storage) +end + +function solve_poisson_equation!(solver::DistributedFFTBasedPoissonSolver) + λx, λy, λz = solver.my_eigenvalues + + # https://jipolanco.github.io/PencilFFTs.jl/dev/PencilFFTs/#PencilFFTs.allocate_input + RHS = ϕ = first(solver.storage) + + # Apply forward transforms. + solver.plan * solver.storage + + # Solve the discrete Poisson equation. + @. ϕ = -RHS / (λx + λy + λz) + + # Setting DC component of the solution (the mean) to be zero. This is also + # necessary because the source term to the Poisson equation has zero mean + # and so the DC component comes out to be ∞. + if MPI.Comm_rank(MPI.COMM_WORLD) == 0 + ϕ[1, 1, 1] = 0 + end + + # Apply backward transforms. 
+ solver.plan \ solver.storage + + return nothing +end diff --git a/src/Distributed/distributed_utils.jl b/src/Distributed/distributed_utils.jl index 51dcd77301..1fef722396 100644 --- a/src/Distributed/distributed_utils.jl +++ b/src/Distributed/distributed_utils.jl @@ -58,7 +58,7 @@ underlying_south_halo(f, grid, location) = view(f.parent, :, underlying_left_halo_indices(location, topology(grid, 2), grid.Ny, grid.Hy), :) underlying_north_halo(f, grid, location) = - view(f.parent, :, underlying_right_halo_indices(location, topology(grid, 2), grid.Nz, grid.Hz), :) + view(f.parent, :, underlying_right_halo_indices(location, topology(grid, 2), grid.Ny, grid.Hy), :) underlying_bottom_halo(f, grid, location) = view(f.parent, :, :, underlying_left_halo_indices(location, topology(grid, 3), grid.Nz, grid.Hz)) diff --git a/src/Distributed/halo_communication.jl b/src/Distributed/halo_communication.jl index 2dc48f4857..bc82f48fd3 100644 --- a/src/Distributed/halo_communication.jl +++ b/src/Distributed/halo_communication.jl @@ -149,7 +149,7 @@ for side in sides end ##### -##### Receiving and filling halos (buffer is a view so should get filled upon receive) +##### Receiving and filling halos (buffer is a view so it gets filled upon receive) ##### for side in sides From da21e2f522d994054b4029b150ed84e47cdebb37 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sat, 6 Feb 2021 11:21:16 -0500 Subject: [PATCH 060/100] Janky Poisson solve works out --- .../distributed_fft_based_poisson_solver.jl | 37 ++++-- .../test_distributed_poisson_solvers.jl | 116 ++++++++++++++++++ 2 files changed, 141 insertions(+), 12 deletions(-) create mode 100644 src/Distributed/test_distributed_poisson_solvers.jl diff --git a/src/Distributed/distributed_fft_based_poisson_solver.jl b/src/Distributed/distributed_fft_based_poisson_solver.jl index df384b3fbd..480e163788 100644 --- a/src/Distributed/distributed_fft_based_poisson_solver.jl +++ b/src/Distributed/distributed_fft_based_poisson_solver.jl @@ -24,14 +24,17 @@ function DistributedFFTBasedPoissonSolver(arch, full_grid, local_grid) λy = poisson_eigenvalues(full_grid.Ny, full_grid.Ly, 2, TY()) λz = poisson_eigenvalues(full_grid.Nz, full_grid.Lz, 3, TZ()) - I, J, K = arch.my_index - my_eigenvalues = ( - λx = λx[(I-1)*local_grid.Nx+1:I*local_grid.Nx, :, :], - λy = λy[:, (J-1)*local_grid.Ny+1:J*local_grid.Ny, :], - λz = λz[:, :, (K-1)*local_grid.Nz+1:K*local_grid.Nz] - ) - - transform = PencilFFTs.Transforms.FFT!() + my_eigenvalues = (; λx, λy, λz) + + # I, J, K = arch.my_index + # my_eigenvalues = ( + # λx = λx[(I-1)*local_grid.Nx+1:I*local_grid.Nx, :, :], + # λy = λy[:, (J-1)*local_grid.Ny+1:J*local_grid.Ny, :], + # λz = λz[:, :, (K-1)*local_grid.Nz+1:K*local_grid.Nz] + # ) + + # transform = PencilFFTs.Transforms.FFT!() + transform = PencilFFTs.Transforms.FFT() proc_dims = (arch.ranks[2], arch.ranks[3]) plan = PencilFFTPlan(size(full_grid), transform, proc_dims, MPI.COMM_WORLD) @@ -44,13 +47,22 @@ function solve_poisson_equation!(solver::DistributedFFTBasedPoissonSolver) λx, λy, λz = solver.my_eigenvalues # https://jipolanco.github.io/PencilFFTs.jl/dev/PencilFFTs/#PencilFFTs.allocate_input - RHS = ϕ = first(solver.storage) + # RHS = ϕ = first(solver.storage) + RHS = ϕ = solver.storage # Apply forward transforms. - solver.plan * solver.storage + # ϕ = solver.plan * solver.storage + ϕ = solver.plan * RHS + + @show size(RHS) + @show size(ϕ) + + λx = reshape(λx, 1, solver.my_grid.Nx, 1) + λy = reshape(λy, solver.my_grid.Ny, 1, 1) # Solve the discrete Poisson equation. - @. 
ϕ = -RHS / (λx + λy + λz) + # @. ϕ = -RHS / (λx + λy + λz) + @. ϕ = -ϕ / (λx + λy + λz) # Setting DC component of the solution (the mean) to be zero. This is also # necessary because the source term to the Poisson equation has zero mean @@ -60,7 +72,8 @@ function solve_poisson_equation!(solver::DistributedFFTBasedPoissonSolver) end # Apply backward transforms. - solver.plan \ solver.storage + # solver.plan \ solver.storage + solver.storage .= solver.plan \ ϕ return nothing end diff --git a/src/Distributed/test_distributed_poisson_solvers.jl b/src/Distributed/test_distributed_poisson_solvers.jl new file mode 100644 index 0000000000..993004b807 --- /dev/null +++ b/src/Distributed/test_distributed_poisson_solvers.jl @@ -0,0 +1,116 @@ +using Test +using Oceananigans +using Oceananigans.Architectures +using Oceananigans.Solvers +using Oceananigans.Utils +using Oceananigans.Operators +using Oceananigans.BoundaryConditions: fill_halo_regions! +using KernelAbstractions: @kernel, @index, Event + +@kernel function ∇²!(grid, f, ∇²f) + i, j, k = @index(Global, NTuple) + @inbounds ∇²f[i, j, k] = ∇²(i, j, k, grid, f) +end + +@kernel function divergence!(grid, u, v, w, div) + i, j, k = @index(Global, NTuple) + @inbounds div[i, j, k] = divᶜᶜᶜ(i, j, k, grid, u, v, w) +end + +function random_divergent_source_term(FT, arch, grid) + # Generate right hand side from a random (divergent) velocity field. + Ru = CenterField(FT, arch, grid, UVelocityBoundaryConditions(grid)) + Rv = CenterField(FT, arch, grid, VVelocityBoundaryConditions(grid)) + Rw = CenterField(FT, arch, grid, WVelocityBoundaryConditions(grid)) + U = (u=Ru, v=Rv, w=Rw) + + Nx, Ny, Nz = size(grid) + set!(Ru, rand(Nx, Ny, Nz)) + set!(Rv, rand(Nx, Ny, Nz)) + set!(Rw, rand(Nx, Ny, Nz)) + + # Adding (nothing, nothing) in case we need to dispatch on ::NFBC + fill_halo_regions!(Ru, arch, nothing, nothing) + fill_halo_regions!(Rv, arch, nothing, nothing) + fill_halo_regions!(Rw, arch, nothing, nothing) + + # Compute the right hand side R = ∇⋅U + ArrayType = array_type(arch) + R = zeros(Nx, Ny, Nz) |> ArrayType + event = launch!(arch, grid, :xyz, divergence!, grid, U.u.data, U.v.data, U.w.data, R, + dependencies=Event(device(arch))) + wait(device(arch), event) + + return R +end + +function compute_∇²!(∇²ϕ, ϕ, arch, grid) + fill_halo_regions!(ϕ, arch) + child_arch = child_architecture(arch) + event = launch!(child_arch, grid, :xyz, ∇²!, grid, ϕ.data, ∇²ϕ.data, dependencies=Event(device(child_arch))) + wait(device(child_arch), event) + fill_halo_regions!(∇²ϕ, arch) + return nothing +end + +function divergence_free_poisson_solution_triply_periodic() + topo = (Periodic, Periodic, Periodic) + full_grid = RegularCartesianGrid(topology=topo, size=(16, 16, 1), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) + dm = DistributedModel(architecture=arch, grid=full_grid) + + local_grid = dm.model.grid + solver = DistributedFFTBasedPoissonSolver(arch, full_grid, local_grid) + + R = random_divergent_source_term(Float64, child_architecture(arch), local_grid) + # first(solver.storage) .= R + solver.storage .= R + + solve_poisson_equation!(solver) + + p_bcs = PressureBoundaryConditions(local_grid) + p_bcs = inject_halo_communication_boundary_conditions(p_bcs, arch.my_rank, arch.connectivity) + + ϕ = CenterField(Float64, child_architecture(arch), local_grid, p_bcs) # "pressure" + ∇²ϕ = CenterField(Float64, child_architecture(arch), local_grid, p_bcs) + + interior(ϕ) .= real(solver.storage) + compute_∇²!(∇²ϕ, ϕ, arch, local_grid) + + return nothing +end + 
+topo = (Periodic, Periodic, Periodic) +full_grid = RegularCartesianGrid(topology=topo, size=(16, 16, 1), extent=(1, 2, 3)) +arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) +dm = DistributedModel(architecture=arch, grid=full_grid) +local_grid = dm.model.grid +solver = DistributedFFTBasedPoissonSolver(arch, full_grid, local_grid) +Random.seed!(0) +R = rand(size(full_grid)...) +I, J, K = arch.my_index +R = R[:, local_grid.Ny*(J-1)+1:local_grid.Ny*J, :] +solver.storage .= R +F = solver.plan * solver.storage +λx, λy, λz = solver.my_eigenvalues +λx = λx[(J-1)*local_grid.Ny+1:J*local_grid.Ny, :, :] +@. F = -F / (λx + λy + λz) +if MPI.Comm_rank(MPI.COMM_WORLD) == 0 + F[1, 1, 1] = 0 +end +B = real(solver.plan \ F) + +Nx, Ny, Nz = 16, 16, 1 +Lx, Ly, Lz = 1, 2, 3 +Random.seed!(0) +R = rand(16, 16, 1) +F = fft(R) +λx = @. (2sin((0:Nx - 1) * π / Nx) / (Lx / Nx))^2 +λy = @. (2sin((0:Ny - 1) * π / Ny) / (Ly / Ny))^2 +λz = @. (2sin((0:Nz - 1) * π / Nz) / (Lz / Nz))^2 +λx = reshape(λx, Nx, 1, 1) +λy = reshape(λy, 1, Ny, 1) +λz = reshape(λz, 1, 1, Nz) +@. F = -F / (λx + λy + λz) +F[1, 1, 1] = 0 +B = real(ifft(F)) From 20d5f076bcafd07f87597f39b6d9dbcfc16eb033 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sat, 6 Feb 2021 12:30:59 -0500 Subject: [PATCH 061/100] Gotta send boundary points, not halos! --- src/Distributed/distributed_utils.jl | 44 +++++++++++++++++++++---- src/Distributed/halo_communication.jl | 4 +-- src/Distributed/test_distributed_mpi.jl | 10 +++--- 3 files changed, 45 insertions(+), 13 deletions(-) diff --git a/src/Distributed/distributed_utils.jl b/src/Distributed/distributed_utils.jl index 1fef722396..8ce5fe3827 100644 --- a/src/Distributed/distributed_utils.jl +++ b/src/Distributed/distributed_utils.jl @@ -6,6 +6,10 @@ using Oceananigans.Grids: # TODO: Move to Grids/grid_utils.jl +##### +##### Viewing halos +##### + west_halo(f::AbstractField{LX, LY, LZ}; include_corners=true) where {LX, LY, LZ} = include_corners ? 
view(f.data, left_halo_indices(LX, topology(f, 1), f.grid.Nx, f.grid.Hx), :, :) :
                       view(f.data, left_halo_indices(LX, topology(f, 1), f.grid.Nx, f.grid.Hx),
                            interior_indices(LY, topology(f, 2), f.grid.Ny),
                            interior_indices(LZ, topology(f, 3), f.grid.Nz))
 
@@ -42,12 +46,6 @@ top_halo(f::AbstractField{LX, LY, LZ}; include_corners=true) where {LX, LY, LZ}
                            interior_indices(LY, topology(f, 2), f.grid.Ny),
                            right_halo_indices(LZ, topology(f, 3), f.grid.Nz, f.grid.Hz))
 
-bottom_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} =
-    view(f.data, :, :, left_halo_indices(LZ, topology(f, 3), f.grid.Nz, f.grid.Hz), :)
-
-top_halo(f::AbstractField{LX, LY, LZ}) where {LX, LY, LZ} =
-    view(f.data, :, :, right_halo_indices(LZ, topology(f, 3), f.grid.Nz, f.grid.Hz), :)
-
 underlying_west_halo(f, grid, location) =
     view(f.parent, underlying_left_halo_indices(location, topology(grid, 1), grid.Nx, grid.Hx), :, :)
 
@@ -65,3 +63,37 @@ underlying_bottom_halo(f, grid, location) =
 
 underlying_top_halo(f, grid, location) =
     view(f.parent, :, :, underlying_right_halo_indices(location, topology(grid, 3), grid.Nz, grid.Hz))
+
+#####
+##### Viewing boundary grid points (used to fill other halos)
+#####
+
+left_boundary_indices(loc, topo, N, H) = 1:H
+left_boundary_indices(::Type{Nothing}, topo, N, H) = 1:0 # empty
+
+right_boundary_indices(loc, topo, N, H) = N-H+1:N
+right_boundary_indices(::Type{Nothing}, topo, N, H) = 1:0 # empty
+
+underlying_left_boundary_indices(loc, topo, N, H) = 1+H:2H
+underlying_left_boundary_indices(::Type{Nothing}, topo, N, H) = 1:0 # empty
+
+underlying_right_boundary_indices(loc, topo, N, H) = N+1:N+H
+underlying_right_boundary_indices(::Type{Nothing}, topo, N, H) = 1:0 # empty
+
+underlying_west_boundary(f, grid, location) =
+    view(f.parent, underlying_left_boundary_indices(location, topology(grid, 1), grid.Nx, grid.Hx), :, :)
+
+underlying_east_boundary(f, grid, location) =
+    view(f.parent, underlying_right_boundary_indices(location, topology(grid, 1), grid.Nx, grid.Hx), :, :)
+
+underlying_south_boundary(f, grid, location) =
+    view(f.parent, :, underlying_left_boundary_indices(location, topology(grid, 2), grid.Ny, grid.Hy), :)
+
+underlying_north_boundary(f, grid, location) =
+    view(f.parent, :, underlying_right_boundary_indices(location, topology(grid, 2), grid.Ny, grid.Hy), :)
+
+underlying_bottom_boundary(f, grid, location) =
+    view(f.parent, :, :, underlying_left_boundary_indices(location, topology(grid, 3), grid.Nz, grid.Hz))
+
+underlying_top_boundary(f, grid, location) =
+    view(f.parent, :, :, underlying_right_boundary_indices(location, topology(grid, 3), grid.Nz, grid.Hz))
diff --git a/src/Distributed/halo_communication.jl b/src/Distributed/halo_communication.jl
index bc82f48fd3..d4515a7de0 100644
--- a/src/Distributed/halo_communication.jl
+++ b/src/Distributed/halo_communication.jl
@@ -132,12 +132,12 @@ end
 for side in sides
     side_str = string(side)
     send_side_halo = Symbol("send_$(side)_halo")
-    underlying_side_halo = Symbol("underlying_$(side)_halo")
+    underlying_side_boundary = Symbol("underlying_$(side)_boundary")
     side_send_tag = Symbol("$(side)_send_tag")
 
     @eval begin
         function $send_side_halo(c, grid, c_location, my_rank, rank_to_send_to)
-            send_buffer = $underlying_side_halo(c, grid, c_location)
+            send_buffer = $underlying_side_boundary(c, grid, c_location)
             send_tag = $side_send_tag(my_rank, rank_to_send_to)
 
             @debug "Sending " * $side_str * " halo: my_rank=$my_rank, rank_to_send_to=$rank_to_send_to, send_tag=$send_tag"
diff --git a/src/Distributed/test_distributed_mpi.jl b/src/Distributed/test_distributed_mpi.jl
index f52a402ab0..e860c22cbd 100644
--- 
a/src/Distributed/test_distributed_mpi.jl +++ b/src/Distributed/test_distributed_mpi.jl @@ -353,7 +353,7 @@ function run_triply_periodic_halo_communication_tests_with_411_ranks() dm = DistributedModel(architecture=arch, grid=full_grid) for field in fields(dm.model) - set!(field, arch.my_rank) + interior(field) .= arch.my_rank fill_halo_regions!(field, arch) @test all(east_halo(field) .== arch.connectivity.east) @@ -376,7 +376,7 @@ function run_triply_periodic_halo_communication_tests_with_141_ranks() dm = DistributedModel(architecture=arch, grid=full_grid) for field in fields(dm.model) - set!(field, arch.my_rank) + interior(field) .= arch.my_rank fill_halo_regions!(field, arch) @test all(north_halo(field) .== arch.connectivity.north) @@ -399,7 +399,7 @@ function run_triply_periodic_halo_communication_tests_with_114_ranks() dm = DistributedModel(architecture=arch, grid=full_grid) for field in fields(dm.model) - set!(field, arch.my_rank) + interior(field) .= arch.my_rank fill_halo_regions!(field, arch) @test all(top_halo(field) .== arch.connectivity.top) @@ -422,7 +422,7 @@ function run_triply_periodic_halo_communication_tests_with_221_ranks() dm = DistributedModel(architecture=arch, grid=full_grid) for field in fields(dm.model) - set!(field, arch.my_rank) + interior(field) .= arch.my_rank fill_halo_regions!(field, arch) @test all(east_halo(field) .== arch.connectivity.east) @@ -469,7 +469,7 @@ end run_triply_periodic_bc_injection_tests_with_221_ranks() end - # TODO: Larger halos! + # TODO: Test larger halos! @testset "Halo communication" begin @info " Testing halo communication..." run_triply_periodic_halo_communication_tests_with_411_ranks() From 5e14925a0586eef260e2d100c6efaa4d56a4896b Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sat, 6 Feb 2021 12:34:14 -0500 Subject: [PATCH 062/100] Distributed Poisson solver solution is divergence-free! 
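
The test now checks the solver end-to-end instead of returning nothing: build
a random divergent source term R = ∇⋅U★ on each rank, solve ∇²ϕ = R with the
distributed solver, recompute the Laplacian of ϕ with the same finite-difference
operator, and require that it reproduces R. Roughly (a sketch using the names
from src/Distributed/test_distributed_poisson_solvers.jl):

    R = random_divergent_source_term(Float64, child_architecture(arch), local_grid)
    first(solver.storage) .= R
    solve_poisson_equation!(solver)
    interior(ϕ) .= real(first(solver.storage))
    compute_∇²!(∇²ϕ, ϕ, arch, local_grid)
    R ≈ interior(∇²ϕ)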
--- .../distributed_fft_based_poisson_solver.jl | 23 +++--------- .../test_distributed_poisson_solvers.jl | 37 +------------------ 2 files changed, 7 insertions(+), 53 deletions(-) diff --git a/src/Distributed/distributed_fft_based_poisson_solver.jl b/src/Distributed/distributed_fft_based_poisson_solver.jl index 480e163788..1f3fce1fa5 100644 --- a/src/Distributed/distributed_fft_based_poisson_solver.jl +++ b/src/Distributed/distributed_fft_based_poisson_solver.jl @@ -4,7 +4,7 @@ struct DistributedFFTBasedPoissonSolver{P, F, L, λ, S} plan :: P full_grid :: F my_grid :: L - my_eigenvalues :: λ + eigenvalues :: λ storage :: S end @@ -24,27 +24,22 @@ function DistributedFFTBasedPoissonSolver(arch, full_grid, local_grid) λy = poisson_eigenvalues(full_grid.Ny, full_grid.Ly, 2, TY()) λz = poisson_eigenvalues(full_grid.Nz, full_grid.Lz, 3, TZ()) - my_eigenvalues = (; λx, λy, λz) + I, J, K = arch.my_index + λx = λx[(J-1)*local_grid.Ny+1:J*local_grid.Ny, :, :] - # I, J, K = arch.my_index - # my_eigenvalues = ( - # λx = λx[(I-1)*local_grid.Nx+1:I*local_grid.Nx, :, :], - # λy = λy[:, (J-1)*local_grid.Ny+1:J*local_grid.Ny, :], - # λz = λz[:, :, (K-1)*local_grid.Nz+1:K*local_grid.Nz] - # ) + eigenvalues = (; λx, λy, λz) # transform = PencilFFTs.Transforms.FFT!() transform = PencilFFTs.Transforms.FFT() proc_dims = (arch.ranks[2], arch.ranks[3]) plan = PencilFFTPlan(size(full_grid), transform, proc_dims, MPI.COMM_WORLD) - storage = allocate_input(plan) - return DistributedFFTBasedPoissonSolver(plan, full_grid, local_grid, my_eigenvalues, storage) + return DistributedFFTBasedPoissonSolver(plan, full_grid, local_grid, eigenvalues, storage) end function solve_poisson_equation!(solver::DistributedFFTBasedPoissonSolver) - λx, λy, λz = solver.my_eigenvalues + λx, λy, λz = solver.eigenvalues # https://jipolanco.github.io/PencilFFTs.jl/dev/PencilFFTs/#PencilFFTs.allocate_input # RHS = ϕ = first(solver.storage) @@ -54,12 +49,6 @@ function solve_poisson_equation!(solver::DistributedFFTBasedPoissonSolver) # ϕ = solver.plan * solver.storage ϕ = solver.plan * RHS - @show size(RHS) - @show size(ϕ) - - λx = reshape(λx, 1, solver.my_grid.Nx, 1) - λy = reshape(λy, solver.my_grid.Ny, 1, 1) - # Solve the discrete Poisson equation. # @. ϕ = -RHS / (λx + λy + λz) @. ϕ = -ϕ / (λx + λy + λz) diff --git a/src/Distributed/test_distributed_poisson_solvers.jl b/src/Distributed/test_distributed_poisson_solvers.jl index 993004b807..785418e6c5 100644 --- a/src/Distributed/test_distributed_poisson_solvers.jl +++ b/src/Distributed/test_distributed_poisson_solvers.jl @@ -77,40 +77,5 @@ function divergence_free_poisson_solution_triply_periodic() interior(ϕ) .= real(solver.storage) compute_∇²!(∇²ϕ, ϕ, arch, local_grid) - return nothing -end - -topo = (Periodic, Periodic, Periodic) -full_grid = RegularCartesianGrid(topology=topo, size=(16, 16, 1), extent=(1, 2, 3)) -arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) -dm = DistributedModel(architecture=arch, grid=full_grid) -local_grid = dm.model.grid -solver = DistributedFFTBasedPoissonSolver(arch, full_grid, local_grid) -Random.seed!(0) -R = rand(size(full_grid)...) -I, J, K = arch.my_index -R = R[:, local_grid.Ny*(J-1)+1:local_grid.Ny*J, :] -solver.storage .= R -F = solver.plan * solver.storage -λx, λy, λz = solver.my_eigenvalues -λx = λx[(J-1)*local_grid.Ny+1:J*local_grid.Ny, :, :] -@. 
F = -F / (λx + λy + λz) -if MPI.Comm_rank(MPI.COMM_WORLD) == 0 - F[1, 1, 1] = 0 + return @test R ≈ interior(∇²ϕ) end -B = real(solver.plan \ F) - -Nx, Ny, Nz = 16, 16, 1 -Lx, Ly, Lz = 1, 2, 3 -Random.seed!(0) -R = rand(16, 16, 1) -F = fft(R) -λx = @. (2sin((0:Nx - 1) * π / Nx) / (Lx / Nx))^2 -λy = @. (2sin((0:Ny - 1) * π / Ny) / (Ly / Ny))^2 -λz = @. (2sin((0:Nz - 1) * π / Nz) / (Lz / Nz))^2 -λx = reshape(λx, Nx, 1, 1) -λy = reshape(λy, 1, Ny, 1) -λz = reshape(λz, 1, 1, Nz) -@. F = -F / (λx + λy + λz) -F[1, 1, 1] = 0 -B = real(ifft(F)) From 42c36e40c8d37e1f52d7749241002e40383f81d3 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sat, 6 Feb 2021 12:57:09 -0500 Subject: [PATCH 063/100] In-place distributed FFTs --- .../distributed_fft_based_poisson_solver.jl | 17 +++++------------ src/Distributed/test_distributed_mpi.jl | 2 ++ .../test_distributed_poisson_solvers.jl | 12 ++++++++---- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/Distributed/distributed_fft_based_poisson_solver.jl b/src/Distributed/distributed_fft_based_poisson_solver.jl index 1f3fce1fa5..8ec92d10a5 100644 --- a/src/Distributed/distributed_fft_based_poisson_solver.jl +++ b/src/Distributed/distributed_fft_based_poisson_solver.jl @@ -29,8 +29,7 @@ function DistributedFFTBasedPoissonSolver(arch, full_grid, local_grid) eigenvalues = (; λx, λy, λz) - # transform = PencilFFTs.Transforms.FFT!() - transform = PencilFFTs.Transforms.FFT() + transform = PencilFFTs.Transforms.FFT!() proc_dims = (arch.ranks[2], arch.ranks[3]) plan = PencilFFTPlan(size(full_grid), transform, proc_dims, MPI.COMM_WORLD) storage = allocate_input(plan) @@ -41,17 +40,12 @@ end function solve_poisson_equation!(solver::DistributedFFTBasedPoissonSolver) λx, λy, λz = solver.eigenvalues - # https://jipolanco.github.io/PencilFFTs.jl/dev/PencilFFTs/#PencilFFTs.allocate_input - # RHS = ϕ = first(solver.storage) - RHS = ϕ = solver.storage - # Apply forward transforms. - # ϕ = solver.plan * solver.storage - ϕ = solver.plan * RHS + solver.plan * solver.storage # Solve the discrete Poisson equation. - # @. ϕ = -RHS / (λx + λy + λz) - @. ϕ = -ϕ / (λx + λy + λz) + RHS = ϕ = solver.storage[2] + @. ϕ = - RHS / (λx + λy + λz) # Setting DC component of the solution (the mean) to be zero. This is also # necessary because the source term to the Poisson equation has zero mean @@ -61,8 +55,7 @@ function solve_poisson_equation!(solver::DistributedFFTBasedPoissonSolver) end # Apply backward transforms. 
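+    # `Transforms.FFT!` plans from PencilFFTs apply in place on `solver.storage`,
+    # so the backward transform needs no explicit copy of the result.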
- # solver.plan \ solver.storage - solver.storage .= solver.plan \ ϕ + solver.plan \ solver.storage return nothing end diff --git a/src/Distributed/test_distributed_mpi.jl b/src/Distributed/test_distributed_mpi.jl index e860c22cbd..1b33d98ade 100644 --- a/src/Distributed/test_distributed_mpi.jl +++ b/src/Distributed/test_distributed_mpi.jl @@ -477,6 +477,8 @@ end run_triply_periodic_halo_communication_tests_with_114_ranks() # run_triply_periodic_halo_communication_tests_with_221_ranks() end + + include("test_distributed_poisson_solvers.jl") end # MPI.Finalize() diff --git a/src/Distributed/test_distributed_poisson_solvers.jl b/src/Distributed/test_distributed_poisson_solvers.jl index 785418e6c5..ce8b5443b0 100644 --- a/src/Distributed/test_distributed_poisson_solvers.jl +++ b/src/Distributed/test_distributed_poisson_solvers.jl @@ -63,8 +63,7 @@ function divergence_free_poisson_solution_triply_periodic() solver = DistributedFFTBasedPoissonSolver(arch, full_grid, local_grid) R = random_divergent_source_term(Float64, child_architecture(arch), local_grid) - # first(solver.storage) .= R - solver.storage .= R + first(solver.storage) .= R solve_poisson_equation!(solver) @@ -74,8 +73,13 @@ function divergence_free_poisson_solution_triply_periodic() ϕ = CenterField(Float64, child_architecture(arch), local_grid, p_bcs) # "pressure" ∇²ϕ = CenterField(Float64, child_architecture(arch), local_grid, p_bcs) - interior(ϕ) .= real(solver.storage) + interior(ϕ) .= real(first(solver.storage)) compute_∇²!(∇²ϕ, ϕ, arch, local_grid) - return @test R ≈ interior(∇²ϕ) + return R ≈ interior(∇²ϕ) +end + +@testset "Distributed FFT-based Poisson solver" begin + @info " Testing distributed FFT-based Poisson solver..." + @test divergence_free_poisson_solution_triply_periodic() end From 999d1f4ff967cb1826ffa735752c7a660d1b1241 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sat, 6 Feb 2021 13:17:48 -0500 Subject: [PATCH 064/100] Distributed Poisson solver needs work for rectangular grids --- src/Distributed/distributed_fft_based_poisson_solver.jl | 2 ++ src/Distributed/distributed_model.jl | 2 ++ src/Distributed/test_distributed_poisson_solvers.jl | 9 +++++---- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/Distributed/distributed_fft_based_poisson_solver.jl b/src/Distributed/distributed_fft_based_poisson_solver.jl index 8ec92d10a5..b6d8399cf0 100644 --- a/src/Distributed/distributed_fft_based_poisson_solver.jl +++ b/src/Distributed/distributed_fft_based_poisson_solver.jl @@ -1,5 +1,7 @@ using PencilFFTs +import Oceananigans.Solvers: solve_poisson_equation! + struct DistributedFFTBasedPoissonSolver{P, F, L, λ, S} plan :: P full_grid :: F diff --git a/src/Distributed/distributed_model.jl b/src/Distributed/distributed_model.jl index c2d02c16d5..0caf77e4fa 100644 --- a/src/Distributed/distributed_model.jl +++ b/src/Distributed/distributed_model.jl @@ -7,6 +7,7 @@ include("distributed_utils.jl") include("distributed_architectures.jl") include("halo_communication_bcs.jl") include("halo_communication.jl") +include("distributed_fft_based_poisson_solver.jl") ##### ##### Distributed model struct and constructor @@ -77,6 +78,7 @@ function DistributedModel(; architecture, grid, boundary_conditions=nothing, mod architecture = child_architecture(architecture), grid = my_grid, boundary_conditions = communicative_bcs, + # pressure_solver = DistributedFFTBasedPoissonSolver(architecture, grid, my_grid), model_kwargs... 
) diff --git a/src/Distributed/test_distributed_poisson_solvers.jl b/src/Distributed/test_distributed_poisson_solvers.jl index ce8b5443b0..0feae84a48 100644 --- a/src/Distributed/test_distributed_poisson_solvers.jl +++ b/src/Distributed/test_distributed_poisson_solvers.jl @@ -53,10 +53,10 @@ function compute_∇²!(∇²ϕ, ϕ, arch, grid) return nothing end -function divergence_free_poisson_solution_triply_periodic() +function divergence_free_poisson_solution_triply_periodic(grid_points, ranks) topo = (Periodic, Periodic, Periodic) - full_grid = RegularCartesianGrid(topology=topo, size=(16, 16, 1), extent=(1, 2, 3)) - arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) + full_grid = RegularCartesianGrid(topology=topo, size=grid_points, extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=ranks) dm = DistributedModel(architecture=arch, grid=full_grid) local_grid = dm.model.grid @@ -81,5 +81,6 @@ end @testset "Distributed FFT-based Poisson solver" begin @info " Testing distributed FFT-based Poisson solver..." - @test divergence_free_poisson_solution_triply_periodic() + @test divergence_free_poisson_solution_triply_periodic((16, 16, 1), (1, 4, 1)) + @test divergence_free_poisson_solution_triply_periodic((64, 64, 1), (1, 4, 1)) end From 1b2bda26776b7fb82d837130687cef26fbfa23d3 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sat, 6 Feb 2021 15:01:49 -0500 Subject: [PATCH 065/100] Pass distributed pressure solver to model --- src/Distributed/distributed_model.jl | 5 ++++- src/Distributed/test_distributed_mpi.jl | 24 ++++++++++++------------ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/Distributed/distributed_model.jl b/src/Distributed/distributed_model.jl index 0caf77e4fa..4d0b93e0e3 100644 --- a/src/Distributed/distributed_model.jl +++ b/src/Distributed/distributed_model.jl @@ -74,11 +74,14 @@ function DistributedModel(; architecture, grid, boundary_conditions=nothing, mod ## Construct local model + pressure_solver = haskey(model_kwargs, :pressure_solver) ? Dict(model_kwargs)[:pressure_solver] : + DistributedFFTBasedPoissonSolver(architecture, grid, my_grid) + my_model = IncompressibleModel(; architecture = child_architecture(architecture), grid = my_grid, boundary_conditions = communicative_bcs, - # pressure_solver = DistributedFFTBasedPoissonSolver(architecture, grid, my_grid), + pressure_solver = pressure_solver, model_kwargs... 
) diff --git a/src/Distributed/test_distributed_mpi.jl b/src/Distributed/test_distributed_mpi.jl index 1b33d98ade..6af7f94dea 100644 --- a/src/Distributed/test_distributed_mpi.jl +++ b/src/Distributed/test_distributed_mpi.jl @@ -194,7 +194,7 @@ function run_triply_periodic_local_grid_tests_with_411_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid) + dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) local_grid = dm.model.grid @@ -214,7 +214,7 @@ function run_triply_periodic_local_grid_tests_with_141_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid) + dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) local_grid = dm.model.grid @@ -234,7 +234,7 @@ function run_triply_periodic_local_grid_tests_with_114_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) - dm = DistributedModel(architecture=arch, grid=full_grid) + dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) local_grid = dm.model.grid @@ -254,7 +254,7 @@ function run_triply_periodic_local_grid_tests_with_221_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid) + dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) i, j, k = arch.my_index local_grid = dm.model.grid @@ -278,7 +278,7 @@ function run_triply_periodic_bc_injection_tests_with_411_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid) + dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) for field in fields(dm.model) fbcs = field.boundary_conditions @@ -295,7 +295,7 @@ function run_triply_periodic_bc_injection_tests_with_141_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid) + dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) for field in fields(dm.model) fbcs = field.boundary_conditions @@ -312,7 +312,7 @@ function run_triply_periodic_bc_injection_tests_with_114_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) - dm = DistributedModel(architecture=arch, grid=full_grid) + dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) for field in fields(dm.model) fbcs = field.boundary_conditions @@ -329,7 +329,7 @@ function run_triply_periodic_bc_injection_tests_with_221_ranks() topo = (Periodic, Periodic, 
Periodic) full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid) + dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) for field in fields(dm.model) fbcs = field.boundary_conditions @@ -350,7 +350,7 @@ function run_triply_periodic_halo_communication_tests_with_411_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularCartesianGrid(topology=topo, size=(8, 6, 4), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid) + dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) for field in fields(dm.model) interior(field) .= arch.my_rank @@ -373,7 +373,7 @@ function run_triply_periodic_halo_communication_tests_with_141_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularCartesianGrid(topology=topo, size=(3, 8, 2), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid) + dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) for field in fields(dm.model) interior(field) .= arch.my_rank @@ -396,7 +396,7 @@ function run_triply_periodic_halo_communication_tests_with_114_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularCartesianGrid(topology=topo, size=(3, 5, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) - dm = DistributedModel(architecture=arch, grid=full_grid) + dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) for field in fields(dm.model) interior(field) .= arch.my_rank @@ -419,7 +419,7 @@ function run_triply_periodic_halo_communication_tests_with_221_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 3), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid) + dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) for field in fields(dm.model) interior(field) .= arch.my_rank From 080db7400f0da654d4782825b10087280775a978 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sat, 6 Feb 2021 16:29:21 -0500 Subject: [PATCH 066/100] MPI incompressible turbulence! --- src/Distributed/halo_communication.jl | 4 +- src/Distributed/mpi_turbulence.jl | 88 +++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 2 deletions(-) create mode 100644 src/Distributed/mpi_turbulence.jl diff --git a/src/Distributed/halo_communication.jl b/src/Distributed/halo_communication.jl index d4515a7de0..eba4fde6cd 100644 --- a/src/Distributed/halo_communication.jl +++ b/src/Distributed/halo_communication.jl @@ -58,10 +58,10 @@ end ##### Filling halos for halo communication boundary conditions ##### -fill_halo_regions!(field::AbstractField{LX, LY, LZ}, arch::AbstractMultiArchitecture, args...) where {LX, LY, LZ} = +fill_halo_regions!(field::AbstractField{LX, LY, LZ}, arch, args...) where {LX, LY, LZ} = fill_halo_regions!(field.data, field.boundary_conditions, arch, field.grid, (LX, LY, LZ), args...) -function fill_halo_regions!(c::AbstractArray, bcs, arch::AbstractMultiArchitecture, grid, c_location, args...) +function fill_halo_regions!(c::AbstractArray, bcs, arch, grid, c_location, args...) 
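+    # `arch` is left untyped so the same method can be reached with both
+    # distributed and regular architectures (child_architecture(::CPU) = CPU()).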
barrier = Event(device(child_architecture(arch))) diff --git a/src/Distributed/mpi_turbulence.jl b/src/Distributed/mpi_turbulence.jl new file mode 100644 index 0000000000..eab5a3d321 --- /dev/null +++ b/src/Distributed/mpi_turbulence.jl @@ -0,0 +1,88 @@ +include("distributed_model.jl") + +using Statistics + +using Oceananigans.Fields +using Oceananigans.OutputWriters +using Oceananigans.AbstractOperations + +using Oceananigans.Solvers: calculate_pressure_right_hand_side!, copy_pressure! + +import Oceananigans.Solvers: solve_for_pressure! + +child_architecture(::CPU) = CPU() + +function solve_for_pressure!(pressure, solver::DistributedFFTBasedPoissonSolver, arch, grid, Δt, U★) + + RHS = first(solver.storage) + + rhs_event = launch!(arch, grid, :xyz, + calculate_pressure_right_hand_side!, RHS, arch, grid, Δt, U★, + dependencies = Event(device(arch))) + + wait(device(arch), rhs_event) + + solve_poisson_equation!(solver) + + ϕ = first(solver.storage) + + copy_event = launch!(arch, grid, :xyz, + copy_pressure!, pressure, ϕ, arch, grid, + dependencies = Event(device(arch))) + + wait(device(arch), copy_event) + + return nothing +end + +topo = (Periodic, Periodic, Periodic) +full_grid = RegularCartesianGrid(topology=topo, size=(128, 128, 1), extent=(2π, 2π, 1)) +arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) +dm = DistributedModel(architecture=arch, grid=full_grid, closure=IsotropicDiffusivity(ν=1e-3)) + +model = dm.model +u₀ = rand(size(model.grid)...) +u₀ .-= mean(u₀) +set!(model, u=0.01u₀, v=0.01u₀) + +# [time_step!(model, 0.1) for _ in 1:10] + +progress(sim) = @info "Iteration: $(sim.model.clock.iteration), time: $(sim.model.clock.time)" +simulation = Simulation(model, Δt=0.1, stop_time=50, iteration_interval=1, progress=progress) + +u, v, w = model.velocities +outputs = (ζ=ComputedField(∂x(v) - ∂y(u)),) +simulation.output_writers[:fields] = NetCDFOutputWriter(model, outputs, filepath="mpi_turbulence_rank$(arch.my_rank).nc", schedule=IterationInterval(1)) + +run!(simulation) + +using Printf +using NCDatasets +using CairoMakie + +if arch.my_rank == 0 + ranks = prod(arch.ranks) + + ds = [NCDataset("mpi_turbulence_rank$r.nc") for r in 0:ranks-1] + + frame = Node(1) + title = @lift @sprintf("MPI turbulence t = %.2f", ds[1]["time"][$frame]) + ζ = [@lift ds[r]["ζ"][:, :, 1, $frame] for r in 1:ranks] + + fig = Figure(resolution=(1600, 1600)) + + for r in 1:ranks + ax = fig[0, 1] = Axis(fig, title="rank $r") # , xlabel="x", ylabel="y") + hm = CairoMakie.heatmap!(ax, ds[r]["xC"], ds[r]["yC"], ζ[r], colormap=:balance, colorrange=(-0.01, 0.01)) + r == ranks && (cb1 = fig[:, 2] = Colorbar(fig, hm, width=30)) + end + + supertitle = fig[0, :] = Label(fig, title, textsize=30) + + record(fig, "mpi_turbulence.mp4", 1:10:length(ds[1]["time"]), framerate=15) do n + @info "Animating MPI turbulence $var frame $n/$(length(ds[1]["time"]))..." 
+ frame[] = n + end + + [close(d) for d in ds] +end From 631d7efa4ad66cbaff3576762ef48489e518e293 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sat, 6 Feb 2021 17:50:42 -0500 Subject: [PATCH 067/100] Gotta communicate pressure halos --- src/Distributed/distributed_architectures.jl | 2 +- src/Distributed/distributed_model.jl | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Distributed/distributed_architectures.jl b/src/Distributed/distributed_architectures.jl index 796ef3a708..848aae5706 100644 --- a/src/Distributed/distributed_architectures.jl +++ b/src/Distributed/distributed_architectures.jl @@ -1,6 +1,6 @@ using Oceananigans.Architectures -using Oceananigans.Grids: validate_tupled_argument +using Oceananigans.Grids: topology, validate_tupled_argument # TODO: Put connectivity inside architecture? MPI should be initialize so you can construct it in there. # Might have to make it MultiCPU(; grid, ranks) diff --git a/src/Distributed/distributed_model.jl b/src/Distributed/distributed_model.jl index 4d0b93e0e3..04c7c94d0d 100644 --- a/src/Distributed/distributed_model.jl +++ b/src/Distributed/distributed_model.jl @@ -77,11 +77,19 @@ function DistributedModel(; architecture, grid, boundary_conditions=nothing, mod pressure_solver = haskey(model_kwargs, :pressure_solver) ? Dict(model_kwargs)[:pressure_solver] : DistributedFFTBasedPoissonSolver(architecture, grid, my_grid) + p_bcs = PressureBoundaryConditions(grid) + p_bcs = inject_halo_communication_boundary_conditions(p_bcs, my_rank, my_connectivity) + + pHY′ = CenterField(child_architecture(architecture), my_grid, p_bcs) + pNHS = CenterField(child_architecture(architecture), my_grid, p_bcs) + pressures = (pHY′=pHY′, pNHS=pNHS) + my_model = IncompressibleModel(; architecture = child_architecture(architecture), grid = my_grid, boundary_conditions = communicative_bcs, pressure_solver = pressure_solver, + pressures = pressures, model_kwargs... ) From c6b92eb397b0ca28730c28a98800da0832150b06 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sat, 6 Feb 2021 19:43:45 -0500 Subject: [PATCH 068/100] Beautiful MPI turbulence --- src/Distributed/distributed_model.jl | 4 +- src/Distributed/mpi_turbulence.jl | 61 +++++++++++++++++-------- src/Distributed/test_distributed_mpi.jl | 1 + 3 files changed, 45 insertions(+), 21 deletions(-) diff --git a/src/Distributed/distributed_model.jl b/src/Distributed/distributed_model.jl index 04c7c94d0d..0570918522 100644 --- a/src/Distributed/distributed_model.jl +++ b/src/Distributed/distributed_model.jl @@ -3,6 +3,8 @@ using MPI using Oceananigans using Oceananigans.Grids +using Oceananigans.Grids: halo_size + include("distributed_utils.jl") include("distributed_architectures.jl") include("halo_communication_bcs.jl") @@ -48,7 +50,7 @@ function DistributedModel(; architecture, grid, boundary_conditions=nothing, mod z₁, z₂ = zL + (k-1)*lz, zL + k*lz # FIXME: local grid might have different topology! 
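+    # Passing halo=halo_size(grid) propagates the full grid's halo width to the
+    # local grids; the WENO5 setup in mpi_turbulence.jl needs halo=(3, 3, 3).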
- my_grid = RegularCartesianGrid(topology=topology(grid), size=(nx, ny, nz), x=(x₁, x₂), y=(y₁, y₂), z=(z₁, z₂)) + my_grid = RegularCartesianGrid(topology=topology(grid), size=(nx, ny, nz), x=(x₁, x₂), y=(y₁, y₂), z=(z₁, z₂), halo=halo_size(grid)) ## Change appropriate boundary conditions to halo communication BCs diff --git a/src/Distributed/mpi_turbulence.jl b/src/Distributed/mpi_turbulence.jl index eab5a3d321..296a4361ac 100644 --- a/src/Distributed/mpi_turbulence.jl +++ b/src/Distributed/mpi_turbulence.jl @@ -1,10 +1,16 @@ include("distributed_model.jl") +using MPI + +MPI.Initialized() || MPI.Init() + using Statistics +using Oceananigans.Advection using Oceananigans.Fields using Oceananigans.OutputWriters using Oceananigans.AbstractOperations +using Oceananigans.Utils using Oceananigans.Solvers: calculate_pressure_right_hand_side!, copy_pressure! @@ -36,23 +42,30 @@ function solve_for_pressure!(pressure, solver::DistributedFFTBasedPoissonSolver, end topo = (Periodic, Periodic, Periodic) -full_grid = RegularCartesianGrid(topology=topo, size=(128, 128, 1), extent=(2π, 2π, 1)) +full_grid = RegularCartesianGrid(topology=topo, size=(512, 512, 1), extent=(4π, 4π, 1), halo=(3, 3, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) -dm = DistributedModel(architecture=arch, grid=full_grid, closure=IsotropicDiffusivity(ν=1e-3)) -model = dm.model -u₀ = rand(size(model.grid)...) -u₀ .-= mean(u₀) -set!(model, u=0.01u₀, v=0.01u₀) +dm = DistributedModel( + architecture = arch, + grid = full_grid, + timestepper = :RungeKutta3, + advection = WENO5(), + closure = IsotropicDiffusivity(ν=1e-5) +) -# [time_step!(model, 0.1) for _ in 1:10] +model = dm.model +u₀ = rand(size(model.grid)...); +u₀ .-= mean(u₀); +set!(model, u=u₀, v=u₀) progress(sim) = @info "Iteration: $(sim.model.clock.iteration), time: $(sim.model.clock.time)" -simulation = Simulation(model, Δt=0.1, stop_time=50, iteration_interval=1, progress=progress) +simulation = Simulation(model, Δt=0.05, stop_time=50, iteration_interval=1, progress=progress) u, v, w = model.velocities outputs = (ζ=ComputedField(∂x(v) - ∂y(u)),) -simulation.output_writers[:fields] = NetCDFOutputWriter(model, outputs, filepath="mpi_turbulence_rank$(arch.my_rank).nc", schedule=IterationInterval(1)) +simulation.output_writers[:fields] = NetCDFOutputWriter(model, outputs, filepath="mpi_turbulence_rank$(arch.my_rank).nc", schedule=TimeInterval(0.1)) + +MPI.Barrier(MPI.COMM_WORLD) run!(simulation) @@ -61,26 +74,34 @@ using NCDatasets using CairoMakie if arch.my_rank == 0 - ranks = prod(arch.ranks) + ranks = 4 ds = [NCDataset("mpi_turbulence_rank$r.nc") for r in 0:ranks-1] frame = Node(1) - title = @lift @sprintf("MPI turbulence t = %.2f", ds[1]["time"][$frame]) + plot_title = @lift @sprintf("Oceananigans.jl + MPI: 2D turbulence t = %.1f", ds[1]["time"][$frame]) ζ = [@lift ds[r]["ζ"][:, :, 1, $frame] for r in 1:ranks] - fig = Figure(resolution=(1600, 1600)) - - for r in 1:ranks - ax = fig[0, 1] = Axis(fig, title="rank $r") # , xlabel="x", ylabel="y") - hm = CairoMakie.heatmap!(ax, ds[r]["xC"], ds[r]["yC"], ζ[r], colormap=:balance, colorrange=(-0.01, 0.01)) - r == ranks && (cb1 = fig[:, 2] = Colorbar(fig, hm, width=30)) + fig = Figure(resolution=(1600, 1200)) + + for r in reverse(1:ranks) + ax = fig[ranks-r+1, 1] = Axis(fig, ylabel="rank $(r-1)", xticks = MultiplesTicks(9, pi, "π"), yticks = MultiplesTicks(3, pi, "π")) + hm = CairoMakie.heatmap!(ax, ds[r]["xF"], ds[r]["yF"], ζ[r], colormap=:balance, colorrange=(-2, 2)) + r > 1 && hidexdecorations!(ax, grid=false) + if r == 1 + 
cb = fig[:, 2] = Colorbar(fig, hm, label = "Vorticity ζ = ∂x(v) - ∂y(u)", width=30) + cb.height = Relative(2/3) + end + xlims!(ax, [0, 4π]) + ylims!(ax, [(r-1)*π, r*π]) end - supertitle = fig[0, :] = Label(fig, title, textsize=30) + supertitle = fig[0, :] = Label(fig, plot_title, textsize=30) + + trim!(fig.layout) - record(fig, "mpi_turbulence.mp4", 1:10:length(ds[1]["time"]), framerate=15) do n - @info "Animating MPI turbulence $var frame $n/$(length(ds[1]["time"]))..." + record(fig, "mpi_turbulence.mp4", 1:length(ds[1]["time"])-1, framerate=30) do n + @info "Animating MPI turbulence frame $n/$(length(ds[1]["time"]))..." frame[] = n end diff --git a/src/Distributed/test_distributed_mpi.jl b/src/Distributed/test_distributed_mpi.jl index 6af7f94dea..99b9a38708 100644 --- a/src/Distributed/test_distributed_mpi.jl +++ b/src/Distributed/test_distributed_mpi.jl @@ -461,6 +461,7 @@ end run_triply_periodic_local_grid_tests_with_221_ranks() end + # Test pressure bcs! @testset "Injection of halo communication BCs" begin @info " Testing injection of halo communication BCs..." run_triply_periodic_bc_injection_tests_with_411_ranks() From eddbdb0bd6a2f155e2e5b8ca9936defffb4bc136 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Sat, 6 Feb 2021 20:45:18 -0500 Subject: [PATCH 069/100] Fix bug in grid used for distributed pressure BCs --- src/Distributed/distributed_model.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Distributed/distributed_model.jl b/src/Distributed/distributed_model.jl index 0570918522..f712d3c00b 100644 --- a/src/Distributed/distributed_model.jl +++ b/src/Distributed/distributed_model.jl @@ -79,7 +79,7 @@ function DistributedModel(; architecture, grid, boundary_conditions=nothing, mod pressure_solver = haskey(model_kwargs, :pressure_solver) ? Dict(model_kwargs)[:pressure_solver] : DistributedFFTBasedPoissonSolver(architecture, grid, my_grid) - p_bcs = PressureBoundaryConditions(grid) + p_bcs = PressureBoundaryConditions(my_grid) p_bcs = inject_halo_communication_boundary_conditions(p_bcs, my_rank, my_connectivity) pHY′ = CenterField(child_architecture(architecture), my_grid, p_bcs) From 72cb4e29bef83b92fc7a8786199d862438a5a849 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Thu, 25 Feb 2021 13:43:31 -0500 Subject: [PATCH 070/100] Make it easier to run MPI tests --- src/Distributed/distributed_architectures.jl | 1 + src/Distributed/test_distributed_mpi.jl | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/Distributed/distributed_architectures.jl b/src/Distributed/distributed_architectures.jl index 848aae5706..0b6cc114e6 100644 --- a/src/Distributed/distributed_architectures.jl +++ b/src/Distributed/distributed_architectures.jl @@ -15,6 +15,7 @@ struct MultiCPU{R, I, ρ, C} <: AbstractMultiArchitecture end child_architecture(::MultiCPU) = CPU() +child_architecture(::CPU) = CPU() ##### ##### Converting between index and MPI rank taking k as the fast index diff --git a/src/Distributed/test_distributed_mpi.jl b/src/Distributed/test_distributed_mpi.jl index 99b9a38708..4320f68390 100644 --- a/src/Distributed/test_distributed_mpi.jl +++ b/src/Distributed/test_distributed_mpi.jl @@ -7,6 +7,8 @@ using Oceananigans.BoundaryConditions: fill_halo_regions! MPI.Initialized() || MPI.Init() comm = MPI.COMM_WORLD +include("distributed_model.jl") + # Right now just testing with 4 ranks! 
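# --- Editor's sketch, not part of this patch: the assertion below presumes the
# test script was launched with exactly four MPI processes, e.g. the assumed
# invocation
#     mpiexec -np 4 julia --project src/Distributed/test_distributed_mpi.jl
# (the Buildkite job added later in this series uses the same `mpiexec -np 4`
# pattern). Every process then reports the same communicator size, while each
# holds a distinct rank in 0:3:
sketch_rank = MPI.Comm_rank(comm)            # hypothetical variable; unique per process
@assert 0 <= sketch_rank < MPI.Comm_size(comm)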
mpi_ranks = MPI.Comm_size(comm) @assert mpi_ranks == 4 From 865164e5f7bc9d43ff0508602cd8991e68436a8b Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 13:16:06 -0500 Subject: [PATCH 071/100] Add MPI.jl and PencilFFTs.jl as dependencies --- Manifest.toml | 202 +++++++++++++++++++++++++++++++++++++------------- Project.toml | 2 + 2 files changed, 154 insertions(+), 50 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index 5b65c6b50b..4d3e9d6205 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -2,9 +2,9 @@ [[AbstractFFTs]] deps = ["LinearAlgebra"] -git-tree-sha1 = "051c95d6836228d120f5f4b984dd5aba1624f716" +git-tree-sha1 = "485ee0867925449198280d4af84bdb46a2a404d0" uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" -version = "0.5.0" +version = "1.0.1" [[Adapt]] deps = ["LinearAlgebra"] @@ -12,11 +12,17 @@ git-tree-sha1 = "ffcfa2d345aaee0ef3d8346a073d5dd03c983ebe" uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" version = "3.2.0" +[[ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" + +[[ArrayInterface]] +deps = ["IfElse", "LinearAlgebra", "Requires", "SparseArrays", "Static"] +git-tree-sha1 = "e7edcc1ac140cce87b7442ff0fa88b5f19fb71fa" +uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" +version = "3.1.3" + [[Artifacts]] -deps = ["Pkg"] -git-tree-sha1 = "c30985d8821e0cd73870b17b0ed0ce6dc44cb744" uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" -version = "1.3.0" [[BFloat16s]] deps = ["LinearAlgebra", "Test"] @@ -39,10 +45,10 @@ uuid = "179af706-886a-5703-950a-314cd64e0468" version = "0.1.1" [[CUDA]] -deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CompilerSupportLibraries_jll", "DataStructures", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "Libdl", "LinearAlgebra", "Logging", "MacroTools", "NNlib", "Pkg", "Printf", "Random", "Reexport", "Requires", "SparseArrays", "Statistics", "TimerOutputs"] -git-tree-sha1 = "6ccc73b2d8b671f7a65c92b5f08f81422ebb7547" +deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CompilerSupportLibraries_jll", "DataStructures", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "MacroTools", "Memoize", "NNlib", "Printf", "Random", "Reexport", "Requires", "SparseArrays", "Statistics", "TimerOutputs"] +git-tree-sha1 = "2d90e6c29706856928f02e11ae15e71889905e34" uuid = "052768ef-5323-5732-b1bb-66c8b64840ba" -version = "2.4.1" +version = "2.6.1" [[Cassette]] git-tree-sha1 = "9cc225870ec32ce7b9c773d4dcdaef32f622cf89" @@ -68,10 +74,8 @@ uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" version = "3.25.0" [[CompilerSupportLibraries_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "8e695f735fca77e9708e795eda62afdb869cbb70" +deps = ["Artifacts", "Libdl"] uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" -version = "0.3.4+0" [[Crayons]] git-tree-sha1 = "3f71217b538d7aaee0b69ab47d9b7724ca8afa0d" @@ -106,6 +110,16 @@ uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" deps = ["Random", "Serialization", "Sockets"] uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" +[[DocStringExtensions]] +deps = ["LibGit2", "Markdown", "Pkg", "Test"] +git-tree-sha1 = "50ddf44c53698f5e784bbebb3f4b21c5807401b1" +uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +version = "0.8.3" + +[[Downloads]] +deps = ["ArgTools", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" + [[ExprTools]] git-tree-sha1 = "10407a39b87f29d47ebaca8edbc75d7c302ff93e" uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" @@ -113,9 +127,9 @@ version = "0.1.3" [[FFTW]] deps = ["AbstractFFTs", "FFTW_jll", 
"IntelOpenMP_jll", "Libdl", "LinearAlgebra", "MKL_jll", "Reexport"] -git-tree-sha1 = "8fda0934cb99db617171f7296dc361f4d6fa5424" +git-tree-sha1 = "1b48dbde42f307e48685fa9213d8b9f8c0d87594" uuid = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" -version = "1.3.0" +version = "1.3.2" [[FFTW_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -130,10 +144,10 @@ uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" version = "6.2.0" [[GPUCompiler]] -deps = ["DataStructures", "InteractiveUtils", "LLVM", "Libdl", "Scratch", "Serialization", "TimerOutputs", "UUIDs"] -git-tree-sha1 = "c853c810b52a80f9aad79ab109207889e57f41ef" +deps = ["DataStructures", "ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "Scratch", "Serialization", "TimerOutputs", "UUIDs"] +git-tree-sha1 = "ef2839b063e158672583b9c09d2cf4876a8d3d55" uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" -version = "0.8.3" +version = "0.10.0" [[Glob]] git-tree-sha1 = "4df9f7e06108728ebf00a0a11edee4b29a482bb2" @@ -146,6 +160,11 @@ git-tree-sha1 = "fd83fa0bde42e01952757f01149dd968c06c4dba" uuid = "0234f1f7-429e-5d53-9886-15a909be8d59" version = "1.12.0+1" +[[IfElse]] +git-tree-sha1 = "28e837ff3e7a6c3cdb252ce49fb412c8eb3caeef" +uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" +version = "0.1.0" + [[IntelOpenMP_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "d979e54b71da82f3a65b62553da4fc3d18c9004c" @@ -163,15 +182,21 @@ version = "1.0.0" [[JLD2]] deps = ["CodecZlib", "DataStructures", "MacroTools", "Mmap", "Pkg", "Printf", "Requires", "UUIDs"] -git-tree-sha1 = "bb9a457481adf060ab5898823a49d4f854ff4ddd" +git-tree-sha1 = "b8343a7f96591404ade118b3a7014e1a52062465" uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819" -version = "0.4.0" +version = "0.4.2" [[JLLWrappers]] git-tree-sha1 = "a431f5f2ca3f4feef3bd7a5e94b8b8d4f2f647a0" uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" version = "1.2.0" +[[JSON3]] +deps = ["Dates", "Mmap", "Parsers", "StructTypes", "UUIDs"] +git-tree-sha1 = "62d4063c67d7c84d5788107878bb925ceaadd252" +uuid = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" +version = "1.7.1" + [[KernelAbstractions]] deps = ["Adapt", "CUDA", "Cassette", "InteractiveUtils", "MacroTools", "SpecialFunctions", "StaticArrays", "UUIDs"] git-tree-sha1 = "ee7f03c23d874c8353813a44315daf82a1e82046" @@ -184,21 +209,25 @@ git-tree-sha1 = "b616937c31337576360cb9fb872ec7633af7b194" uuid = "929cbde3-209d-540e-8aea-75f648917ca0" version = "3.6.0" +[[LazyArtifacts]] +deps = ["Artifacts", "Pkg"] +uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" + +[[LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" + [[LibCURL_jll]] -deps = ["LibSSH2_jll", "Libdl", "MbedTLS_jll", "Pkg", "Zlib_jll", "nghttp2_jll"] -git-tree-sha1 = "897d962c20031e6012bba7b3dcb7a667170dad17" +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" -version = "7.70.0+2" [[LibGit2]] -deps = ["Printf"] +deps = ["Base64", "NetworkOptions", "Printf", "SHA"] uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" [[LibSSH2_jll]] -deps = ["Libdl", "MbedTLS_jll", "Pkg"] -git-tree-sha1 = "717705533148132e5466f2924b9a3657b16158e8" +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" -version = "1.9.0+3" [[Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" @@ -211,10 +240,22 @@ uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" [[MKL_jll]] -deps = ["IntelOpenMP_jll", "Libdl", "Pkg"] -git-tree-sha1 = 
"eb540ede3aabb8284cb482aa41d00d6ca850b1f8" +deps = ["Artifacts", "IntelOpenMP_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg"] +git-tree-sha1 = "c253236b0ed414624b083e6b72bfe891fbd2c7af" uuid = "856f044c-d86e-5d09-b602-aeab76dc8ba7" -version = "2020.2.254+0" +version = "2021.1.1+1" + +[[MPI]] +deps = ["Distributed", "DocStringExtensions", "Libdl", "MPICH_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "Pkg", "Random", "Requires", "Serialization", "Sockets"] +git-tree-sha1 = "d3aae0fd4d9e1a09c3e2fc728fbe2522ec6d54bc" +uuid = "da04e1cc-30fd-572f-bb4f-1f8673147195" +version = "0.16.1" + +[[MPICH_jll]] +deps = ["CompilerSupportLibraries_jll", "Libdl", "Pkg"] +git-tree-sha1 = "4d37f1e07b4e2a74462eebf9ee48c626d15ffdac" +uuid = "7cb0a576-ebde-5e09-9194-50597f1243b4" +version = "3.3.2+10" [[MacroTools]] deps = ["Markdown", "Random"] @@ -227,14 +268,27 @@ deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" [[MbedTLS_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "0eef589dd1c26a3ac9d753fe1a8bcad63f956fa6" +deps = ["Artifacts", "Libdl"] uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" -version = "2.16.8+1" + +[[Memoize]] +deps = ["MacroTools"] +git-tree-sha1 = "2b1dfcba103de714d31c033b5dacc2e4a12c7caa" +uuid = "c03570c3-d221-55d1-a50c-7939bbd78826" +version = "0.4.4" + +[[MicrosoftMPI_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "e5c90234b3967684c9c6f87b4a54549b4ce21836" +uuid = "9237b28f-5490-5468-be7b-bb81f5f5e6cf" +version = "10.1.3+0" [[Mmap]] uuid = "a63ad114-7e13-5084-954f-fe012c677804" +[[MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" + [[NCDatasets]] deps = ["CFTime", "DataStructures", "Dates", "NetCDF_jll", "Printf"] git-tree-sha1 = "b71d83c87d80f5c54c55a7a9a3aa42bf931c72aa" @@ -249,15 +303,24 @@ version = "0.7.14" [[NetCDF_jll]] deps = ["Artifacts", "HDF5_jll", "JLLWrappers", "LibCURL_jll", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Pkg", "Zlib_jll", "nghttp2_jll"] -git-tree-sha1 = "d5835f95aea3b93965a1a7c06de9aace8cb82d99" +git-tree-sha1 = "0cf4d1bf2ef45156aed85c9ac5f8c7e697d9288c" uuid = "7243133f-43d8-5620-bbf4-c2c921802cf3" -version = "400.701.400+0" +version = "400.702.400+0" + +[[NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" [[OffsetArrays]] deps = ["Adapt"] -git-tree-sha1 = "76622f08645764e040b4d7e86d0ff471fd126ae4" +git-tree-sha1 = "b3dfef5f2be7d7eb0e782ba9146a5271ee426e90" uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" -version = "1.5.3" +version = "1.6.2" + +[[OpenMPI_jll]] +deps = ["Libdl", "Pkg"] +git-tree-sha1 = "41b983e26a7ab8c9bf05f7d70c274b817d541b46" +uuid = "fe0851c0-eecd-5654-98d4-656369965a5c" +version = "4.0.2+2" [[OpenSSL_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -272,12 +335,30 @@ uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" version = "0.5.3+4" [[OrderedCollections]] -git-tree-sha1 = "d45739abcfc03b51f6a42712894a593f74c80a23" +git-tree-sha1 = "4fa2ba51070ec13fcc7517db714445b4ab986bdf" uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -version = "1.3.3" +version = "1.4.0" + +[[Parsers]] +deps = ["Dates"] +git-tree-sha1 = "223a825cccef2228f3fdbf2ecc7ca93363059073" +uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "1.0.16" + +[[PencilArrays]] +deps = ["ArrayInterface", "JSON3", "Libdl", "LinearAlgebra", "MPI", "OffsetArrays", "Reexport", "Requires", "StaticArrays", "StaticPermutations", "TimerOutputs"] +git-tree-sha1 = "6921d07316f41e2be5befd8b815eee28d3fab9f8" +uuid = "0e08944d-e94e-41b1-9406-dcf66b6a9d2e" +version = "0.9.0" + +[[PencilFFTs]] 
+deps = ["AbstractFFTs", "FFTW", "LinearAlgebra", "MPI", "PencilArrays", "Reexport", "TimerOutputs"] +git-tree-sha1 = "a7665838a566accd7d9cf308bbb497126dc5edf4" +uuid = "4a48f351-57a6-4416-9ec4-c37015456aae" +version = "0.12.1" [[Pkg]] -deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] +deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" [[Printf]] @@ -285,7 +366,7 @@ deps = ["Unicode"] uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" [[REPL]] -deps = ["InteractiveUtils", "Markdown", "Sockets"] +deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" [[Random]] @@ -340,9 +421,15 @@ uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [[SpecialFunctions]] deps = ["ChainRulesCore", "OpenSpecFun_jll"] -git-tree-sha1 = "75394dbe2bd346beeed750fb02baa6445487b862" +git-tree-sha1 = "5919936c0e92cff40e57d0ddf0ceb667d42e5902" uuid = "276daf66-3868-5448-9aa4-cd146d93841b" -version = "1.2.1" +version = "1.3.0" + +[[Static]] +deps = ["IfElse"] +git-tree-sha1 = "98ace568bf638e89eac33c99337f3c8c6e2227b8" +uuid = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" +version = "0.2.0" [[StaticArrays]] deps = ["LinearAlgebra", "Random", "Statistics"] @@ -350,6 +437,11 @@ git-tree-sha1 = "9da72ed50e94dbff92036da395275ed114e04d49" uuid = "90137ffa-7385-5640-81b9-e52037218182" version = "1.0.1" +[[StaticPermutations]] +git-tree-sha1 = "193c3daa18ff3e55c1dae66acb6a762c4a3bdb0b" +uuid = "15972242-4b8f-49a0-b8a1-9ac0e7a1a45d" +version = "0.3.0" + [[Statistics]] deps = ["LinearAlgebra", "SparseArrays"] uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" @@ -360,6 +452,16 @@ git-tree-sha1 = "26ea43b4be7e919a2390c3c0f824e7eb4fc19a0a" uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" version = "0.5.0" +[[StructTypes]] +deps = ["Dates", "UUIDs"] +git-tree-sha1 = "d7f4287dbc1e590265f50ceda1b40ed2bb31bbbb" +uuid = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" +version = "1.4.0" + +[[TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" + [[TableTraits]] deps = ["IteratorInterfaceExtensions"] git-tree-sha1 = "b1ad568ba658d8cbb3b892ed5380a6f3e781a81e" @@ -372,15 +474,19 @@ git-tree-sha1 = "a716dde43d57fa537a19058d044b495301ba6565" uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" version = "1.3.2" +[[Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" + [[Test]] -deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [[TimerOutputs]] deps = ["Printf"] -git-tree-sha1 = "3318281dd4121ecf9713ce1383b9ace7d7476fdd" +git-tree-sha1 = "32cdbe6cd2d214c25a0b88f985c9e0092877c236" uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" -version = "0.5.7" +version = "0.5.8" [[TranscodingStreams]] deps = ["Random", "Test"] @@ -396,13 +502,9 @@ uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" [[Zlib_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "320228915c8debb12cb434c59057290f0834dbf6" +deps = ["Libdl"] uuid = "83775a58-1f1d-513f-b197-d71354ab007a" -version = "1.2.11+18" [[nghttp2_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "8e2c44ab4d49ad9518f359ed8b62f83ba8beede4" +deps = ["Artifacts", "Libdl"] uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" -version = "1.40.0+2" diff --git a/Project.toml 
b/Project.toml index d67e8d9b83..400c8bd74f 100644 --- a/Project.toml +++ b/Project.toml @@ -14,9 +14,11 @@ JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" +MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +PencilFFTs = "4a48f351-57a6-4416-9ec4-c37015456aae" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" From efe89da39e224c51e99545bfb0c0f9d6bffd02ba Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 13:17:32 -0500 Subject: [PATCH 072/100] New `Distributed` sub-module --- src/Distributed/Distributed.jl | 16 ++++++++++++++++ .../distributed_fft_based_poisson_solver.jl | 17 ++++------------- src/Distributed/distributed_model.jl | 7 ------- src/Distributed/halo_communication_bcs.jl | 5 +++-- ..._architectures.jl => multi_architectures.jl} | 0 src/Oceananigans.jl | 5 +++++ 6 files changed, 28 insertions(+), 22 deletions(-) create mode 100644 src/Distributed/Distributed.jl rename src/Distributed/{distributed_architectures.jl => multi_architectures.jl} (100%) diff --git a/src/Distributed/Distributed.jl b/src/Distributed/Distributed.jl new file mode 100644 index 0000000000..42d77c19c8 --- /dev/null +++ b/src/Distributed/Distributed.jl @@ -0,0 +1,16 @@ +module Distributed + +export + MultiCPU, + HaloCommunication, HaloCommunicationBC, + DistributedFFTBasedPoissonSolver, + DistributedModel + +include("distributed_utils.jl") +include("multi_architectures.jl") +include("halo_communication_bcs.jl") +include("halo_communication.jl") +include("distributed_fft_based_poisson_solver.jl") +include("distributed_model.jl") + +end # module diff --git a/src/Distributed/distributed_fft_based_poisson_solver.jl b/src/Distributed/distributed_fft_based_poisson_solver.jl index b6d8399cf0..5595e1b35c 100644 --- a/src/Distributed/distributed_fft_based_poisson_solver.jl +++ b/src/Distributed/distributed_fft_based_poisson_solver.jl @@ -1,6 +1,6 @@ -using PencilFFTs +import PencilFFTs -import Oceananigans.Solvers: solve_poisson_equation! +import Oceananigans.Solvers: poisson_eigenvalues, solve_poisson_equation! struct DistributedFFTBasedPoissonSolver{P, F, L, λ, S} plan :: P @@ -10,15 +10,6 @@ struct DistributedFFTBasedPoissonSolver{P, F, L, λ, S} storage :: S end -reshaped_size(N, dim) = dim == 1 ? (N, 1, 1) : - dim == 2 ? (1, N, 1) : - dim == 3 ? (1, 1, N) : nothing - -function poisson_eigenvalues(N, L, dim, ::Periodic) - inds = reshape(1:N, reshaped_size(N, dim)...) - return @. 
(2sin((inds - 1) * π / N) / (L / N))^2 -end - function DistributedFFTBasedPoissonSolver(arch, full_grid, local_grid) topo = (TX, TY, TZ) = topology(full_grid) @@ -33,8 +24,8 @@ function DistributedFFTBasedPoissonSolver(arch, full_grid, local_grid) transform = PencilFFTs.Transforms.FFT!() proc_dims = (arch.ranks[2], arch.ranks[3]) - plan = PencilFFTPlan(size(full_grid), transform, proc_dims, MPI.COMM_WORLD) - storage = allocate_input(plan) + plan = PencilFFTs.PencilFFTPlan(size(full_grid), transform, proc_dims, MPI.COMM_WORLD) + storage = PencilFFTs.allocate_input(plan) return DistributedFFTBasedPoissonSolver(plan, full_grid, local_grid, eigenvalues, storage) end diff --git a/src/Distributed/distributed_model.jl b/src/Distributed/distributed_model.jl index f712d3c00b..6feebaf4fe 100644 --- a/src/Distributed/distributed_model.jl +++ b/src/Distributed/distributed_model.jl @@ -2,15 +2,8 @@ using MPI using Oceananigans using Oceananigans.Grids - using Oceananigans.Grids: halo_size -include("distributed_utils.jl") -include("distributed_architectures.jl") -include("halo_communication_bcs.jl") -include("halo_communication.jl") -include("distributed_fft_based_poisson_solver.jl") - ##### ##### Distributed model struct and constructor ##### diff --git a/src/Distributed/halo_communication_bcs.jl b/src/Distributed/halo_communication_bcs.jl index aa21385545..ca72f7c485 100644 --- a/src/Distributed/halo_communication_bcs.jl +++ b/src/Distributed/halo_communication_bcs.jl @@ -1,10 +1,11 @@ +using Oceananigans.BoundaryConditions using Oceananigans.BoundaryConditions: BCType import Oceananigans.BoundaryConditions: bctype_str, print_condition struct HaloCommunication <: BCType end -HaloCommunicationBC = BoundaryCondition{<:HaloCommunication} +const HaloCommunicationBC = BoundaryCondition{<:HaloCommunication} bctype_str(::HaloCommunicationBC) ="HaloCommunication" @@ -51,4 +52,4 @@ function inject_halo_communication_boundary_conditions(field_bcs, my_rank, conne isnothing(rank_top) ? 
field_bcs.top : top_comm_bc) return FieldBoundaryConditions(x_bcs, y_bcs, z_bcs) -end \ No newline at end of file +end diff --git a/src/Distributed/distributed_architectures.jl b/src/Distributed/multi_architectures.jl similarity index 100% rename from src/Distributed/distributed_architectures.jl rename to src/Distributed/multi_architectures.jl diff --git a/src/Oceananigans.jl b/src/Oceananigans.jl index 4ca575c376..554409df70 100644 --- a/src/Oceananigans.jl +++ b/src/Oceananigans.jl @@ -75,6 +75,9 @@ export # Abstract operations ∂x, ∂y, ∂z, @at, + # Distributed + MultiCPU, + # Utils prettytime @@ -165,6 +168,7 @@ include("Diagnostics/Diagnostics.jl") include("OutputWriters/OutputWriters.jl") include("Simulations/Simulations.jl") include("AbstractOperations/AbstractOperations.jl") +include("Distributed/Distributed.jl") ##### ##### Needed so we can export names from sub-modules at the top-level @@ -190,6 +194,7 @@ using .Diagnostics using .OutputWriters using .Simulations using .AbstractOperations +using .Distributed function __init__() threads = Threads.nthreads() From e75209aff981017db11de3b8702a9fe6174ca974 Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 13:48:56 -0500 Subject: [PATCH 073/100] Move MPI tests into `test` directory --- src/Distributed/Distributed.jl | 3 +- src/Distributed/distributed_model.jl | 2 +- src/Distributed/mpi_turbulence.jl | 2 +- src/Distributed/multi_architectures.jl | 4 +-- src/Distributed/test_distributed_mpi.jl | 32 +++++++++---------- test/runtests.jl | 22 ++++++++----- .../test_distributed_poisson_solvers.jl | 21 ++---------- 7 files changed, 38 insertions(+), 48 deletions(-) rename {src/Distributed => test}/test_distributed_poisson_solvers.jl (81%) diff --git a/src/Distributed/Distributed.jl b/src/Distributed/Distributed.jl index 42d77c19c8..a6b74188e9 100644 --- a/src/Distributed/Distributed.jl +++ b/src/Distributed/Distributed.jl @@ -1,8 +1,9 @@ module Distributed export - MultiCPU, + MultiCPU, child_architecture, HaloCommunication, HaloCommunicationBC, + inject_halo_communication_boundary_conditions, DistributedFFTBasedPoissonSolver, DistributedModel diff --git a/src/Distributed/distributed_model.jl b/src/Distributed/distributed_model.jl index 6feebaf4fe..e5d263275d 100644 --- a/src/Distributed/distributed_model.jl +++ b/src/Distributed/distributed_model.jl @@ -43,7 +43,7 @@ function DistributedModel(; architecture, grid, boundary_conditions=nothing, mod z₁, z₂ = zL + (k-1)*lz, zL + k*lz # FIXME: local grid might have different topology! 
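# --- Editor's sketch of the slab decomposition above (illustrative numbers,
# assumed — not from this patch): for ranks = (1, 4, 1) on a domain with
# y ∈ (0, 2), each rank index j ∈ 1:4 owns a slab of height ly = 2/4 = 0.5,
# so e.g. j = 3 gets
#     y₁, y₂ = 0 + (3 - 1) * 0.5, 0 + 3 * 0.5    # (1.0, 1.5)
# mirroring the x/y/z extent arithmetic used here for the local grid.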
- my_grid = RegularCartesianGrid(topology=topology(grid), size=(nx, ny, nz), x=(x₁, x₂), y=(y₁, y₂), z=(z₁, z₂), halo=halo_size(grid)) + my_grid = RegularRectilinearGrid(topology=topology(grid), size=(nx, ny, nz), x=(x₁, x₂), y=(y₁, y₂), z=(z₁, z₂), halo=halo_size(grid)) ## Change appropriate boundary conditions to halo communication BCs diff --git a/src/Distributed/mpi_turbulence.jl b/src/Distributed/mpi_turbulence.jl index 296a4361ac..aaebb6dc5d 100644 --- a/src/Distributed/mpi_turbulence.jl +++ b/src/Distributed/mpi_turbulence.jl @@ -42,7 +42,7 @@ function solve_for_pressure!(pressure, solver::DistributedFFTBasedPoissonSolver, end topo = (Periodic, Periodic, Periodic) -full_grid = RegularCartesianGrid(topology=topo, size=(512, 512, 1), extent=(4π, 4π, 1), halo=(3, 3, 3)) +full_grid = RegularRectilinearGrid(topology=topo, size=(512, 512, 1), extent=(4π, 4π, 1), halo=(3, 3, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) dm = DistributedModel( diff --git a/src/Distributed/multi_architectures.jl b/src/Distributed/multi_architectures.jl index 0b6cc114e6..6a92003bb3 100644 --- a/src/Distributed/multi_architectures.jl +++ b/src/Distributed/multi_architectures.jl @@ -117,9 +117,7 @@ function MultiCPU(; grid, ranks) if total_ranks != mpi_ranks throw(ArgumentError("ranks=($Rx, $Ry, $Rz) [$total_ranks total] inconsistent " * - "with number of MPI ranks: $mpi_ranks. Exiting with return code 1.")) - MPI.Finalize() - exit(code=1) + "with number of MPI ranks: $mpi_ranks.")) end comm = MPI.COMM_WORLD diff --git a/src/Distributed/test_distributed_mpi.jl b/src/Distributed/test_distributed_mpi.jl index 4320f68390..4780ff253a 100644 --- a/src/Distributed/test_distributed_mpi.jl +++ b/src/Distributed/test_distributed_mpi.jl @@ -19,7 +19,7 @@ mpi_ranks = MPI.Comm_size(comm) function run_triply_periodic_rank_connectivity_tests_with_411_ranks() topo = (Periodic, Periodic, Periodic) - full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) @@ -56,7 +56,7 @@ end function run_triply_periodic_rank_connectivity_tests_with_141_ranks() topo = (Periodic, Periodic, Periodic) - full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) @@ -99,7 +99,7 @@ end function run_triply_periodic_rank_connectivity_tests_with_114_ranks() topo = (Periodic, Periodic, Periodic) - full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) @@ -145,7 +145,7 @@ end function run_triply_periodic_rank_connectivity_tests_with_221_ranks() topo = (Periodic, Periodic, Periodic) - full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) @@ -194,7 +194,7 @@ end function run_triply_periodic_local_grid_tests_with_411_ranks() topo = (Periodic, Periodic, Periodic) - full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + full_grid = 
RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) @@ -214,7 +214,7 @@ end function run_triply_periodic_local_grid_tests_with_141_ranks() topo = (Periodic, Periodic, Periodic) - full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) @@ -234,7 +234,7 @@ end function run_triply_periodic_local_grid_tests_with_114_ranks() topo = (Periodic, Periodic, Periodic) - full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) @@ -254,7 +254,7 @@ end function run_triply_periodic_local_grid_tests_with_221_ranks() topo = (Periodic, Periodic, Periodic) - full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) @@ -278,7 +278,7 @@ end function run_triply_periodic_bc_injection_tests_with_411_ranks() topo = (Periodic, Periodic, Periodic) - full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) @@ -295,7 +295,7 @@ end function run_triply_periodic_bc_injection_tests_with_141_ranks() topo = (Periodic, Periodic, Periodic) - full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) @@ -312,7 +312,7 @@ end function run_triply_periodic_bc_injection_tests_with_114_ranks() topo = (Periodic, Periodic, Periodic) - full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) @@ -329,7 +329,7 @@ end function run_triply_periodic_bc_injection_tests_with_221_ranks() topo = (Periodic, Periodic, Periodic) - full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) @@ -350,7 +350,7 @@ end function run_triply_periodic_halo_communication_tests_with_411_ranks() topo = (Periodic, Periodic, Periodic) - full_grid = RegularCartesianGrid(topology=topo, size=(8, 6, 4), extent=(1, 2, 3)) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 6, 4), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) dm = 
DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) @@ -373,7 +373,7 @@ end function run_triply_periodic_halo_communication_tests_with_141_ranks() topo = (Periodic, Periodic, Periodic) - full_grid = RegularCartesianGrid(topology=topo, size=(3, 8, 2), extent=(1, 2, 3)) + full_grid = RegularRectilinearGrid(topology=topo, size=(3, 8, 2), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) @@ -396,7 +396,7 @@ end function run_triply_periodic_halo_communication_tests_with_114_ranks() topo = (Periodic, Periodic, Periodic) - full_grid = RegularCartesianGrid(topology=topo, size=(3, 5, 8), extent=(1, 2, 3)) + full_grid = RegularRectilinearGrid(topology=topo, size=(3, 5, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) @@ -419,7 +419,7 @@ end function run_triply_periodic_halo_communication_tests_with_221_ranks() topo = (Periodic, Periodic, Periodic) - full_grid = RegularCartesianGrid(topology=topo, size=(8, 8, 3), extent=(1, 2, 3)) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 3), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) diff --git a/test/runtests.jl b/test/runtests.jl index 5bd1027752..ea294bfabc 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,6 +6,7 @@ using LinearAlgebra using Logging using CUDA +using MPI using JLD2 using FFTW using OffsetArrays @@ -118,6 +119,14 @@ group = get(ENV, "TEST_GROUP", :all) |> Symbol end end + if group == :shallow_water || group == :all + include("test_shallow_water_models.jl") + end + + if group == :hydrostatic_free_surface || group == :all + include("test_hydrostatic_free_surface_models.jl") + end + if group == :simulation || group == :all @testset "Simulation tests" begin include("test_simulations.jl") @@ -128,6 +137,11 @@ group = get(ENV, "TEST_GROUP", :all) |> Symbol end end + if group == :distributed || group == :all + MPI.Initialized() || MPI.Init() + include("test_distributed_poisson_solvers.jl") + end + if group == :regression || group == :all include("test_regression.jl") end @@ -141,12 +155,4 @@ group = get(ENV, "TEST_GROUP", :all) |> Symbol if group == :convergence include("test_convergence.jl") end - - if group == :shallow_water || group == :all - include("test_shallow_water_models.jl") - end - - if group == :hydrostatic_free_surface || group == :all - include("test_hydrostatic_free_surface_models.jl") - end end diff --git a/src/Distributed/test_distributed_poisson_solvers.jl b/test/test_distributed_poisson_solvers.jl similarity index 81% rename from src/Distributed/test_distributed_poisson_solvers.jl rename to test/test_distributed_poisson_solvers.jl index 0feae84a48..29621bda19 100644 --- a/src/Distributed/test_distributed_poisson_solvers.jl +++ b/test/test_distributed_poisson_solvers.jl @@ -1,21 +1,6 @@ -using Test -using Oceananigans -using Oceananigans.Architectures -using Oceananigans.Solvers -using Oceananigans.Utils -using Oceananigans.Operators -using Oceananigans.BoundaryConditions: fill_halo_regions! 
-using KernelAbstractions: @kernel, @index, Event - -@kernel function ∇²!(grid, f, ∇²f) - i, j, k = @index(Global, NTuple) - @inbounds ∇²f[i, j, k] = ∇²(i, j, k, grid, f) -end -@kernel function divergence!(grid, u, v, w, div) - i, j, k = @index(Global, NTuple) - @inbounds div[i, j, k] = divᶜᶜᶜ(i, j, k, grid, u, v, w) -end +using Oceananigans +using Oceananigans.Distributed function random_divergent_source_term(FT, arch, grid) # Generate right hand side from a random (divergent) velocity field. @@ -55,7 +40,7 @@ end function divergence_free_poisson_solution_triply_periodic(grid_points, ranks) topo = (Periodic, Periodic, Periodic) - full_grid = RegularCartesianGrid(topology=topo, size=grid_points, extent=(1, 2, 3)) + full_grid = RegularRectilinearGrid(topology=topo, size=grid_points, extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=ranks) dm = DistributedModel(architecture=arch, grid=full_grid) From 0356e0dae98a0d64e66c1c5d45bc7970389cf3f5 Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 14:25:27 -0500 Subject: [PATCH 074/100] MPI tests passing locally --- src/Distributed/multi_architectures.jl | 2 -- test/runtests.jl | 2 ++ .../test_distributed_models.jl | 14 ++------------ test/test_distributed_poisson_solvers.jl | 3 --- 4 files changed, 4 insertions(+), 17 deletions(-) rename src/Distributed/test_distributed_mpi.jl => test/test_distributed_models.jl (98%) diff --git a/src/Distributed/multi_architectures.jl b/src/Distributed/multi_architectures.jl index 6a92003bb3..4e71da67da 100644 --- a/src/Distributed/multi_architectures.jl +++ b/src/Distributed/multi_architectures.jl @@ -120,8 +120,6 @@ function MultiCPU(; grid, ranks) "with number of MPI ranks: $mpi_ranks.")) end - comm = MPI.COMM_WORLD - my_connectivity = RankConnectivity(my_index, ranks, topology(grid)) return MultiCPU(my_rank, my_index, ranks, my_connectivity) diff --git a/test/runtests.jl b/test/runtests.jl index ea294bfabc..14917ca304 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -29,6 +29,7 @@ using Oceananigans.Diagnostics using Oceananigans.OutputWriters using Oceananigans.TurbulenceClosures using Oceananigans.AbstractOperations +using Oceananigans.Distributed using Oceananigans.Logger using Oceananigans.Units using Oceananigans.Utils @@ -139,6 +140,7 @@ group = get(ENV, "TEST_GROUP", :all) |> Symbol if group == :distributed || group == :all MPI.Initialized() || MPI.Init() + include("test_distributed_models.jl") include("test_distributed_poisson_solvers.jl") end diff --git a/src/Distributed/test_distributed_mpi.jl b/test/test_distributed_models.jl similarity index 98% rename from src/Distributed/test_distributed_mpi.jl rename to test/test_distributed_models.jl index 4780ff253a..6f85ed2d3e 100644 --- a/src/Distributed/test_distributed_mpi.jl +++ b/test/test_distributed_models.jl @@ -1,15 +1,10 @@ -using Test using MPI -using Oceananigans using Oceananigans.BoundaryConditions: fill_halo_regions! - -MPI.Initialized() || MPI.Init() -comm = MPI.COMM_WORLD - -include("distributed_model.jl") +using Oceananigans.Distributed: index2rank, east_halo, west_halo, north_halo, south_halo, top_halo, bottom_halo # Right now just testing with 4 ranks! 
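# --- Editor's sketch: a hypothetical helper mirroring the periodic wraparound
# that the rank-connectivity tests below verify for a decomposition with R
# ranks along one dimension (R = 4 here):
sketch_neighbors(r, R=4) = (mod(r - 1, R), mod(r + 1, R))
# e.g. sketch_neighbors(0) == (3, 1) and sketch_neighbors(3) == (2, 0),
# so the first and last ranks are each other's neighbors.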
+comm = MPI.COMM_WORLD mpi_ranks = MPI.Comm_size(comm) @assert mpi_ranks == 4 @@ -480,9 +475,4 @@ end run_triply_periodic_halo_communication_tests_with_114_ranks() # run_triply_periodic_halo_communication_tests_with_221_ranks() end - - include("test_distributed_poisson_solvers.jl") end - -# MPI.Finalize() -# @test MPI.Finalized() diff --git a/test/test_distributed_poisson_solvers.jl b/test/test_distributed_poisson_solvers.jl index 29621bda19..305b144ce4 100644 --- a/test/test_distributed_poisson_solvers.jl +++ b/test/test_distributed_poisson_solvers.jl @@ -1,7 +1,4 @@ -using Oceananigans -using Oceananigans.Distributed - function random_divergent_source_term(FT, arch, grid) # Generate right hand side from a random (divergent) velocity field. Ru = CenterField(FT, arch, grid, UVelocityBoundaryConditions(grid)) From bc118999d890751a29b80fff0384d5976424bcf2 Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 14:28:41 -0500 Subject: [PATCH 075/100] Need a distributed `solve_for_pressure` --- .../Distributed => sandbox}/mpi_turbulence.jl | 29 ------------------- src/Distributed/Distributed.jl | 1 + .../distributed_solve_for_pressure.jl | 24 +++++++++++++++ 3 files changed, 25 insertions(+), 29 deletions(-) rename {src/Distributed => sandbox}/mpi_turbulence.jl (76%) create mode 100644 src/Distributed/distributed_solve_for_pressure.jl diff --git a/src/Distributed/mpi_turbulence.jl b/sandbox/mpi_turbulence.jl similarity index 76% rename from src/Distributed/mpi_turbulence.jl rename to sandbox/mpi_turbulence.jl index aaebb6dc5d..1d92fa9f4f 100644 --- a/src/Distributed/mpi_turbulence.jl +++ b/sandbox/mpi_turbulence.jl @@ -1,5 +1,3 @@ -include("distributed_model.jl") - using MPI MPI.Initialized() || MPI.Init() @@ -14,33 +12,6 @@ using Oceananigans.Utils using Oceananigans.Solvers: calculate_pressure_right_hand_side!, copy_pressure! -import Oceananigans.Solvers: solve_for_pressure! - -child_architecture(::CPU) = CPU() - -function solve_for_pressure!(pressure, solver::DistributedFFTBasedPoissonSolver, arch, grid, Δt, U★) - - RHS = first(solver.storage) - - rhs_event = launch!(arch, grid, :xyz, - calculate_pressure_right_hand_side!, RHS, arch, grid, Δt, U★, - dependencies = Event(device(arch))) - - wait(device(arch), rhs_event) - - solve_poisson_equation!(solver) - - ϕ = first(solver.storage) - - copy_event = launch!(arch, grid, :xyz, - copy_pressure!, pressure, ϕ, arch, grid, - dependencies = Event(device(arch))) - - wait(device(arch), copy_event) - - return nothing -end - topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(512, 512, 1), extent=(4π, 4π, 1), halo=(3, 3, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) diff --git a/src/Distributed/Distributed.jl b/src/Distributed/Distributed.jl index a6b74188e9..73b9704054 100644 --- a/src/Distributed/Distributed.jl +++ b/src/Distributed/Distributed.jl @@ -12,6 +12,7 @@ include("multi_architectures.jl") include("halo_communication_bcs.jl") include("halo_communication.jl") include("distributed_fft_based_poisson_solver.jl") +include("distributed_solve_for_pressure.jl") include("distributed_model.jl") end # module diff --git a/src/Distributed/distributed_solve_for_pressure.jl b/src/Distributed/distributed_solve_for_pressure.jl new file mode 100644 index 0000000000..47f56cef49 --- /dev/null +++ b/src/Distributed/distributed_solve_for_pressure.jl @@ -0,0 +1,24 @@ +import Oceananigans.Solvers: solve_for_pressure! 
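# --- Editor's note (descriptive summary of the function below, added for
# clarity): the distributed pressure solve proceeds in three event-synchronized
# stages —
#   1. launch a kernel that fills first(solver.storage), the PencilFFTs input
#      array, with the Poisson right-hand side built from U★ and Δt;
#   2. call solve_poisson_equation!(solver), which is assumed to forward
#      transform, divide by the solver's stored eigenvalues, and transform back
#      across ranks;
#   3. launch a kernel that copies the solution into `pressure` on each rank.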
+ +function solve_for_pressure!(pressure, solver::DistributedFFTBasedPoissonSolver, arch, grid, Δt, U★) + + RHS = first(solver.storage) + + rhs_event = launch!(arch, grid, :xyz, + calculate_pressure_right_hand_side!, RHS, arch, grid, Δt, U★, + dependencies = Event(device(arch))) + + wait(device(arch), rhs_event) + + solve_poisson_equation!(solver) + + ϕ = first(solver.storage) + + copy_event = launch!(arch, grid, :xyz, + copy_pressure!, pressure, ϕ, arch, grid, + dependencies = Event(device(arch))) + + wait(device(arch), copy_event) + + return nothing +end From bfcf223738fc9a7e13583d811fc2ed6b4b3c70ce Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 14:52:18 -0500 Subject: [PATCH 076/100] Test time stepping and running simulations --- test/test_distributed_models.jl | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/test/test_distributed_models.jl b/test/test_distributed_models.jl index 6f85ed2d3e..8407bf76e2 100644 --- a/test/test_distributed_models.jl +++ b/test/test_distributed_models.jl @@ -475,4 +475,21 @@ end run_triply_periodic_halo_communication_tests_with_114_ranks() # run_triply_periodic_halo_communication_tests_with_221_ranks() end + + @testset "Time stepping" begin + topo = (Periodic, Periodic, Periodic) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) + dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + model = dm.model + + time_step!(model, 1) + @test dm isa DistributedModel + @test model.clock.time == 1 + + simulation = Simulation(model, Δt=1, stop_iteration=2) + run!(simulation) + @test dm isa DistributedModel + @test model.clock.time == 2 + end end From 703f9e07accf00ab83542e335e5ae377f9396a8c Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 15:01:04 -0500 Subject: [PATCH 077/100] `DistributedModel` -> `DistributedIncompressibleModel` --- sandbox/mpi_turbulence.jl | 2 +- src/Distributed/Distributed.jl | 4 +-- ...jl => distributed_incompressible_model.jl} | 10 +++---- test/test_distributed_models.jl | 30 +++++++++---------- test/test_distributed_poisson_solvers.jl | 2 +- 5 files changed, 24 insertions(+), 24 deletions(-) rename src/Distributed/{distributed_model.jl => distributed_incompressible_model.jl} (90%) diff --git a/sandbox/mpi_turbulence.jl b/sandbox/mpi_turbulence.jl index 1d92fa9f4f..d5b9824ad7 100644 --- a/sandbox/mpi_turbulence.jl +++ b/sandbox/mpi_turbulence.jl @@ -16,7 +16,7 @@ topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(512, 512, 1), extent=(4π, 4π, 1), halo=(3, 3, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) -dm = DistributedModel( +dm = DistributedIncompressibleModel( architecture = arch, grid = full_grid, timestepper = :RungeKutta3, diff --git a/src/Distributed/Distributed.jl b/src/Distributed/Distributed.jl index 73b9704054..a7965b3bca 100644 --- a/src/Distributed/Distributed.jl +++ b/src/Distributed/Distributed.jl @@ -5,7 +5,7 @@ export HaloCommunication, HaloCommunicationBC, inject_halo_communication_boundary_conditions, DistributedFFTBasedPoissonSolver, - DistributedModel + DistributedIncompressibleModel include("distributed_utils.jl") include("multi_architectures.jl") @@ -13,6 +13,6 @@ include("halo_communication_bcs.jl") include("halo_communication.jl") include("distributed_fft_based_poisson_solver.jl") include("distributed_solve_for_pressure.jl") -include("distributed_model.jl") 
+include("distributed_incompressible_model.jl") end # module diff --git a/src/Distributed/distributed_model.jl b/src/Distributed/distributed_incompressible_model.jl similarity index 90% rename from src/Distributed/distributed_model.jl rename to src/Distributed/distributed_incompressible_model.jl index e5d263275d..aaff1788ed 100644 --- a/src/Distributed/distributed_model.jl +++ b/src/Distributed/distributed_incompressible_model.jl @@ -8,13 +8,13 @@ using Oceananigans.Grids: halo_size ##### Distributed model struct and constructor ##### -struct DistributedModel{A, G, M} +struct DistributedIncompressibleModel{A, G, M} architecture :: A grid :: G model :: M end -function DistributedModel(; architecture, grid, boundary_conditions=nothing, model_kwargs...) +function DistributedIncompressibleModel(; architecture, grid, boundary_conditions=nothing, model_kwargs...) my_rank = architecture.my_rank i, j, k = architecture.my_index Rx, Ry, Rz = architecture.ranks @@ -88,10 +88,10 @@ function DistributedModel(; architecture, grid, boundary_conditions=nothing, mod model_kwargs... ) - return DistributedModel(architecture, grid, my_model) + return DistributedIncompressibleModel(architecture, grid, my_model) end -function Base.show(io::IO, dm::DistributedModel) - print(io, "DistributedModel with ") +function Base.show(io::IO, dm::DistributedIncompressibleModel) + print(io, "DistributedIncompressibleModel with ") print(io, dm.architecture) end diff --git a/test/test_distributed_models.jl b/test/test_distributed_models.jl index 8407bf76e2..318bedbf9b 100644 --- a/test/test_distributed_models.jl +++ b/test/test_distributed_models.jl @@ -191,7 +191,7 @@ function run_triply_periodic_local_grid_tests_with_411_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) local_grid = dm.model.grid @@ -211,7 +211,7 @@ function run_triply_periodic_local_grid_tests_with_141_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) local_grid = dm.model.grid @@ -231,7 +231,7 @@ function run_triply_periodic_local_grid_tests_with_114_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) - dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) local_grid = dm.model.grid @@ -251,7 +251,7 @@ function run_triply_periodic_local_grid_tests_with_221_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + dm = DistributedIncompressibleModel(architecture=arch, 
grid=full_grid, pressure_solver=nothing) i, j, k = arch.my_index local_grid = dm.model.grid @@ -275,7 +275,7 @@ function run_triply_periodic_bc_injection_tests_with_411_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) for field in fields(dm.model) fbcs = field.boundary_conditions @@ -292,7 +292,7 @@ function run_triply_periodic_bc_injection_tests_with_141_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) for field in fields(dm.model) fbcs = field.boundary_conditions @@ -309,7 +309,7 @@ function run_triply_periodic_bc_injection_tests_with_114_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) - dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) for field in fields(dm.model) fbcs = field.boundary_conditions @@ -326,7 +326,7 @@ function run_triply_periodic_bc_injection_tests_with_221_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) for field in fields(dm.model) fbcs = field.boundary_conditions @@ -347,7 +347,7 @@ function run_triply_periodic_halo_communication_tests_with_411_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 6, 4), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) for field in fields(dm.model) interior(field) .= arch.my_rank @@ -370,7 +370,7 @@ function run_triply_periodic_halo_communication_tests_with_141_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(3, 8, 2), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) for field in fields(dm.model) interior(field) .= arch.my_rank @@ -393,7 +393,7 @@ function run_triply_periodic_halo_communication_tests_with_114_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(3, 5, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) - dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) 
for field in fields(dm.model) interior(field) .= arch.my_rank @@ -416,7 +416,7 @@ function run_triply_periodic_halo_communication_tests_with_221_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 3), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) for field in fields(dm.model) interior(field) .= arch.my_rank @@ -480,16 +480,16 @@ end topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) - dm = DistributedModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) model = dm.model time_step!(model, 1) - @test dm isa DistributedModel + @test dm isa DistributedIncompressibleModel @test model.clock.time == 1 simulation = Simulation(model, Δt=1, stop_iteration=2) run!(simulation) - @test dm isa DistributedModel + @test dm isa DistributedIncompressibleModel @test model.clock.time == 2 end end diff --git a/test/test_distributed_poisson_solvers.jl b/test/test_distributed_poisson_solvers.jl index 305b144ce4..c730d72895 100644 --- a/test/test_distributed_poisson_solvers.jl +++ b/test/test_distributed_poisson_solvers.jl @@ -39,7 +39,7 @@ function divergence_free_poisson_solution_triply_periodic(grid_points, ranks) topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=grid_points, extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=ranks) - dm = DistributedModel(architecture=arch, grid=full_grid) + dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid) local_grid = dm.model.grid solver = DistributedFFTBasedPoissonSolver(arch, full_grid, local_grid) From c1e4c2c556dbbf91f1edd7cc341b6b78af5faa6f Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 15:01:17 -0500 Subject: [PATCH 078/100] Add new Buildkite job for MPI --- .buildkite/pipeline.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 8ca97b7cc5..b21bedc368 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -240,6 +240,23 @@ steps: architecture: CPU depends_on: "init_cpu" +##### +##### Distributed/MPI +##### + + - label: "🐉 cpu distributed tests" + env: + JULIA_DEPOT_PATH: "$TARTARUS_HOME/.julia-$BUILDKITE_BUILD_NUMBER" + TEST_GROUP: "distributed" + CUDA_VISIBLE_DEVICES: "-1" + commands: + - "module load mpi/openmpi-x86_64" + - "mpiexec -np 4 $TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'using Pkg; Pkg.test()'" + agents: + queue: Oceananigans + architecture: CPU + depends_on: "init_cpu" + ##### ##### Regression ##### From 655bb44d7c048cf82c6c741686830bee9cc90236 Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 15:08:35 -0500 Subject: [PATCH 079/100] Resolve packages for Julia 1.5 --- Manifest.toml | 86 +++++++++++++++++++++------------------------------ 1 file changed, 36 insertions(+), 50 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index 4d3e9d6205..d071cee7ef 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -12,9 +12,6 @@ git-tree-sha1 = "ffcfa2d345aaee0ef3d8346a073d5dd03c983ebe" uuid = 
"79e6a3ab-5dfb-504d-930d-738a2a938a0e" version = "3.2.0" -[[ArgTools]] -uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" - [[ArrayInterface]] deps = ["IfElse", "LinearAlgebra", "Requires", "SparseArrays", "Static"] git-tree-sha1 = "e7edcc1ac140cce87b7442ff0fa88b5f19fb71fa" @@ -22,7 +19,10 @@ uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" version = "3.1.3" [[Artifacts]] +deps = ["Pkg"] +git-tree-sha1 = "c30985d8821e0cd73870b17b0ed0ce6dc44cb744" uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.3.0" [[BFloat16s]] deps = ["LinearAlgebra", "Test"] @@ -45,10 +45,10 @@ uuid = "179af706-886a-5703-950a-314cd64e0468" version = "0.1.1" [[CUDA]] -deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CompilerSupportLibraries_jll", "DataStructures", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "MacroTools", "Memoize", "NNlib", "Printf", "Random", "Reexport", "Requires", "SparseArrays", "Statistics", "TimerOutputs"] -git-tree-sha1 = "2d90e6c29706856928f02e11ae15e71889905e34" +deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CompilerSupportLibraries_jll", "DataStructures", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "Libdl", "LinearAlgebra", "Logging", "MacroTools", "NNlib", "Pkg", "Printf", "Random", "Reexport", "Requires", "SparseArrays", "Statistics", "TimerOutputs"] +git-tree-sha1 = "6ccc73b2d8b671f7a65c92b5f08f81422ebb7547" uuid = "052768ef-5323-5732-b1bb-66c8b64840ba" -version = "2.6.1" +version = "2.4.1" [[Cassette]] git-tree-sha1 = "9cc225870ec32ce7b9c773d4dcdaef32f622cf89" @@ -74,8 +74,10 @@ uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" version = "3.25.0" [[CompilerSupportLibraries_jll]] -deps = ["Artifacts", "Libdl"] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "8e695f735fca77e9708e795eda62afdb869cbb70" uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "0.3.4+0" [[Crayons]] git-tree-sha1 = "3f71217b538d7aaee0b69ab47d9b7724ca8afa0d" @@ -116,10 +118,6 @@ git-tree-sha1 = "50ddf44c53698f5e784bbebb3f4b21c5807401b1" uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" version = "0.8.3" -[[Downloads]] -deps = ["ArgTools", "LibCURL", "NetworkOptions"] -uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" - [[ExprTools]] git-tree-sha1 = "10407a39b87f29d47ebaca8edbc75d7c302ff93e" uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" @@ -144,10 +142,10 @@ uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" version = "6.2.0" [[GPUCompiler]] -deps = ["DataStructures", "ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "Scratch", "Serialization", "TimerOutputs", "UUIDs"] -git-tree-sha1 = "ef2839b063e158672583b9c09d2cf4876a8d3d55" +deps = ["DataStructures", "InteractiveUtils", "LLVM", "Libdl", "Scratch", "Serialization", "TimerOutputs", "UUIDs"] +git-tree-sha1 = "c853c810b52a80f9aad79ab109207889e57f41ef" uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" -version = "0.10.0" +version = "0.8.3" [[Glob]] git-tree-sha1 = "4df9f7e06108728ebf00a0a11edee4b29a482bb2" @@ -210,24 +208,26 @@ uuid = "929cbde3-209d-540e-8aea-75f648917ca0" version = "3.6.0" [[LazyArtifacts]] -deps = ["Artifacts", "Pkg"] +deps = ["Pkg"] +git-tree-sha1 = "4bb5499a1fc437342ea9ab7e319ede5a457c0968" uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" - -[[LibCURL]] -deps = ["LibCURL_jll", "MozillaCACerts_jll"] -uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "1.3.0" [[LibCURL_jll]] -deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +deps = ["LibSSH2_jll", "Libdl", "MbedTLS_jll", "Pkg", "Zlib_jll", "nghttp2_jll"] 
+git-tree-sha1 = "897d962c20031e6012bba7b3dcb7a667170dad17" uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "7.70.0+2" [[LibGit2]] -deps = ["Base64", "NetworkOptions", "Printf", "SHA"] +deps = ["Printf"] uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" [[LibSSH2_jll]] -deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +deps = ["Libdl", "MbedTLS_jll", "Pkg"] +git-tree-sha1 = "717705533148132e5466f2924b9a3657b16158e8" uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.9.0+3" [[Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" @@ -268,14 +268,10 @@ deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" [[MbedTLS_jll]] -deps = ["Artifacts", "Libdl"] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "0eef589dd1c26a3ac9d753fe1a8bcad63f956fa6" uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" - -[[Memoize]] -deps = ["MacroTools"] -git-tree-sha1 = "2b1dfcba103de714d31c033b5dacc2e4a12c7caa" -uuid = "c03570c3-d221-55d1-a50c-7939bbd78826" -version = "0.4.4" +version = "2.16.8+1" [[MicrosoftMPI_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -286,9 +282,6 @@ version = "10.1.3+0" [[Mmap]] uuid = "a63ad114-7e13-5084-954f-fe012c677804" -[[MozillaCACerts_jll]] -uuid = "14a3606d-f60d-562e-9121-12d972cd8159" - [[NCDatasets]] deps = ["CFTime", "DataStructures", "Dates", "NetCDF_jll", "Printf"] git-tree-sha1 = "b71d83c87d80f5c54c55a7a9a3aa42bf931c72aa" @@ -303,12 +296,9 @@ version = "0.7.14" [[NetCDF_jll]] deps = ["Artifacts", "HDF5_jll", "JLLWrappers", "LibCURL_jll", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Pkg", "Zlib_jll", "nghttp2_jll"] -git-tree-sha1 = "0cf4d1bf2ef45156aed85c9ac5f8c7e697d9288c" +git-tree-sha1 = "d5835f95aea3b93965a1a7c06de9aace8cb82d99" uuid = "7243133f-43d8-5620-bbf4-c2c921802cf3" -version = "400.702.400+0" - -[[NetworkOptions]] -uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "400.701.400+0" [[OffsetArrays]] deps = ["Adapt"] @@ -358,7 +348,7 @@ uuid = "4a48f351-57a6-4416-9ec4-c37015456aae" version = "0.12.1" [[Pkg]] -deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs"] +deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" [[Printf]] @@ -366,7 +356,7 @@ deps = ["Unicode"] uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" [[REPL]] -deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] +deps = ["InteractiveUtils", "Markdown", "Sockets"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" [[Random]] @@ -458,10 +448,6 @@ git-tree-sha1 = "d7f4287dbc1e590265f50ceda1b40ed2bb31bbbb" uuid = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" version = "1.4.0" -[[TOML]] -deps = ["Dates"] -uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" - [[TableTraits]] deps = ["IteratorInterfaceExtensions"] git-tree-sha1 = "b1ad568ba658d8cbb3b892ed5380a6f3e781a81e" @@ -474,12 +460,8 @@ git-tree-sha1 = "a716dde43d57fa537a19058d044b495301ba6565" uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" version = "1.3.2" -[[Tar]] -deps = ["ArgTools", "SHA"] -uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" - [[Test]] -deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [[TimerOutputs]] @@ -502,9 +484,13 @@ uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" [[Zlib_jll]] -deps = ["Libdl"] +deps = ["Artifacts", "JLLWrappers", 
"Libdl", "Pkg"] +git-tree-sha1 = "320228915c8debb12cb434c59057290f0834dbf6" uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.11+18" [[nghttp2_jll]] -deps = ["Artifacts", "Libdl"] +deps = ["Libdl", "Pkg"] +git-tree-sha1 = "8e2c44ab4d49ad9518f359ed8b62f83ba8beede4" uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.40.0+2" From 8869b3d00bad5c9eab423df9819b0c01251aa7b3 Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 15:52:14 -0500 Subject: [PATCH 080/100] Fix dispatch for `fill_halo_regions!` --- src/Distributed/halo_communication.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Distributed/halo_communication.jl b/src/Distributed/halo_communication.jl index eba4fde6cd..b71693ef68 100644 --- a/src/Distributed/halo_communication.jl +++ b/src/Distributed/halo_communication.jl @@ -58,7 +58,7 @@ end ##### Filling halos for halo communication boundary conditions ##### -fill_halo_regions!(field::AbstractField{LX, LY, LZ}, arch, args...) where {LX, LY, LZ} = +fill_halo_regions!(field::AbstractField{LX, LY, LZ}, arch::AbstractMultiArchitecture, args...) where {LX, LY, LZ} = fill_halo_regions!(field.data, field.boundary_conditions, arch, field.grid, (LX, LY, LZ), args...) function fill_halo_regions!(c::AbstractArray, bcs, arch, grid, c_location, args...) From 56c9727f202716544e017071f12f54db30909490 Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 16:14:37 -0500 Subject: [PATCH 081/100] Buildkite should have access to `mpiexec` now --- .buildkite/pipeline.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index b21bedc368..edbd31a078 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -250,6 +250,7 @@ steps: TEST_GROUP: "distributed" CUDA_VISIBLE_DEVICES: "-1" commands: + - "source /etc/bashrc" # Needed to get access to the module command. - "module load mpi/openmpi-x86_64" - "mpiexec -np 4 $TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'using Pkg; Pkg.test()'" agents: From a89d2485661c1b0a8f24bce6aae993b2a3057be4 Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 16:23:23 -0500 Subject: [PATCH 082/100] Use system MPI on Buildkite/Tartarus --- .buildkite/pipeline.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index edbd31a078..115f84172a 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -252,6 +252,7 @@ steps: commands: - "source /etc/bashrc" # Needed to get access to the module command. - "module load mpi/openmpi-x86_64" + - "$TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -e 'ENV["JULIA_MPI_BINARY"]="system"; using Pkg; Pkg.add("MPI"); Pkg.build("MPI"; verbose=true)'" - "mpiexec -np 4 $TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'using Pkg; Pkg.test()'" agents: queue: Oceananigans From 4307d82def5c8d5e39084f70c3d34ef96ad790ce Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 16:26:36 -0500 Subject: [PATCH 083/100] Escape characters --- .buildkite/pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 115f84172a..4f4b80b0ec 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -252,7 +252,7 @@ steps: commands: - "source /etc/bashrc" # Needed to get access to the module command. 
- "module load mpi/openmpi-x86_64" - - "$TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -e 'ENV["JULIA_MPI_BINARY"]="system"; using Pkg; Pkg.add("MPI"); Pkg.build("MPI"; verbose=true)'" + - "$TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -e 'ENV[\"JULIA_MPI_BINARY\"]=\"system\"; using Pkg; Pkg.add(\"MPI\"); Pkg.build(\"MPI\"; verbose=true)'" - "mpiexec -np 4 $TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'using Pkg; Pkg.test()'" agents: queue: Oceananigans From fddca94003e3846cb9f33b3acb15616632989047 Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 16:36:56 -0500 Subject: [PATCH 084/100] Fixing dispatch for `fill_halo_regions!`: part 2 --- src/Distributed/halo_communication.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Distributed/halo_communication.jl b/src/Distributed/halo_communication.jl index b71693ef68..d4515a7de0 100644 --- a/src/Distributed/halo_communication.jl +++ b/src/Distributed/halo_communication.jl @@ -61,7 +61,7 @@ end fill_halo_regions!(field::AbstractField{LX, LY, LZ}, arch::AbstractMultiArchitecture, args...) where {LX, LY, LZ} = fill_halo_regions!(field.data, field.boundary_conditions, arch, field.grid, (LX, LY, LZ), args...) -function fill_halo_regions!(c::AbstractArray, bcs, arch, grid, c_location, args...) +function fill_halo_regions!(c::AbstractArray, bcs, arch::AbstractMultiArchitecture, grid, c_location, args...) barrier = Event(device(child_architecture(arch))) From e240070b409411072616782f143add25c42a888e Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 17:06:48 -0500 Subject: [PATCH 085/100] Everyone gets system MPI --- .buildkite/pipeline.yml | 8 +++++--- test/test_distributed_models.jl | 34 ++++++++++++++++----------------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 4f4b80b0ec..d8bcef8a99 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -47,6 +47,11 @@ steps: - "$TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'using Pkg; Pkg.precompile()'" - "$TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'using Pkg; Pkg.status()'" - "$TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'using Pkg; Pkg.test()'" + + # Use the system MPI + - "source /etc/bashrc" # Needed to get access to the module command. + - "module load mpi/openmpi-x86_64" + - "$TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'ENV[\"JULIA_MPI_BINARY\"]=\"system\"; using Pkg; Pkg.add(\"MPI\"); Pkg.build(\"MPI\"; verbose=true)'" agents: queue: Oceananigans architecture: CPU @@ -250,9 +255,6 @@ steps: TEST_GROUP: "distributed" CUDA_VISIBLE_DEVICES: "-1" commands: - - "source /etc/bashrc" # Needed to get access to the module command. - - "module load mpi/openmpi-x86_64" - - "$TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -e 'ENV[\"JULIA_MPI_BINARY\"]=\"system\"; using Pkg; Pkg.add(\"MPI\"); Pkg.build(\"MPI\"; verbose=true)'" - "mpiexec -np 4 $TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'using Pkg; Pkg.test()'" agents: queue: Oceananigans diff --git a/test/test_distributed_models.jl b/test/test_distributed_models.jl index 318bedbf9b..edb22a7a59 100644 --- a/test/test_distributed_models.jl +++ b/test/test_distributed_models.jl @@ -458,7 +458,7 @@ end run_triply_periodic_local_grid_tests_with_221_ranks() end - # Test pressure bcs! + # TODO: Test pressure bcs! 
@testset "Injection of halo communication BCs" begin @info " Testing injection of halo communication BCs..." run_triply_periodic_bc_injection_tests_with_411_ranks() @@ -476,20 +476,20 @@ end # run_triply_periodic_halo_communication_tests_with_221_ranks() end - @testset "Time stepping" begin - topo = (Periodic, Periodic, Periodic) - full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) - arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) - dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - model = dm.model - - time_step!(model, 1) - @test dm isa DistributedIncompressibleModel - @test model.clock.time == 1 - - simulation = Simulation(model, Δt=1, stop_iteration=2) - run!(simulation) - @test dm isa DistributedIncompressibleModel - @test model.clock.time == 2 - end + # @testset "Time stepping" begin + # topo = (Periodic, Periodic, Periodic) + # full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + # arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) + # dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + # model = dm.model + + # time_step!(model, 1) + # @test dm isa DistributedIncompressibleModel + # @test model.clock.time == 1 + + # simulation = Simulation(model, Δt=1, stop_iteration=2) + # run!(simulation) + # @test dm isa DistributedIncompressibleModel + # @test model.clock.time == 2 + # end end From 56695d77bf73a2d5d1def1556054e987a06bc5c9 Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 17:42:48 -0500 Subject: [PATCH 086/100] Use MPI.jl's `mpiexecjl` --- .buildkite/pipeline.yml | 8 +++----- src/Distributed/distributed_incompressible_model.jl | 4 +--- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index d8bcef8a99..898d8aabc8 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -48,10 +48,8 @@ steps: - "$TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'using Pkg; Pkg.status()'" - "$TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'using Pkg; Pkg.test()'" - # Use the system MPI - - "source /etc/bashrc" # Needed to get access to the module command. 
- - "module load mpi/openmpi-x86_64" - - "$TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'ENV[\"JULIA_MPI_BINARY\"]=\"system\"; using Pkg; Pkg.add(\"MPI\"); Pkg.build(\"MPI\"; verbose=true)'" + # Set up the mpiexecjl command + - "$TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'using MPI; MPI.install_mpiexecjl()'" agents: queue: Oceananigans architecture: CPU @@ -255,7 +253,7 @@ steps: TEST_GROUP: "distributed" CUDA_VISIBLE_DEVICES: "-1" commands: - - "mpiexec -np 4 $TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'using Pkg; Pkg.test()'" + - "$TARTARUS_HOME/.julia-$BUILDKITE_BUILD_NUMBER/bin/mpiexecjl -np 4 $TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'using Pkg; Pkg.test()'" agents: queue: Oceananigans architecture: CPU diff --git a/src/Distributed/distributed_incompressible_model.jl b/src/Distributed/distributed_incompressible_model.jl index aaff1788ed..5add326f64 100644 --- a/src/Distributed/distributed_incompressible_model.jl +++ b/src/Distributed/distributed_incompressible_model.jl @@ -20,11 +20,9 @@ function DistributedIncompressibleModel(; architecture, grid, boundary_condition Rx, Ry, Rz = architecture.ranks my_connectivity = architecture.connectivity - ## Construct local grid - Nx, Ny, Nz = size(grid) - # Pull out left and right endpoints for full model. + # Pull out endpoints for full model. xL, xR = grid.xF[1], grid.xF[Nx+1] yL, yR = grid.yF[1], grid.yF[Ny+1] zL, zR = grid.zF[1], grid.zF[Nz+1] From 922f155b2c0c13db50943e0de6d6d35b53a60295 Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 17:56:36 -0500 Subject: [PATCH 087/100] Need julia binary in `$PATH` for mpiexecjl to work --- .buildkite/pipeline.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 898d8aabc8..dcbdd4c7fd 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -253,6 +253,7 @@ steps: TEST_GROUP: "distributed" CUDA_VISIBLE_DEVICES: "-1" commands: + - "PATH=$PATH:$TARTARUS_HOME/julia-$JULIA_VERSION/bin" # Need julia binary in $PATH for mpiexecjl to work. - "$TARTARUS_HOME/.julia-$BUILDKITE_BUILD_NUMBER/bin/mpiexecjl -np 4 $TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia -O0 --color=yes --project -e 'using Pkg; Pkg.test()'" agents: queue: Oceananigans From 0d59c69e078582538c13e2fd32c96cc1d85c4f26 Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 20:38:29 -0500 Subject: [PATCH 088/100] Better design for multi-architectures and distributed models --- src/Architectures.jl | 25 +++- src/Distributed/Distributed.jl | 2 + .../distributed_incompressible_model.jl | 28 ++--- .../distributed_solve_for_pressure.jl | 25 +--- src/Distributed/multi_architectures.jl | 119 ++++++++++-------- src/Fields/new_data.jl | 4 +- .../show_incompressible_model.jl | 4 +- src/Oceananigans.jl | 2 +- src/Simulations/simulation.jl | 2 +- test/test_distributed_models.jl | 39 +++--- 10 files changed, 125 insertions(+), 125 deletions(-) diff --git a/src/Architectures.jl b/src/Architectures.jl index fbe36ae084..86c08ab1a5 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -2,7 +2,7 @@ module Architectures export @hascuda, - AbstractArchitecture, CPU, GPU, + AbstractArchitecture, AbstractCPUArchitecture, AbstractGPUArchitecture, CPU, GPU, device, architecture, array_type, arch_array using CUDA @@ -16,20 +16,35 @@ Abstract supertype for architectures supported by Oceananigans. 
""" abstract type AbstractArchitecture end + +""" + AbstractCPUArchitecture + +Abstract supertype for CPU architectures supported by Oceananigans. +""" +abstract type AbstractCPUArchitecture <: AbstractArchitecture end + +""" + AbstractGPUArchitecture + +Abstract supertype for GPU architectures supported by Oceananigans. +""" +abstract type AbstractGPUArchitecture <: AbstractArchitecture end + """ CPU <: AbstractArchitecture Run Oceananigans on one CPU node. Uses multiple threads if the environment variable `JULIA_NUM_THREADS` is set. """ -struct CPU <: AbstractArchitecture end +struct CPU <: AbstractCPUArchitecture end """ GPU <: AbstractArchitecture Run Oceananigans on a single NVIDIA CUDA GPU. """ -struct GPU <: AbstractArchitecture end +struct GPU <: AbstractGPUArchitecture end """ @hascuda expr @@ -41,8 +56,8 @@ macro hascuda(expr) return has_cuda() ? :($(esc(expr))) : :(nothing) end -device(::CPU) = KernelAbstractions.CPU() -device(::GPU) = KernelAbstractions.CUDADevice() +device(::AbstractCPUArchitecture) = KernelAbstractions.CPU() +device(::AbstractGPUArchitecture) = KernelAbstractions.CUDADevice() architecture(::Number) = nothing architecture(::Array) = CPU() diff --git a/src/Distributed/Distributed.jl b/src/Distributed/Distributed.jl index a7965b3bca..4fe2bd0389 100644 --- a/src/Distributed/Distributed.jl +++ b/src/Distributed/Distributed.jl @@ -7,6 +7,8 @@ export DistributedFFTBasedPoissonSolver, DistributedIncompressibleModel +using Oceananigans.Utils + include("distributed_utils.jl") include("multi_architectures.jl") include("halo_communication_bcs.jl") diff --git a/src/Distributed/distributed_incompressible_model.jl b/src/Distributed/distributed_incompressible_model.jl index 5add326f64..b4ae45e78d 100644 --- a/src/Distributed/distributed_incompressible_model.jl +++ b/src/Distributed/distributed_incompressible_model.jl @@ -5,15 +5,9 @@ using Oceananigans.Grids using Oceananigans.Grids: halo_size ##### -##### Distributed model struct and constructor +##### Distributed incompressible model constructor ##### -struct DistributedIncompressibleModel{A, G, M} - architecture :: A - grid :: G - model :: M -end - function DistributedIncompressibleModel(; architecture, grid, boundary_conditions=nothing, model_kwargs...) my_rank = architecture.my_rank i, j, k = architecture.my_index @@ -21,14 +15,15 @@ function DistributedIncompressibleModel(; architecture, grid, boundary_condition my_connectivity = architecture.connectivity Nx, Ny, Nz = size(grid) + Lx, Ly, Lz = length(grid) - # Pull out endpoints for full model. + # Pull out endpoints for full grid. xL, xR = grid.xF[1], grid.xF[Nx+1] yL, yR = grid.yF[1], grid.yF[Ny+1] zL, zR = grid.zF[1], grid.zF[Nz+1] - Lx, Ly, Lz = length(grid) # Make sure we can put an integer number of grid points in each rank. + # Will generalize in the future. 
@assert isinteger(Nx / Rx) @assert isinteger(Ny / Ry) @assert isinteger(Nz / Rz) @@ -73,23 +68,18 @@ function DistributedIncompressibleModel(; architecture, grid, boundary_condition p_bcs = PressureBoundaryConditions(my_grid) p_bcs = inject_halo_communication_boundary_conditions(p_bcs, my_rank, my_connectivity) - pHY′ = CenterField(child_architecture(architecture), my_grid, p_bcs) - pNHS = CenterField(child_architecture(architecture), my_grid, p_bcs) + pHY′ = CenterField(architecture, my_grid, p_bcs) + pNHS = CenterField(architecture, my_grid, p_bcs) pressures = (pHY′=pHY′, pNHS=pNHS) my_model = IncompressibleModel(; - architecture = child_architecture(architecture), + architecture = architecture, grid = my_grid, boundary_conditions = communicative_bcs, pressure_solver = pressure_solver, pressures = pressures, - model_kwargs... + model_kwargs... ) - return DistributedIncompressibleModel(architecture, grid, my_model) -end - -function Base.show(io::IO, dm::DistributedIncompressibleModel) - print(io, "DistributedIncompressibleModel with ") - print(io, dm.architecture) + return my_model end diff --git a/src/Distributed/distributed_solve_for_pressure.jl b/src/Distributed/distributed_solve_for_pressure.jl index 47f56cef49..5e05e642b3 100644 --- a/src/Distributed/distributed_solve_for_pressure.jl +++ b/src/Distributed/distributed_solve_for_pressure.jl @@ -1,24 +1,9 @@ -import Oceananigans.Solvers: solve_for_pressure! +using Oceananigans.Solvers: calculate_pressure_source_term_fft_based_solver! -function solve_for_pressure!(pressure, solver::DistributedFFTBasedPoissonSolver, arch, grid, Δt, U★) +import Oceananigans.Solvers: solve_for_pressure!, source_term_storage, source_term_kernel, solution_storage - RHS = first(solver.storage) +source_term_storage(solver::DistributedFFTBasedPoissonSolver) = first(solver.storage) - rhs_event = launch!(arch, grid, :xyz, - calculate_pressure_right_hand_side!, RHS, arch, grid, Δt, U★, - dependencies = Event(device(arch))) +source_term_kernel(::DistributedFFTBasedPoissonSolver) = calculate_pressure_source_term_fft_based_solver! - wait(device(arch), rhs_event) - - solve_poisson_equation!(solver) - - ϕ = first(solver.storage) - - copy_event = launch!(arch, grid, :xyz, - copy_pressure!, pressure, ϕ, arch, grid, - dependencies = Event(device(arch))) - - wait(device(arch), copy_event) - - return nothing -end +solution_storage(solver::DistributedFFTBasedPoissonSolver) = first(solver.storage) diff --git a/src/Distributed/multi_architectures.jl b/src/Distributed/multi_architectures.jl index 4e71da67da..8cd6f8c60e 100644 --- a/src/Distributed/multi_architectures.jl +++ b/src/Distributed/multi_architectures.jl @@ -2,28 +2,37 @@ using Oceananigans.Architectures using Oceananigans.Grids: topology, validate_tupled_argument -# TODO: Put connectivity inside architecture? MPI should be initialize so you can construct it in there. 
-# Might have to make it MultiCPU(; grid, ranks) - -abstract type AbstractMultiArchitecture <: AbstractArchitecture end +struct MultiCPU{G, R, I, ρ, C} <: AbstractCPUArchitecture + full_grid :: G + my_rank :: R + my_index :: I + ranks :: ρ + connectivity :: C +end -struct MultiCPU{R, I, ρ, C} <: AbstractMultiArchitecture +struct MultiGPU{G, R, I, ρ, C} <: AbstractGPUArchitecture + full_grid :: G my_rank :: R my_index :: I ranks :: ρ connectivity :: C end +const AbstractMultiArchitecture = Union{MultiCPU, MultiGPU} + child_architecture(::MultiCPU) = CPU() child_architecture(::CPU) = CPU() +child_architecture(::MultiGPU) = GPU() +child_architecture(::GPU) = GPU() + ##### ##### Converting between index and MPI rank taking k as the fast index ##### -@inline index2rank(i, j, k, Rx, Ry, Rz) = (i-1)*Ry*Rz + (j-1)*Rz + (k-1) +index2rank(i, j, k, Rx, Ry, Rz) = (i-1)*Ry*Rz + (j-1)*Rz + (k-1) -@inline function rank2index(r, Rx, Ry, Rz) +function rank2index(r, Rx, Ry, Rz) i = div(r, Ry*Rz) r -= i*Ry*Rz j = div(r, Rz) @@ -36,64 +45,64 @@ end ##### struct RankConnectivity{E, W, N, S, T, B} - east :: E - west :: W - north :: N - south :: S - top :: T - bottom :: B + east :: E + west :: W + north :: N + south :: S + top :: T + bottom :: B end RankConnectivity(; east, west, north, south, top, bottom) = - RankConnectivity(east, west, north, south, top, bottom) + RankConnectivity(east, west, north, south, top, bottom) function increment_index(i, R, topo) - R == 1 && return nothing - if i+1 > R - if topo == Periodic - return 1 - else - return nothing - end - else - return i+1 - end + R == 1 && return nothing + if i+1 > R + if topo == Periodic + return 1 + else + return nothing + end + else + return i+1 + end end function decrement_index(i, R, topo) - R == 1 && return nothing - if i-1 < 1 - if topo == Periodic - return R - else - return nothing - end - else - return i-1 - end + R == 1 && return nothing + if i-1 < 1 + if topo == Periodic + return R + else + return nothing + end + else + return i-1 + end end function RankConnectivity(model_index, ranks, topology) - i, j, k = model_index - Rx, Ry, Rz = ranks - TX, TY, TZ = topology - - i_east = increment_index(i, Rx, TX) - i_west = decrement_index(i, Rx, TX) - j_north = increment_index(j, Ry, TY) - j_south = decrement_index(j, Ry, TY) - k_top = increment_index(k, Rz, TZ) - k_bot = decrement_index(k, Rz, TZ) - - r_east = isnothing(i_east) ? nothing : index2rank(i_east, j, k, Rx, Ry, Rz) - r_west = isnothing(i_west) ? nothing : index2rank(i_west, j, k, Rx, Ry, Rz) - r_north = isnothing(j_north) ? nothing : index2rank(i, j_north, k, Rx, Ry, Rz) - r_south = isnothing(j_south) ? nothing : index2rank(i, j_south, k, Rx, Ry, Rz) - r_top = isnothing(k_top) ? nothing : index2rank(i, j, k_top, Rx, Ry, Rz) - r_bot = isnothing(k_bot) ? nothing : index2rank(i, j, k_bot, Rx, Ry, Rz) - - return RankConnectivity(east=r_east, west=r_west, north=r_north, - south=r_south, top=r_top, bottom=r_bot) + i, j, k = model_index + Rx, Ry, Rz = ranks + TX, TY, TZ = topology + + i_east = increment_index(i, Rx, TX) + i_west = decrement_index(i, Rx, TX) + j_north = increment_index(j, Ry, TY) + j_south = decrement_index(j, Ry, TY) + k_top = increment_index(k, Rz, TZ) + k_bot = decrement_index(k, Rz, TZ) + + r_east = isnothing(i_east) ? nothing : index2rank(i_east, j, k, Rx, Ry, Rz) + r_west = isnothing(i_west) ? nothing : index2rank(i_west, j, k, Rx, Ry, Rz) + r_north = isnothing(j_north) ? nothing : index2rank(i, j_north, k, Rx, Ry, Rz) + r_south = isnothing(j_south) ? 
nothing : index2rank(i, j_south, k, Rx, Ry, Rz) + r_top = isnothing(k_top) ? nothing : index2rank(i, j, k_top, Rx, Ry, Rz) + r_bot = isnothing(k_bot) ? nothing : index2rank(i, j, k_bot, Rx, Ry, Rz) + + return RankConnectivity(east=r_east, west=r_west, north=r_north, + south=r_south, top=r_top, bottom=r_bot) end ##### @@ -122,7 +131,7 @@ function MultiCPU(; grid, ranks) my_connectivity = RankConnectivity(my_index, ranks, topology(grid)) - return MultiCPU(my_rank, my_index, ranks, my_connectivity) + return MultiCPU(grid, my_rank, my_index, ranks, my_connectivity) end ##### diff --git a/src/Fields/new_data.jl b/src/Fields/new_data.jl index 9297009819..d1da2bb5c2 100644 --- a/src/Fields/new_data.jl +++ b/src/Fields/new_data.jl @@ -45,7 +45,7 @@ Returns an `OffsetArray` of zeros of float type `FT`, with parent data in CPU memory and indices corresponding to a field on a `grid` of `size(grid)` and located at `loc`. """ -function new_data(FT, ::CPU, grid, loc) +function new_data(FT, ::AbstractCPUArchitecture, grid, loc) underlying_data = zeros(FT, total_length(loc[1], topology(grid, 1), grid.Nx, grid.Hx), total_length(loc[2], topology(grid, 2), grid.Ny, grid.Hy), total_length(loc[3], topology(grid, 3), grid.Nz, grid.Hz)) @@ -60,7 +60,7 @@ Returns an `OffsetArray` of zeros of float type `FT`, with parent data in GPU memory and indices corresponding to a field on a `grid` of `size(grid)` and located at `loc`. """ -function new_data(FT, ::GPU, grid, loc) +function new_data(FT, ::AbstractGPUArchitecture, grid, loc) underlying_data = CuArray{FT}(undef, total_length(loc[1], topology(grid, 1), grid.Nx, grid.Hx), total_length(loc[2], topology(grid, 2), grid.Ny, grid.Hy), total_length(loc[3], topology(grid, 3), grid.Nz, grid.Hz)) diff --git a/src/Models/IncompressibleModels/show_incompressible_model.jl b/src/Models/IncompressibleModels/show_incompressible_model.jl index 19e1229307..96fe62a996 100644 --- a/src/Models/IncompressibleModels/show_incompressible_model.jl +++ b/src/Models/IncompressibleModels/show_incompressible_model.jl @@ -1,9 +1,9 @@ -using Oceananigans.Utils: prettytime, ordered_dict_show using Oceananigans: short_show +using Oceananigans.Utils: prettytime, ordered_dict_show """Show the innards of a `Model` in the REPL.""" function Base.show(io::IO, model::IncompressibleModel{TS, C, A}) where {TS, C, A} - print(io, "IncompressibleModel{$A, $(eltype(model.grid))}", + print(io, "IncompressibleModel{$(Base.typename(A)), $(eltype(model.grid))}", "(time = $(prettytime(model.clock.time)), iteration = $(model.clock.iteration)) \n", "├── grid: $(short_show(model.grid))\n", "├── tracers: $(tracernames(model.tracers))\n", diff --git a/src/Oceananigans.jl b/src/Oceananigans.jl index 554409df70..a2569b40ee 100644 --- a/src/Oceananigans.jl +++ b/src/Oceananigans.jl @@ -76,7 +76,7 @@ export ∂x, ∂y, ∂z, @at, # Distributed - MultiCPU, + MultiCPU, DistributedIncompressibleModel, # Utils prettytime diff --git a/src/Simulations/simulation.jl b/src/Simulations/simulation.jl index 2529cc54e1..a2dae097c2 100644 --- a/src/Simulations/simulation.jl +++ b/src/Simulations/simulation.jl @@ -83,7 +83,7 @@ function Simulation(model; Δt, end Base.show(io::IO, s::Simulation) = - print(io, "Simulation{$(typeof(s.model).name){$(typeof(s.model.architecture)), $(eltype(s.model.grid))}}\n", + print(io, "Simulation{$(typeof(s.model).name){$(Base.typename(typeof(s.model.architecture))), $(eltype(s.model.grid))}}\n", "├── Model clock: time = $(prettytime(s.model.clock.time)), iteration = $(s.model.clock.iteration) \n", "├── Next 
time step ($(typeof(s.Δt))): $(prettytime(get_Δt(s.Δt))) \n", "├── Iteration interval: $(s.iteration_interval)\n", diff --git a/test/test_distributed_models.jl b/test/test_distributed_models.jl index edb22a7a59..f75a6511d1 100644 --- a/test/test_distributed_models.jl +++ b/test/test_distributed_models.jl @@ -66,13 +66,13 @@ function run_triply_periodic_rank_connectivity_tests_with_141_ranks() @test isnothing(connectivity.bottom) # +---+ - # | 0 | - # +---+ - # | 1 | + # | 3 | # +---+ # | 2 | # +---+ - # | 3 | + # | 1 | + # +---+ + # | 0 | # +---+ if my_rank == 0 @@ -476,20 +476,19 @@ end # run_triply_periodic_halo_communication_tests_with_221_ranks() end - # @testset "Time stepping" begin - # topo = (Periodic, Periodic, Periodic) - # full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) - # arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) - # dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - # model = dm.model - - # time_step!(model, 1) - # @test dm isa DistributedIncompressibleModel - # @test model.clock.time == 1 - - # simulation = Simulation(model, Δt=1, stop_iteration=2) - # run!(simulation) - # @test dm isa DistributedIncompressibleModel - # @test model.clock.time == 2 - # end + @testset "Time stepping" begin + topo = (Periodic, Periodic, Periodic) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) + arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid) + + time_step!(model, 1) + @test model isa IncompressibleModel + @test model.clock.time == 1 + + simulation = Simulation(model, Δt=1, stop_iteration=2) + run!(simulation) + @test model isa IncompressibleModel + @test model.clock.time == 2 + end end From da22b16addf598aede0686ed6db5df16dc6f9236 Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 22:01:14 -0500 Subject: [PATCH 089/100] Update tests and Buildkite zoo --- .buildkite/pipeline.yml | 10 ++--- src/Architectures.jl | 8 ++-- test/test_distributed_models.jl | 48 ++++++++++++------------ test/test_distributed_poisson_solvers.jl | 4 +- 4 files changed, 35 insertions(+), 35 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index dcbdd4c7fd..a185c19a8e 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -166,7 +166,7 @@ steps: ##### HydrostaticFreeSurfaceModel ##### - - label: "💧 gpu hydrostatic free surface model tests" + - label: "🐡 gpu hydrostatic free surface model tests" env: JULIA_DEPOT_PATH: "$SVERDRUP_HOME/.julia-$BUILDKITE_BUILD_NUMBER" TEST_GROUP: "hydrostatic_free_surface" @@ -177,7 +177,7 @@ steps: architecture: GPU depends_on: "init_gpu" - - label: "💦 cpu hydrostatic free surface model tests" + - label: "🐠 cpu hydrostatic free surface model tests" env: JULIA_DEPOT_PATH: "$TARTARUS_HOME/.julia-$BUILDKITE_BUILD_NUMBER" TEST_GROUP: "hydrostatic_free_surface" @@ -193,7 +193,7 @@ steps: ##### ShallowWaterModel ##### - - label: "💧 gpu shallow water model tests" + - label: "🦑 gpu shallow water model tests" env: JULIA_DEPOT_PATH: "$SVERDRUP_HOME/.julia-$BUILDKITE_BUILD_NUMBER" TEST_GROUP: "shallow_water" @@ -204,7 +204,7 @@ steps: architecture: GPU depends_on: "init_gpu" - - label: "💦 cpu shallow water model tests" + - label: "🦐 cpu shallow water model tests" env: JULIA_DEPOT_PATH: "$TARTARUS_HOME/.julia-$BUILDKITE_BUILD_NUMBER" TEST_GROUP: "shallow_water" @@ -337,7 +337,7 @@ steps: ##### Clean up ##### - - label: "🧻 clean up 
gpu environment" + - label: "🧽 clean up gpu environment" command: "rm -rf $SVERDRUP_HOME/.julia-$BUILDKITE_BUILD_NUMBER" agents: queue: Oceananigans diff --git a/src/Architectures.jl b/src/Architectures.jl index 86c08ab1a5..b4b2293af1 100644 --- a/src/Architectures.jl +++ b/src/Architectures.jl @@ -66,9 +66,9 @@ architecture(::CuArray) = GPU() array_type(::CPU) = Array array_type(::GPU) = CuArray -arch_array(::CPU, A::Array) = A -arch_array(::CPU, A::CuArray) = Array(A) -arch_array(::GPU, A::Array) = CuArray(A) -arch_array(::GPU, A::CuArray) = A +arch_array(::AbstractCPUArchitecture, A::Array) = A +arch_array(::AbstractCPUArchitecture, A::CuArray) = Array(A) +arch_array(::AbstractGPUArchitecture, A::Array) = CuArray(A) +arch_array(::AbstractGPUArchitecture, A::CuArray) = A end diff --git a/test/test_distributed_models.jl b/test/test_distributed_models.jl index f75a6511d1..52703c750f 100644 --- a/test/test_distributed_models.jl +++ b/test/test_distributed_models.jl @@ -191,10 +191,10 @@ function run_triply_periodic_local_grid_tests_with_411_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) - dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) - local_grid = dm.model.grid + local_grid = model.grid nx, ny, nz = size(local_grid) @test local_grid.xF[1] == 0.25*my_rank @@ -211,10 +211,10 @@ function run_triply_periodic_local_grid_tests_with_141_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) - dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) - local_grid = dm.model.grid + local_grid = model.grid nx, ny, nz = size(local_grid) @test local_grid.xF[1] == 0 @@ -231,10 +231,10 @@ function run_triply_periodic_local_grid_tests_with_114_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) - dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) my_rank = MPI.Comm_rank(MPI.COMM_WORLD) - local_grid = dm.model.grid + local_grid = model.grid nx, ny, nz = size(local_grid) @test local_grid.xF[1] == 0 @@ -251,10 +251,10 @@ function run_triply_periodic_local_grid_tests_with_221_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) - dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) i, j, k = arch.my_index - local_grid = dm.model.grid + local_grid = model.grid nx, ny, nz = size(local_grid) @test local_grid.xF[1] == 0.5*(i-1) @@ -275,9 +275,9 @@ function run_triply_periodic_bc_injection_tests_with_411_ranks() topo = (Periodic, Periodic, Periodic) full_grid = 
RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) - dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - for field in fields(dm.model) + for field in fields(model) fbcs = field.boundary_conditions @test fbcs.east isa HaloCommunicationBC @test fbcs.west isa HaloCommunicationBC @@ -292,9 +292,9 @@ function run_triply_periodic_bc_injection_tests_with_141_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) - dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - for field in fields(dm.model) + for field in fields(model) fbcs = field.boundary_conditions @test !isa(fbcs.east, HaloCommunicationBC) @test !isa(fbcs.west, HaloCommunicationBC) @@ -309,9 +309,9 @@ function run_triply_periodic_bc_injection_tests_with_114_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) - dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - for field in fields(dm.model) + for field in fields(model) fbcs = field.boundary_conditions @test !isa(fbcs.east, HaloCommunicationBC) @test !isa(fbcs.west, HaloCommunicationBC) @@ -326,9 +326,9 @@ function run_triply_periodic_bc_injection_tests_with_221_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) - dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - for field in fields(dm.model) + for field in fields(model) fbcs = field.boundary_conditions @test fbcs.east isa HaloCommunicationBC @test fbcs.west isa HaloCommunicationBC @@ -347,9 +347,9 @@ function run_triply_periodic_halo_communication_tests_with_411_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 6, 4), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) - dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - for field in fields(dm.model) + for field in fields(model) interior(field) .= arch.my_rank fill_halo_regions!(field, arch) @@ -370,9 +370,9 @@ function run_triply_periodic_halo_communication_tests_with_141_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(3, 8, 2), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) - dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - for field in fields(dm.model) + for field in fields(model) interior(field) .= arch.my_rank 
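        # Stamping each interior with its owner's rank number makes the expected
        # halo contents unambiguous: after the exchange below, each halo must
        # hold the neighboring rank's number, which is what the @test lines check.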
fill_halo_regions!(field, arch) @@ -393,9 +393,9 @@ function run_triply_periodic_halo_communication_tests_with_114_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(3, 5, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) - dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - for field in fields(dm.model) + for field in fields(model) interior(field) .= arch.my_rank fill_halo_regions!(field, arch) @@ -416,9 +416,9 @@ function run_triply_periodic_halo_communication_tests_with_221_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 3), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) - dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - for field in fields(dm.model) + for field in fields(model) interior(field) .= arch.my_rank fill_halo_regions!(field, arch) diff --git a/test/test_distributed_poisson_solvers.jl b/test/test_distributed_poisson_solvers.jl index c730d72895..7726f5cd94 100644 --- a/test/test_distributed_poisson_solvers.jl +++ b/test/test_distributed_poisson_solvers.jl @@ -39,9 +39,9 @@ function divergence_free_poisson_solution_triply_periodic(grid_points, ranks) topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=grid_points, extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=ranks) - dm = DistributedIncompressibleModel(architecture=arch, grid=full_grid) + model = DistributedIncompressibleModel(architecture=arch, grid=full_grid) - local_grid = dm.model.grid + local_grid = model.grid solver = DistributedFFTBasedPoissonSolver(arch, full_grid, local_grid) R = random_divergent_source_term(Float64, child_architecture(arch), local_grid) From 4baba22dc675b69dc5945b5567aac288311e62e1 Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 23:37:40 -0500 Subject: [PATCH 090/100] More tests --- .../distributed_incompressible_model.jl | 10 +- src/Oceananigans.jl | 3 - test/test_distributed_models.jl | 100 +++++++++--------- 3 files changed, 55 insertions(+), 58 deletions(-) diff --git a/src/Distributed/distributed_incompressible_model.jl b/src/Distributed/distributed_incompressible_model.jl index b4ae45e78d..72bd3e6c57 100644 --- a/src/Distributed/distributed_incompressible_model.jl +++ b/src/Distributed/distributed_incompressible_model.jl @@ -45,11 +45,11 @@ function DistributedIncompressibleModel(; architecture, grid, boundary_condition bcs = isnothing(boundary_conditions) ? NamedTuple() : boundary_conditions bcs = ( - u = haskey(bcs, :u) ? bcs.u : UVelocityBoundaryConditions(grid), - v = haskey(bcs, :v) ? bcs.v : VVelocityBoundaryConditions(grid), - w = haskey(bcs, :w) ? bcs.w : WVelocityBoundaryConditions(grid), - T = haskey(bcs, :T) ? bcs.T : TracerBoundaryConditions(grid), - S = haskey(bcs, :S) ? bcs.S : TracerBoundaryConditions(grid) + u = haskey(bcs, :u) ? bcs.u : UVelocityBoundaryConditions(my_grid), + v = haskey(bcs, :v) ? bcs.v : VVelocityBoundaryConditions(my_grid), + w = haskey(bcs, :w) ? bcs.w : WVelocityBoundaryConditions(my_grid), + T = haskey(bcs, :T) ? bcs.T : TracerBoundaryConditions(my_grid), + S = haskey(bcs, :S) ? 
bcs.S : TracerBoundaryConditions(my_grid) ) communicative_bcs = ( diff --git a/src/Oceananigans.jl b/src/Oceananigans.jl index a2569b40ee..2434cd06d5 100644 --- a/src/Oceananigans.jl +++ b/src/Oceananigans.jl @@ -75,9 +75,6 @@ export # Abstract operations ∂x, ∂y, ∂z, @at, - # Distributed - MultiCPU, DistributedIncompressibleModel, - # Utils prettytime diff --git a/test/test_distributed_models.jl b/test/test_distributed_models.jl index 52703c750f..81b31d5e34 100644 --- a/test/test_distributed_models.jl +++ b/test/test_distributed_models.jl @@ -12,7 +12,7 @@ mpi_ranks = MPI.Comm_size(comm) ##### Multi architectures and rank connectivity ##### -function run_triply_periodic_rank_connectivity_tests_with_411_ranks() +function test_triply_periodic_rank_connectivity_with_411_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) @@ -49,7 +49,7 @@ function run_triply_periodic_rank_connectivity_tests_with_411_ranks() return nothing end -function run_triply_periodic_rank_connectivity_tests_with_141_ranks() +function test_triply_periodic_rank_connectivity_with_141_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) @@ -92,7 +92,7 @@ function run_triply_periodic_rank_connectivity_tests_with_141_ranks() return nothing end -function run_triply_periodic_rank_connectivity_tests_with_114_ranks() +function test_triply_periodic_rank_connectivity_with_114_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) @@ -138,7 +138,7 @@ function run_triply_periodic_rank_connectivity_tests_with_114_ranks() return nothing end -function run_triply_periodic_rank_connectivity_tests_with_221_ranks() +function test_triply_periodic_rank_connectivity_with_221_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) @@ -187,7 +187,7 @@ end ##### Local grids for distributed models ##### -function run_triply_periodic_local_grid_tests_with_411_ranks() +function test_triply_periodic_local_grid_with_411_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) @@ -207,7 +207,7 @@ function run_triply_periodic_local_grid_tests_with_411_ranks() return nothing end -function run_triply_periodic_local_grid_tests_with_141_ranks() +function test_triply_periodic_local_grid_with_141_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) @@ -227,7 +227,7 @@ function run_triply_periodic_local_grid_tests_with_141_ranks() return nothing end -function run_triply_periodic_local_grid_tests_with_114_ranks() +function test_triply_periodic_local_grid_with_114_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) @@ -247,7 +247,7 @@ function run_triply_periodic_local_grid_tests_with_114_ranks() return nothing end -function run_triply_periodic_local_grid_tests_with_221_ranks() +function 
test_triply_periodic_local_grid_with_221_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) @@ -271,13 +271,13 @@ end ##### Injection of halo communication BCs ##### -function run_triply_periodic_bc_injection_tests_with_411_ranks() +function test_triply_periodic_bc_injection_with_411_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - for field in fields(model) + for field in merge(fields(model), model.pressures) fbcs = field.boundary_conditions @test fbcs.east isa HaloCommunicationBC @test fbcs.west isa HaloCommunicationBC @@ -288,13 +288,13 @@ function run_triply_periodic_bc_injection_tests_with_411_ranks() end end -function run_triply_periodic_bc_injection_tests_with_141_ranks() +function test_triply_periodic_bc_injection_with_141_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - for field in fields(model) + for field in merge(fields(model), model.pressures) fbcs = field.boundary_conditions @test !isa(fbcs.east, HaloCommunicationBC) @test !isa(fbcs.west, HaloCommunicationBC) @@ -305,13 +305,13 @@ function run_triply_periodic_bc_injection_tests_with_141_ranks() end end -function run_triply_periodic_bc_injection_tests_with_114_ranks() +function test_triply_periodic_bc_injection_with_114_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - for field in fields(model) + for field in merge(fields(model), model.pressures) fbcs = field.boundary_conditions @test !isa(fbcs.east, HaloCommunicationBC) @test !isa(fbcs.west, HaloCommunicationBC) @@ -322,13 +322,13 @@ function run_triply_periodic_bc_injection_tests_with_114_ranks() end end -function run_triply_periodic_bc_injection_tests_with_221_ranks() +function test_triply_periodic_bc_injection_with_221_ranks() topo = (Periodic, Periodic, Periodic) full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - for field in fields(model) + for field in merge(fields(model), model.pressures) fbcs = field.boundary_conditions @test fbcs.east isa HaloCommunicationBC @test fbcs.west isa HaloCommunicationBC @@ -343,13 +343,13 @@ end ##### Halo communication ##### -function run_triply_periodic_halo_communication_tests_with_411_ranks() +function test_triply_periodic_halo_communication_with_411_ranks(halo) topo = (Periodic, Periodic, Periodic) - full_grid = RegularRectilinearGrid(topology=topo, size=(8, 6, 4), extent=(1, 2, 3)) + full_grid = RegularRectilinearGrid(topology=topo, size=(16, 6, 4), extent=(1, 2, 3), halo=halo) arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - for field in 
fields(model) + for field in merge(fields(model), model.pressures) interior(field) .= arch.my_rank fill_halo_regions!(field, arch) @@ -366,13 +366,13 @@ function run_triply_periodic_halo_communication_tests_with_411_ranks() return nothing end -function run_triply_periodic_halo_communication_tests_with_141_ranks() +function test_triply_periodic_halo_communication_with_141_ranks(halo) topo = (Periodic, Periodic, Periodic) - full_grid = RegularRectilinearGrid(topology=topo, size=(3, 8, 2), extent=(1, 2, 3)) + full_grid = RegularRectilinearGrid(topology=topo, size=(4, 16, 4), extent=(1, 2, 3), halo=halo) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - for field in fields(model) + for field in merge(fields(model), model.pressures) interior(field) .= arch.my_rank fill_halo_regions!(field, arch) @@ -389,13 +389,13 @@ function run_triply_periodic_halo_communication_tests_with_141_ranks() return nothing end -function run_triply_periodic_halo_communication_tests_with_114_ranks() +function test_triply_periodic_halo_communication_with_114_ranks(halo) topo = (Periodic, Periodic, Periodic) - full_grid = RegularRectilinearGrid(topology=topo, size=(3, 5, 8), extent=(1, 2, 3)) + full_grid = RegularRectilinearGrid(topology=topo, size=(4, 4, 16), extent=(1, 2, 3), halo=halo) arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - for field in fields(model) + for field in merge(fields(model), model.pressures) interior(field) .= arch.my_rank fill_halo_regions!(field, arch) @@ -412,20 +412,20 @@ function run_triply_periodic_halo_communication_tests_with_114_ranks() return nothing end -function run_triply_periodic_halo_communication_tests_with_221_ranks() +function test_triply_periodic_halo_communication_with_221_ranks(halo) topo = (Periodic, Periodic, Periodic) - full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 3), extent=(1, 2, 3)) + full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 3), extent=(1, 2, 3), halo=halo) arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - for field in fields(model) + for field in merge(fields(model), model.pressures) interior(field) .= arch.my_rank fill_halo_regions!(field, arch) - @test all(east_halo(field) .== arch.connectivity.east) - @test all(west_halo(field) .== arch.connectivity.west) - @test all(north_halo(field) .== arch.connectivity.north) - @test all(south_halo(field) .== arch.connectivity.south) + @test all(east_halo(field, include_corners=false) .== arch.connectivity.east) + @test all(west_halo(field, include_corners=false) .== arch.connectivity.west) + @test all(north_halo(field, include_corners=false) .== arch.connectivity.north) + @test all(south_halo(field, include_corners=false) .== arch.connectivity.south) @test all(interior(field) .== arch.my_rank) @test all(top_halo(field, include_corners=false) .== arch.my_rank) @@ -444,36 +444,36 @@ end @testset "Multi architectures rank connectivity" begin @info " Testing multi architecture rank connectivity..." 
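        # Concrete case: with ranks=(4, 1, 1) and a periodic x-topology the ranks
        # are laid out 0 | 1 | 2 | 3 along x and the ends wrap around, so rank 0
        # has west neighbor 3 and east neighbor 1, while rank 3 has west 2, east 0.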
- run_triply_periodic_rank_connectivity_tests_with_411_ranks() - run_triply_periodic_rank_connectivity_tests_with_141_ranks() - run_triply_periodic_rank_connectivity_tests_with_114_ranks() - run_triply_periodic_rank_connectivity_tests_with_221_ranks() + test_triply_periodic_rank_connectivity_with_411_ranks() + test_triply_periodic_rank_connectivity_with_141_ranks() + test_triply_periodic_rank_connectivity_with_114_ranks() + test_triply_periodic_rank_connectivity_with_221_ranks() end @testset "Local grids for distributed models" begin @info " Testing local grids for distributed models..." - run_triply_periodic_local_grid_tests_with_411_ranks() - run_triply_periodic_local_grid_tests_with_141_ranks() - run_triply_periodic_local_grid_tests_with_114_ranks() - run_triply_periodic_local_grid_tests_with_221_ranks() + test_triply_periodic_local_grid_with_411_ranks() + test_triply_periodic_local_grid_with_141_ranks() + test_triply_periodic_local_grid_with_114_ranks() + test_triply_periodic_local_grid_with_221_ranks() end - # TODO: Test pressure bcs! @testset "Injection of halo communication BCs" begin @info " Testing injection of halo communication BCs..." - run_triply_periodic_bc_injection_tests_with_411_ranks() - run_triply_periodic_bc_injection_tests_with_141_ranks() - run_triply_periodic_bc_injection_tests_with_114_ranks() - run_triply_periodic_bc_injection_tests_with_221_ranks() + test_triply_periodic_bc_injection_with_411_ranks() + test_triply_periodic_bc_injection_with_141_ranks() + test_triply_periodic_bc_injection_with_114_ranks() + test_triply_periodic_bc_injection_with_221_ranks() end - # TODO: Test larger halos! @testset "Halo communication" begin @info " Testing halo communication..." - run_triply_periodic_halo_communication_tests_with_411_ranks() - run_triply_periodic_halo_communication_tests_with_141_ranks() - run_triply_periodic_halo_communication_tests_with_114_ranks() - # run_triply_periodic_halo_communication_tests_with_221_ranks() + for H in 1:3 + test_triply_periodic_halo_communication_with_411_ranks((H, H, H)) + test_triply_periodic_halo_communication_with_141_ranks((H, H, H)) + test_triply_periodic_halo_communication_with_114_ranks((H, H, H)) + test_triply_periodic_halo_communication_with_221_ranks((H, H, H)) + end end @testset "Time stepping" begin From f7eb66413272af1faeccf709812fad1a3b76c913 Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Thu, 4 Mar 2021 23:56:47 -0500 Subject: [PATCH 091/100] Address PR comments --- src/Distributed/halo_communication.jl | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/Distributed/halo_communication.jl b/src/Distributed/halo_communication.jl index d4515a7de0..1a0d2bade4 100644 --- a/src/Distributed/halo_communication.jl +++ b/src/Distributed/halo_communication.jl @@ -10,12 +10,7 @@ import Oceananigans.BoundaryConditions: ##### sides = (:west, :east, :south, :north, :top, :bottom) - -side_id = Dict( - :east => 1, :west => 2, - :north => 3, :south => 4, - :top => 5, :bottom => 6 -) +side_id = Dict(side => n for (n, side) in enumerate(sides)) opposite_side = Dict( :east => :west, :west => :east, @@ -23,16 +18,14 @@ opposite_side = Dict( :top => :bottom, :bottom => :top ) -# Unfortunately can't call MPI.Comm_size(MPI.COMM_WORLD) before MPI.Init(). -MAX_RANKS = 10^3 -RANK_DIGITS = 3 - # Define functions that return unique send and recv MPI tags for each side. 
# It's an integer where # digit 1: the side # digits 2-4: the from rank # digits 5-7: the to rank +RANK_DIGITS = 3 + for side in sides side_str = string(side) send_tag_fn_name = Symbol("$(side)_send_tag") From faa07293d7f4e71c03064b30c573718c10a3b416 Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Fri, 5 Mar 2021 09:07:38 -0500 Subject: [PATCH 092/100] Bump v0.53.0 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 400c8bd74f..855bc1e7d2 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "Oceananigans" uuid = "9e8cae18-63c1-5223-a75c-80ca9d6e9a09" -version = "0.52.1" +version = "0.53.0" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" From 42d2eab2a94bb17ab5b2e77fbc5ee128a79fc345 Mon Sep 17 00:00:00 2001 From: Ali Ramadhan Date: Tue, 9 Mar 2021 09:40:49 -0500 Subject: [PATCH 093/100] Update src/Distributed/halo_communication.jl --- src/Distributed/halo_communication.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Distributed/halo_communication.jl b/src/Distributed/halo_communication.jl index 1a0d2bade4..4a39a16d4b 100644 --- a/src/Distributed/halo_communication.jl +++ b/src/Distributed/halo_communication.jl @@ -21,8 +21,8 @@ opposite_side = Dict( # Define functions that return unique send and recv MPI tags for each side. # It's an integer where # digit 1: the side -# digits 2-4: the from rank -# digits 5-7: the to rank +# digits 2-4: the "from" rank +# digits 5-7: the "to" rank RANK_DIGITS = 3 From 3ae697f87420943b3bd0f9cff9c0d4150cce8eea Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Tue, 9 Mar 2021 21:02:38 -0500 Subject: [PATCH 094/100] Inject halo communication BCs in `Field` constructor --- src/Distributed/Distributed.jl | 1 + src/Distributed/distributed_fields.jl | 10 +++++ .../distributed_incompressible_model.jl | 43 +++---------------- src/Fields/field.jl | 16 +++---- 4 files changed, 25 insertions(+), 45 deletions(-) create mode 100644 src/Distributed/distributed_fields.jl diff --git a/src/Distributed/Distributed.jl b/src/Distributed/Distributed.jl index 4fe2bd0389..7c1973c807 100644 --- a/src/Distributed/Distributed.jl +++ b/src/Distributed/Distributed.jl @@ -13,6 +13,7 @@ include("distributed_utils.jl") include("multi_architectures.jl") include("halo_communication_bcs.jl") include("halo_communication.jl") +include("distributed_fields.jl") include("distributed_fft_based_poisson_solver.jl") include("distributed_solve_for_pressure.jl") include("distributed_incompressible_model.jl") diff --git a/src/Distributed/distributed_fields.jl b/src/Distributed/distributed_fields.jl new file mode 100644 index 0000000000..f73878a7bc --- /dev/null +++ b/src/Distributed/distributed_fields.jl @@ -0,0 +1,10 @@ +import Oceananigans.Fields: Field + +function Field(X, Y, Z, arch::AbstractMultiArchitecture, grid, + bcs = FieldBoundaryConditions(grid, (X, Y, Z)), + data = new_data(eltype(grid), arch, grid, (X, Y, Z))) + + communicative_bcs = inject_halo_communication_boundary_conditions(bcs, arch.my_rank, arch.connectivity) + + return Field(X, Y, Z, child_architecture(arch), grid, communicative_bcs, data) +end diff --git a/src/Distributed/distributed_incompressible_model.jl b/src/Distributed/distributed_incompressible_model.jl index 72bd3e6c57..c167be41ce 100644 --- a/src/Distributed/distributed_incompressible_model.jl +++ b/src/Distributed/distributed_incompressible_model.jl @@ -8,7 +8,7 @@ using Oceananigans.Grids: halo_size ##### Distributed 
incompressible model constructor ##### -function DistributedIncompressibleModel(; architecture, grid, boundary_conditions=nothing, model_kwargs...) +function DistributedIncompressibleModel(; architecture, grid, model_kwargs...) my_rank = architecture.my_rank i, j, k = architecture.my_index Rx, Ry, Rz = architecture.ranks @@ -35,50 +35,19 @@ function DistributedIncompressibleModel(; architecture, grid, boundary_condition y₁, y₂ = yL + (j-1)*ly, yL + j*ly z₁, z₂ = zL + (k-1)*lz, zL + k*lz - # FIXME: local grid might have different topology! + # FIXME? local grid might have different topology! my_grid = RegularRectilinearGrid(topology=topology(grid), size=(nx, ny, nz), x=(x₁, x₂), y=(y₁, y₂), z=(z₁, z₂), halo=halo_size(grid)) - ## Change appropriate boundary conditions to halo communication BCs - - # FIXME: Stop assuming (u, v, w, T, S). - - bcs = isnothing(boundary_conditions) ? NamedTuple() : boundary_conditions - - bcs = ( - u = haskey(bcs, :u) ? bcs.u : UVelocityBoundaryConditions(my_grid), - v = haskey(bcs, :v) ? bcs.v : VVelocityBoundaryConditions(my_grid), - w = haskey(bcs, :w) ? bcs.w : WVelocityBoundaryConditions(my_grid), - T = haskey(bcs, :T) ? bcs.T : TracerBoundaryConditions(my_grid), - S = haskey(bcs, :S) ? bcs.S : TracerBoundaryConditions(my_grid) - ) - - communicative_bcs = ( - u = inject_halo_communication_boundary_conditions(bcs.u, my_rank, my_connectivity), - v = inject_halo_communication_boundary_conditions(bcs.v, my_rank, my_connectivity), - w = inject_halo_communication_boundary_conditions(bcs.w, my_rank, my_connectivity), - T = inject_halo_communication_boundary_conditions(bcs.T, my_rank, my_connectivity), - S = inject_halo_communication_boundary_conditions(bcs.S, my_rank, my_connectivity) - ) - ## Construct local model pressure_solver = haskey(model_kwargs, :pressure_solver) ? Dict(model_kwargs)[:pressure_solver] : DistributedFFTBasedPoissonSolver(architecture, grid, my_grid) - p_bcs = PressureBoundaryConditions(my_grid) - p_bcs = inject_halo_communication_boundary_conditions(p_bcs, my_rank, my_connectivity) - - pHY′ = CenterField(architecture, my_grid, p_bcs) - pNHS = CenterField(architecture, my_grid, p_bcs) - pressures = (pHY′=pHY′, pNHS=pNHS) - my_model = IncompressibleModel(; - architecture = architecture, - grid = my_grid, - boundary_conditions = communicative_bcs, - pressure_solver = pressure_solver, - pressures = pressures, - model_kwargs... + architecture = architecture, + grid = my_grid, + pressure_solver = pressure_solver, + model_kwargs... 
) return my_model diff --git a/src/Fields/field.jl b/src/Fields/field.jl index 6389e7a665..c350423415 100644 --- a/src/Fields/field.jl +++ b/src/Fields/field.jl @@ -78,7 +78,7 @@ function CenterField(FT::DataType, arch, grid, bcs = TracerBoundaryConditions(grid), data = new_data(FT, arch, grid, (Center, Center, Center))) - return Field{Center, Center, Center}(data, grid, bcs) + return Field(Center, Center, Center, arch, grid, bcs, data) end """ @@ -93,7 +93,7 @@ function XFaceField(FT::DataType, arch, grid, bcs = UVelocityBoundaryConditions(grid), data = new_data(FT, arch, grid, (Face, Center, Center))) - return Field{Face, Center, Center}(data, grid, bcs) + return Field(Face, Center, Center, arch, grid, bcs, data) end """ @@ -108,7 +108,7 @@ function YFaceField(FT::DataType, arch, grid, bcs = VVelocityBoundaryConditions(grid), data = new_data(FT, arch, grid, (Center, Face, Center))) - return Field{Center, Face, Center}(data, grid, bcs) + return Field(Center, Face, Center, arch, grid, bcs, data) end """ @@ -123,13 +123,13 @@ function ZFaceField(FT::DataType, arch, grid, bcs = WVelocityBoundaryConditions(grid), data = new_data(FT, arch, grid, (Center, Center, Face))) - return Field{Center, Center, Face}(data, grid, bcs) + return Field(Center, Center, Face, arch, grid, bcs, data) end - CenterField(arch::AbstractArchitecture, grid, args...) = CenterField(eltype(grid), arch, grid, args...) -XFaceField(arch::AbstractArchitecture, grid, args...) = XFaceField(eltype(grid), arch, grid, args...) -YFaceField(arch::AbstractArchitecture, grid, args...) = YFaceField(eltype(grid), arch, grid, args...) -ZFaceField(arch::AbstractArchitecture, grid, args...) = ZFaceField(eltype(grid), arch, grid, args...) +CenterField(arch::AbstractArchitecture, grid, args...) = CenterField(eltype(grid), arch, grid, args...) + XFaceField(arch::AbstractArchitecture, grid, args...) = XFaceField(eltype(grid), arch, grid, args...) + YFaceField(arch::AbstractArchitecture, grid, args...) = YFaceField(eltype(grid), arch, grid, args...) + ZFaceField(arch::AbstractArchitecture, grid, args...) = ZFaceField(eltype(grid), arch, grid, args...) @propagate_inbounds Base.setindex!(f::Field, v, inds...) = @inbounds setindex!(f.data, v, inds...) From 50c9b7a1230206d28cf8d9e564601a7acdb9c7de Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Tue, 9 Mar 2021 21:12:21 -0500 Subject: [PATCH 095/100] Nuke sandbox --- sandbox/mpi_turbulence.jl | 80 --------------------------------------- 1 file changed, 80 deletions(-) delete mode 100644 sandbox/mpi_turbulence.jl diff --git a/sandbox/mpi_turbulence.jl b/sandbox/mpi_turbulence.jl deleted file mode 100644 index d5b9824ad7..0000000000 --- a/sandbox/mpi_turbulence.jl +++ /dev/null @@ -1,80 +0,0 @@ -using MPI - -MPI.Initialized() || MPI.Init() - -using Statistics - -using Oceananigans.Advection -using Oceananigans.Fields -using Oceananigans.OutputWriters -using Oceananigans.AbstractOperations -using Oceananigans.Utils - -using Oceananigans.Solvers: calculate_pressure_right_hand_side!, copy_pressure! 
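(With the `Field` changes above, a field built on a multi-CPU architecture now picks up halo communication boundary conditions automatically. A usage sketch, assuming `arch` is a `MultiCPU` and `local_grid` is its rank-local grid:

    c = CenterField(Float64, arch, local_grid)   # BCs injected by the new Field method
    fill_halo_regions!(c, arch)
)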
- -topo = (Periodic, Periodic, Periodic) -full_grid = RegularRectilinearGrid(topology=topo, size=(512, 512, 1), extent=(4π, 4π, 1), halo=(3, 3, 3)) -arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) - -dm = DistributedIncompressibleModel( - architecture = arch, - grid = full_grid, - timestepper = :RungeKutta3, - advection = WENO5(), - closure = IsotropicDiffusivity(ν=1e-5) -) - -model = dm.model -u₀ = rand(size(model.grid)...); -u₀ .-= mean(u₀); -set!(model, u=u₀, v=u₀) - -progress(sim) = @info "Iteration: $(sim.model.clock.iteration), time: $(sim.model.clock.time)" -simulation = Simulation(model, Δt=0.05, stop_time=50, iteration_interval=1, progress=progress) - -u, v, w = model.velocities -outputs = (ζ=ComputedField(∂x(v) - ∂y(u)),) -simulation.output_writers[:fields] = NetCDFOutputWriter(model, outputs, filepath="mpi_turbulence_rank$(arch.my_rank).nc", schedule=TimeInterval(0.1)) - -MPI.Barrier(MPI.COMM_WORLD) - -run!(simulation) - -using Printf -using NCDatasets -using CairoMakie - -if arch.my_rank == 0 - ranks = 4 - - ds = [NCDataset("mpi_turbulence_rank$r.nc") for r in 0:ranks-1] - - frame = Node(1) - plot_title = @lift @sprintf("Oceananigans.jl + MPI: 2D turbulence t = %.1f", ds[1]["time"][$frame]) - ζ = [@lift ds[r]["ζ"][:, :, 1, $frame] for r in 1:ranks] - - fig = Figure(resolution=(1600, 1200)) - - for r in reverse(1:ranks) - ax = fig[ranks-r+1, 1] = Axis(fig, ylabel="rank $(r-1)", xticks = MultiplesTicks(9, pi, "π"), yticks = MultiplesTicks(3, pi, "π")) - hm = CairoMakie.heatmap!(ax, ds[r]["xF"], ds[r]["yF"], ζ[r], colormap=:balance, colorrange=(-2, 2)) - r > 1 && hidexdecorations!(ax, grid=false) - if r == 1 - cb = fig[:, 2] = Colorbar(fig, hm, label = "Vorticity ζ = ∂x(v) - ∂y(u)", width=30) - cb.height = Relative(2/3) - end - xlims!(ax, [0, 4π]) - ylims!(ax, [(r-1)*π, r*π]) - end - - supertitle = fig[0, :] = Label(fig, plot_title, textsize=30) - - trim!(fig.layout) - - record(fig, "mpi_turbulence.mp4", 1:length(ds[1]["time"])-1, framerate=30) do n - @info "Animating MPI turbulence frame $n/$(length(ds[1]["time"]))..." 
- frame[] = n - end - - [close(d) for d in ds] -end From 3a4f9ea8c15167d16728612613c182b1abc7888e Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Tue, 9 Mar 2021 21:40:45 -0500 Subject: [PATCH 096/100] Some renaming and added a communicator to `MultiCPU` --- .../distributed_fft_based_poisson_solver.jl | 2 +- src/Distributed/distributed_fields.jl | 2 +- .../distributed_incompressible_model.jl | 3 +- src/Distributed/halo_communication.jl | 30 ++--- src/Distributed/halo_communication_bcs.jl | 14 +-- src/Distributed/multi_architectures.jl | 42 +++---- test/test_distributed_models.jl | 112 +++++++++--------- test/test_distributed_poisson_solvers.jl | 4 +- 8 files changed, 104 insertions(+), 105 deletions(-) diff --git a/src/Distributed/distributed_fft_based_poisson_solver.jl b/src/Distributed/distributed_fft_based_poisson_solver.jl index 5595e1b35c..039e1f2238 100644 --- a/src/Distributed/distributed_fft_based_poisson_solver.jl +++ b/src/Distributed/distributed_fft_based_poisson_solver.jl @@ -17,7 +17,7 @@ function DistributedFFTBasedPoissonSolver(arch, full_grid, local_grid) λy = poisson_eigenvalues(full_grid.Ny, full_grid.Ly, 2, TY()) λz = poisson_eigenvalues(full_grid.Nz, full_grid.Lz, 3, TZ()) - I, J, K = arch.my_index + I, J, K = arch.local_index λx = λx[(J-1)*local_grid.Ny+1:J*local_grid.Ny, :, :] eigenvalues = (; λx, λy, λz) diff --git a/src/Distributed/distributed_fields.jl b/src/Distributed/distributed_fields.jl index f73878a7bc..3d9a345577 100644 --- a/src/Distributed/distributed_fields.jl +++ b/src/Distributed/distributed_fields.jl @@ -4,7 +4,7 @@ function Field(X, Y, Z, arch::AbstractMultiArchitecture, grid, bcs = FieldBoundaryConditions(grid, (X, Y, Z)), data = new_data(eltype(grid), arch, grid, (X, Y, Z))) - communicative_bcs = inject_halo_communication_boundary_conditions(bcs, arch.my_rank, arch.connectivity) + communicative_bcs = inject_halo_communication_boundary_conditions(bcs, arch.local_rank, arch.connectivity) return Field(X, Y, Z, child_architecture(arch), grid, communicative_bcs, data) end diff --git a/src/Distributed/distributed_incompressible_model.jl b/src/Distributed/distributed_incompressible_model.jl index c167be41ce..2379586113 100644 --- a/src/Distributed/distributed_incompressible_model.jl +++ b/src/Distributed/distributed_incompressible_model.jl @@ -9,8 +9,7 @@ using Oceananigans.Grids: halo_size ##### function DistributedIncompressibleModel(; architecture, grid, model_kwargs...) 
- my_rank = architecture.my_rank - i, j, k = architecture.my_index + i, j, k = architecture.local_index Rx, Ry, Rz = architecture.ranks my_connectivity = architecture.connectivity diff --git a/src/Distributed/halo_communication.jl b/src/Distributed/halo_communication.jl index 4a39a16d4b..95e880428d 100644 --- a/src/Distributed/halo_communication.jl +++ b/src/Distributed/halo_communication.jl @@ -31,16 +31,16 @@ for side in sides send_tag_fn_name = Symbol("$(side)_send_tag") recv_tag_fn_name = Symbol("$(side)_recv_tag") @eval begin - function $send_tag_fn_name(my_rank, rank_to_send_to) - from_digits = string(my_rank, pad=RANK_DIGITS) + function $send_tag_fn_name(local_rank, rank_to_send_to) + from_digits = string(local_rank, pad=RANK_DIGITS) to_digits = string(rank_to_send_to, pad=RANK_DIGITS) side_digit = string(side_id[Symbol($side_str)]) return parse(Int, from_digits * to_digits * side_digit) end - function $recv_tag_fn_name(my_rank, rank_to_recv_from) + function $recv_tag_fn_name(local_rank, rank_to_recv_from) from_digits = string(rank_to_recv_from, pad=RANK_DIGITS) - to_digits = string(my_rank, pad=RANK_DIGITS) + to_digits = string(local_rank, pad=RANK_DIGITS) side_digit = string(side_id[opposite_side[Symbol($side_str)]]) return parse(Int, from_digits * to_digits * side_digit) end @@ -105,13 +105,13 @@ for (side, opposite_side) in zip([:east, :north, :top], [:west, :south, :bottom] @eval begin function $fill_both_halos!(c, bc_side::HaloCommunicationBC, bc_opposite_side::HaloCommunicationBC, arch, barrier, grid, c_location, args...) @assert bc_side.condition.from == bc_opposite_side.condition.from # Extra protection in case of bugs - my_rank = bc_side.condition.from + local_rank = bc_side.condition.from - $send_side_halo(c, grid, c_location, my_rank, bc_side.condition.to) - $send_opposite_side_halo(c, grid, c_location, my_rank, bc_opposite_side.condition.to) + $send_side_halo(c, grid, c_location, local_rank, bc_side.condition.to) + $send_opposite_side_halo(c, grid, c_location, local_rank, bc_opposite_side.condition.to) - $recv_and_fill_side_halo!(c, grid, c_location, my_rank, bc_side.condition.to) - $recv_and_fill_opposite_side_halo!(c, grid, c_location, my_rank, bc_opposite_side.condition.to) + $recv_and_fill_side_halo!(c, grid, c_location, local_rank, bc_side.condition.to) + $recv_and_fill_opposite_side_halo!(c, grid, c_location, local_rank, bc_opposite_side.condition.to) return nothing, nothing end @@ -129,11 +129,11 @@ for side in sides side_send_tag = Symbol("$(side)_send_tag") @eval begin - function $send_side_halo(c, grid, c_location, my_rank, rank_to_send_to) + function $send_side_halo(c, grid, c_location, local_rank, rank_to_send_to) send_buffer = $underlying_side_boundary(c, grid, c_location) - send_tag = $side_send_tag(my_rank, rank_to_send_to) + send_tag = $side_send_tag(local_rank, rank_to_send_to) - @debug "Sending " * $side_str * " halo: my_rank=$my_rank, rank_to_send_to=$rank_to_send_to, send_tag=$send_tag" + @debug "Sending " * $side_str * " halo: local_rank=$local_rank, rank_to_send_to=$rank_to_send_to, send_tag=$send_tag" status = MPI.Isend(send_buffer, rank_to_send_to, send_tag, MPI.COMM_WORLD) return status @@ -152,11 +152,11 @@ for side in sides side_recv_tag = Symbol("$(side)_recv_tag") @eval begin - function $recv_and_fill_side_halo!(c, grid, c_location, my_rank, rank_to_recv_from) + function $recv_and_fill_side_halo!(c, grid, c_location, local_rank, rank_to_recv_from) recv_buffer = $underlying_side_halo(c, grid, c_location) - recv_tag = $side_recv_tag(my_rank, 
rank_to_recv_from) + recv_tag = $side_recv_tag(local_rank, rank_to_recv_from) - @debug "Receiving " * $side_str * " halo: my_rank=$my_rank, rank_to_recv_from=$rank_to_recv_from, recv_tag=$recv_tag" + @debug "Receiving " * $side_str * " halo: local_rank=$local_rank, rank_to_recv_from=$rank_to_recv_from, recv_tag=$recv_tag" MPI.Recv!(recv_buffer, rank_to_recv_from, recv_tag, MPI.COMM_WORLD) return nothing diff --git a/src/Distributed/halo_communication_bcs.jl b/src/Distributed/halo_communication_bcs.jl index ca72f7c485..56f2c511f7 100644 --- a/src/Distributed/halo_communication_bcs.jl +++ b/src/Distributed/halo_communication_bcs.jl @@ -20,7 +20,7 @@ HaloCommunicationRanks(; from, to) = HaloCommunicationRanks(from, to) print_condition(hcr::HaloCommunicationRanks) = "(from rank $(hcr.from) to rank $(hcr.to))" -function inject_halo_communication_boundary_conditions(field_bcs, my_rank, connectivity) +function inject_halo_communication_boundary_conditions(field_bcs, local_rank, connectivity) rank_east = connectivity.east rank_west = connectivity.west rank_north = connectivity.north @@ -28,12 +28,12 @@ function inject_halo_communication_boundary_conditions(field_bcs, my_rank, conne rank_top = connectivity.top rank_bottom = connectivity.bottom - east_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_east) - west_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_west) - north_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_north) - south_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_south) - top_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_top) - bottom_comm_ranks = HaloCommunicationRanks(from=my_rank, to=rank_bottom) + east_comm_ranks = HaloCommunicationRanks(from=local_rank, to=rank_east) + west_comm_ranks = HaloCommunicationRanks(from=local_rank, to=rank_west) + north_comm_ranks = HaloCommunicationRanks(from=local_rank, to=rank_north) + south_comm_ranks = HaloCommunicationRanks(from=local_rank, to=rank_south) + top_comm_ranks = HaloCommunicationRanks(from=local_rank, to=rank_top) + bottom_comm_ranks = HaloCommunicationRanks(from=local_rank, to=rank_bottom) east_comm_bc = HaloCommunicationBoundaryCondition(east_comm_ranks) west_comm_bc = HaloCommunicationBoundaryCondition(west_comm_ranks) diff --git a/src/Distributed/multi_architectures.jl b/src/Distributed/multi_architectures.jl index 8cd6f8c60e..122711771a 100644 --- a/src/Distributed/multi_architectures.jl +++ b/src/Distributed/multi_architectures.jl @@ -2,20 +2,22 @@ using Oceananigans.Architectures using Oceananigans.Grids: topology, validate_tupled_argument -struct MultiCPU{G, R, I, ρ, C} <: AbstractCPUArchitecture - full_grid :: G - my_rank :: R - my_index :: I - ranks :: ρ - connectivity :: C +struct MultiCPU{G, R, I, ρ, C, γ} <: AbstractCPUArchitecture + distributed_grid :: G + local_rank :: R + local_index :: I + ranks :: ρ + connectivity :: C + communicator :: γ end -struct MultiGPU{G, R, I, ρ, C} <: AbstractGPUArchitecture - full_grid :: G - my_rank :: R - my_index :: I - ranks :: ρ - connectivity :: C +struct MultiGPU{G, R, I, ρ, C, γ} <: AbstractGPUArchitecture + distributed_grid :: G + local_rank :: R + local_index :: I + ranks :: ρ + connectivity :: C + communicator :: γ end const AbstractMultiArchitecture = Union{MultiCPU, MultiGPU} @@ -109,7 +111,7 @@ end ##### Constructors ##### -function MultiCPU(; grid, ranks) +function MultiCPU(; grid, ranks, communicator=MPI.COMM_WORLD) MPI.Initialized() || error("Must call MPI.Init() before constructing a MultiCPU.") 
validate_tupled_argument(ranks, Int, "ranks") @@ -117,21 +119,19 @@ function MultiCPU(; grid, ranks) Rx, Ry, Rz = ranks total_ranks = Rx*Ry*Rz - comm = MPI.COMM_WORLD + mpi_ranks = MPI.Comm_size(communicator) + local_rank = MPI.Comm_rank(communicator) - mpi_ranks = MPI.Comm_size(comm) - my_rank = MPI.Comm_rank(comm) - - i, j, k = my_index = rank2index(my_rank, Rx, Ry, Rz) + i, j, k = local_index = rank2index(local_rank, Rx, Ry, Rz) if total_ranks != mpi_ranks throw(ArgumentError("ranks=($Rx, $Ry, $Rz) [$total_ranks total] inconsistent " * "with number of MPI ranks: $mpi_ranks.")) end - my_connectivity = RankConnectivity(my_index, ranks, topology(grid)) + local_connectivity = RankConnectivity(local_index, ranks, topology(grid)) - return MultiCPU(grid, my_rank, my_index, ranks, my_connectivity) + return MultiCPU(grid, local_rank, local_index, ranks, local_connectivity, communicator) end ##### @@ -140,7 +140,7 @@ end function Base.show(io::IO, arch::MultiCPU) c = arch.connectivity - print(io, "MultiCPU architecture (rank $(arch.my_rank)/$(prod(arch.ranks))) [index $(arch.my_index) / $(arch.ranks)]\n", + print(io, "MultiCPU architecture (rank $(arch.local_rank)/$(prod(arch.ranks))) [index $(arch.local_index) / $(arch.ranks)]\n", "└── connectivity:", isnothing(c.east) ? "" : " east=$(c.east)", isnothing(c.west) ? "" : " west=$(c.west)", diff --git a/test/test_distributed_models.jl b/test/test_distributed_models.jl index 81b31d5e34..dded3c090a 100644 --- a/test/test_distributed_models.jl +++ b/test/test_distributed_models.jl @@ -17,8 +17,8 @@ function test_triply_periodic_rank_connectivity_with_411_ranks() full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) - my_rank = MPI.Comm_rank(MPI.COMM_WORLD) - @test my_rank == index2rank(arch.my_index..., arch.ranks...) + local_rank = MPI.Comm_rank(MPI.COMM_WORLD) + @test local_rank == index2rank(arch.local_index..., arch.ranks...) connectivity = arch.connectivity @@ -32,16 +32,16 @@ function test_triply_periodic_rank_connectivity_with_411_ranks() # | 0 | 1 | 2 | 3 | # +---+---+---+---+ - if my_rank == 0 + if local_rank == 0 @test connectivity.east == 1 @test connectivity.west == 3 - elseif my_rank == 1 + elseif local_rank == 1 @test connectivity.east == 2 @test connectivity.west == 0 - elseif my_rank == 2 + elseif local_rank == 2 @test connectivity.east == 3 @test connectivity.west == 1 - elseif my_rank == 3 + elseif local_rank == 3 @test connectivity.east == 0 @test connectivity.west == 2 end @@ -54,8 +54,8 @@ function test_triply_periodic_rank_connectivity_with_141_ranks() full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) - my_rank = MPI.Comm_rank(MPI.COMM_WORLD) - @test my_rank == index2rank(arch.my_index..., arch.ranks...) + local_rank = MPI.Comm_rank(MPI.COMM_WORLD) + @test local_rank == index2rank(arch.local_index..., arch.ranks...) 
connectivity = arch.connectivity @@ -75,16 +75,16 @@ function test_triply_periodic_rank_connectivity_with_141_ranks() # | 0 | # +---+ - if my_rank == 0 + if local_rank == 0 @test connectivity.north == 1 @test connectivity.south == 3 - elseif my_rank == 1 + elseif local_rank == 1 @test connectivity.north == 2 @test connectivity.south == 0 - elseif my_rank == 2 + elseif local_rank == 2 @test connectivity.north == 3 @test connectivity.south == 1 - elseif my_rank == 3 + elseif local_rank == 3 @test connectivity.north == 0 @test connectivity.south == 2 end @@ -97,8 +97,8 @@ function test_triply_periodic_rank_connectivity_with_114_ranks() full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) - my_rank = MPI.Comm_rank(MPI.COMM_WORLD) - @test my_rank == index2rank(arch.my_index..., arch.ranks...) + local_rank = MPI.Comm_rank(MPI.COMM_WORLD) + @test local_rank == index2rank(arch.local_index..., arch.ranks...) connectivity = arch.connectivity @@ -121,16 +121,16 @@ function test_triply_periodic_rank_connectivity_with_114_ranks() # / 0 / # /---/ - if my_rank == 0 + if local_rank == 0 @test connectivity.top == 1 @test connectivity.bottom == 3 - elseif my_rank == 1 + elseif local_rank == 1 @test connectivity.top == 2 @test connectivity.bottom == 0 - elseif my_rank == 2 + elseif local_rank == 2 @test connectivity.top == 3 @test connectivity.bottom == 1 - elseif my_rank == 3 + elseif local_rank == 3 @test connectivity.top == 0 @test connectivity.bottom == 2 end @@ -143,8 +143,8 @@ function test_triply_periodic_rank_connectivity_with_221_ranks() full_grid = RegularRectilinearGrid(topology=topo, size=(8, 8, 8), extent=(1, 2, 3)) arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) - my_rank = MPI.Comm_rank(MPI.COMM_WORLD) - @test my_rank == index2rank(arch.my_index..., arch.ranks...) + local_rank = MPI.Comm_rank(MPI.COMM_WORLD) + @test local_rank == index2rank(arch.local_index..., arch.ranks...) 
connectivity = arch.connectivity @@ -158,22 +158,22 @@ function test_triply_periodic_rank_connectivity_with_221_ranks() # | 1 | 3 | # +---+---+ - if my_rank == 0 + if local_rank == 0 @test connectivity.east == 2 @test connectivity.west == 2 @test connectivity.north == 1 @test connectivity.south == 1 - elseif my_rank == 1 + elseif local_rank == 1 @test connectivity.east == 3 @test connectivity.west == 3 @test connectivity.north == 0 @test connectivity.south == 0 - elseif my_rank == 2 + elseif local_rank == 2 @test connectivity.east == 0 @test connectivity.west == 0 @test connectivity.north == 3 @test connectivity.south == 3 - elseif my_rank == 3 + elseif local_rank == 3 @test connectivity.east == 1 @test connectivity.west == 1 @test connectivity.north == 2 @@ -193,12 +193,12 @@ function test_triply_periodic_local_grid_with_411_ranks() arch = MultiCPU(grid=full_grid, ranks=(4, 1, 1)) model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - my_rank = MPI.Comm_rank(MPI.COMM_WORLD) + local_rank = MPI.Comm_rank(MPI.COMM_WORLD) local_grid = model.grid nx, ny, nz = size(local_grid) - @test local_grid.xF[1] == 0.25*my_rank - @test local_grid.xF[nx+1] == 0.25*(my_rank+1) + @test local_grid.xF[1] == 0.25*local_rank + @test local_grid.xF[nx+1] == 0.25*(local_rank+1) @test local_grid.yF[1] == 0 @test local_grid.yF[ny+1] == 2 @test local_grid.zF[1] == -3 @@ -213,14 +213,14 @@ function test_triply_periodic_local_grid_with_141_ranks() arch = MultiCPU(grid=full_grid, ranks=(1, 4, 1)) model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - my_rank = MPI.Comm_rank(MPI.COMM_WORLD) + local_rank = MPI.Comm_rank(MPI.COMM_WORLD) local_grid = model.grid nx, ny, nz = size(local_grid) @test local_grid.xF[1] == 0 @test local_grid.xF[nx+1] == 1 - @test local_grid.yF[1] == 0.5*my_rank - @test local_grid.yF[ny+1] == 0.5*(my_rank+1) + @test local_grid.yF[1] == 0.5*local_rank + @test local_grid.yF[ny+1] == 0.5*(local_rank+1) @test local_grid.zF[1] == -3 @test local_grid.zF[nz+1] == 0 @@ -233,7 +233,7 @@ function test_triply_periodic_local_grid_with_114_ranks() arch = MultiCPU(grid=full_grid, ranks=(1, 1, 4)) model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - my_rank = MPI.Comm_rank(MPI.COMM_WORLD) + local_rank = MPI.Comm_rank(MPI.COMM_WORLD) local_grid = model.grid nx, ny, nz = size(local_grid) @@ -241,8 +241,8 @@ function test_triply_periodic_local_grid_with_114_ranks() @test local_grid.xF[nx+1] == 1 @test local_grid.yF[1] == 0 @test local_grid.yF[ny+1] == 2 - @test local_grid.zF[1] == -3 + 0.75*my_rank - @test local_grid.zF[nz+1] == -3 + 0.75*(my_rank+1) + @test local_grid.zF[1] == -3 + 0.75*local_rank + @test local_grid.zF[nz+1] == -3 + 0.75*(local_rank+1) return nothing end @@ -253,7 +253,7 @@ function test_triply_periodic_local_grid_with_221_ranks() arch = MultiCPU(grid=full_grid, ranks=(2, 2, 1)) model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) - i, j, k = arch.my_index + i, j, k = arch.local_index local_grid = model.grid nx, ny, nz = size(local_grid) @@ -350,17 +350,17 @@ function test_triply_periodic_halo_communication_with_411_ranks(halo) model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) for field in merge(fields(model), model.pressures) - interior(field) .= arch.my_rank + interior(field) .= arch.local_rank fill_halo_regions!(field, arch) @test all(east_halo(field) .== 
arch.connectivity.east) @test all(west_halo(field) .== arch.connectivity.west) - @test all(interior(field) .== arch.my_rank) - @test all(north_halo(field, include_corners=false) .== arch.my_rank) - @test all(south_halo(field, include_corners=false) .== arch.my_rank) - @test all(top_halo(field, include_corners=false) .== arch.my_rank) - @test all(bottom_halo(field, include_corners=false) .== arch.my_rank) + @test all(interior(field) .== arch.local_rank) + @test all(north_halo(field, include_corners=false) .== arch.local_rank) + @test all(south_halo(field, include_corners=false) .== arch.local_rank) + @test all(top_halo(field, include_corners=false) .== arch.local_rank) + @test all(bottom_halo(field, include_corners=false) .== arch.local_rank) end return nothing @@ -373,17 +373,17 @@ function test_triply_periodic_halo_communication_with_141_ranks(halo) model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) for field in merge(fields(model), model.pressures) - interior(field) .= arch.my_rank + interior(field) .= arch.local_rank fill_halo_regions!(field, arch) @test all(north_halo(field) .== arch.connectivity.north) @test all(south_halo(field) .== arch.connectivity.south) - @test all(interior(field) .== arch.my_rank) - @test all(east_halo(field, include_corners=false) .== arch.my_rank) - @test all(west_halo(field, include_corners=false) .== arch.my_rank) - @test all(top_halo(field, include_corners=false) .== arch.my_rank) - @test all(bottom_halo(field, include_corners=false) .== arch.my_rank) + @test all(interior(field) .== arch.local_rank) + @test all(east_halo(field, include_corners=false) .== arch.local_rank) + @test all(west_halo(field, include_corners=false) .== arch.local_rank) + @test all(top_halo(field, include_corners=false) .== arch.local_rank) + @test all(bottom_halo(field, include_corners=false) .== arch.local_rank) end return nothing @@ -396,17 +396,17 @@ function test_triply_periodic_halo_communication_with_114_ranks(halo) model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) for field in merge(fields(model), model.pressures) - interior(field) .= arch.my_rank + interior(field) .= arch.local_rank fill_halo_regions!(field, arch) @test all(top_halo(field) .== arch.connectivity.top) @test all(bottom_halo(field) .== arch.connectivity.bottom) - @test all(interior(field) .== arch.my_rank) - @test all(east_halo(field, include_corners=false) .== arch.my_rank) - @test all(west_halo(field, include_corners=false) .== arch.my_rank) - @test all(north_halo(field, include_corners=false) .== arch.my_rank) - @test all(south_halo(field, include_corners=false) .== arch.my_rank) + @test all(interior(field) .== arch.local_rank) + @test all(east_halo(field, include_corners=false) .== arch.local_rank) + @test all(west_halo(field, include_corners=false) .== arch.local_rank) + @test all(north_halo(field, include_corners=false) .== arch.local_rank) + @test all(south_halo(field, include_corners=false) .== arch.local_rank) end return nothing @@ -419,7 +419,7 @@ function test_triply_periodic_halo_communication_with_221_ranks(halo) model = DistributedIncompressibleModel(architecture=arch, grid=full_grid, pressure_solver=nothing) for field in merge(fields(model), model.pressures) - interior(field) .= arch.my_rank + interior(field) .= arch.local_rank fill_halo_regions!(field, arch) @test all(east_halo(field, include_corners=false) .== arch.connectivity.east) @@ -427,9 +427,9 @@ function 
test_triply_periodic_halo_communication_with_221_ranks(halo) @test all(north_halo(field, include_corners=false) .== arch.connectivity.north) @test all(south_halo(field, include_corners=false) .== arch.connectivity.south) - @test all(interior(field) .== arch.my_rank) - @test all(top_halo(field, include_corners=false) .== arch.my_rank) - @test all(bottom_halo(field, include_corners=false) .== arch.my_rank) + @test all(interior(field) .== arch.local_rank) + @test all(top_halo(field, include_corners=false) .== arch.local_rank) + @test all(bottom_halo(field, include_corners=false) .== arch.local_rank) end return nothing diff --git a/test/test_distributed_poisson_solvers.jl b/test/test_distributed_poisson_solvers.jl index 7726f5cd94..82e6d7c011 100644 --- a/test/test_distributed_poisson_solvers.jl +++ b/test/test_distributed_poisson_solvers.jl @@ -50,7 +50,7 @@ function divergence_free_poisson_solution_triply_periodic(grid_points, ranks) solve_poisson_equation!(solver) p_bcs = PressureBoundaryConditions(local_grid) - p_bcs = inject_halo_communication_boundary_conditions(p_bcs, arch.my_rank, arch.connectivity) + p_bcs = inject_halo_communication_boundary_conditions(p_bcs, arch.local_rank, arch.connectivity) ϕ = CenterField(Float64, child_architecture(arch), local_grid, p_bcs) # "pressure" ∇²ϕ = CenterField(Float64, child_architecture(arch), local_grid, p_bcs) @@ -64,5 +64,5 @@ end @testset "Distributed FFT-based Poisson solver" begin @info " Testing distributed FFT-based Poisson solver..." @test divergence_free_poisson_solution_triply_periodic((16, 16, 1), (1, 4, 1)) - @test divergence_free_poisson_solution_triply_periodic((64, 64, 1), (1, 4, 1)) + @test divergence_free_poisson_solution_triply_periodic((44, 44, 1), (1, 4, 1)) end From 447e121c404374593cea053096dabae3ce86f1ea Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Tue, 9 Mar 2021 22:03:19 -0500 Subject: [PATCH 097/100] Left to right --- .../distributed_incompressible_model.jl | 1 + src/Distributed/halo_communication.jl | 30 +++++++++---------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/Distributed/distributed_incompressible_model.jl b/src/Distributed/distributed_incompressible_model.jl index 2379586113..87d4c337ff 100644 --- a/src/Distributed/distributed_incompressible_model.jl +++ b/src/Distributed/distributed_incompressible_model.jl @@ -23,6 +23,7 @@ function DistributedIncompressibleModel(; architecture, grid, model_kwargs...) # Make sure we can put an integer number of grid points in each rank. # Will generalize in the future. + # TODO: Check that we have enough grid points on each rank to fit the halos! @assert isinteger(Nx / Rx) @assert isinteger(Ny / Ry) @assert isinteger(Nz / Rz) diff --git a/src/Distributed/halo_communication.jl b/src/Distributed/halo_communication.jl index 95e880428d..b14daa6d6d 100644 --- a/src/Distributed/halo_communication.jl +++ b/src/Distributed/halo_communication.jl @@ -13,9 +13,9 @@ sides = (:west, :east, :south, :north, :top, :bottom) side_id = Dict(side => n for (n, side) in enumerate(sides)) opposite_side = Dict( - :east => :west, :west => :east, - :north => :south, :south => :north, - :top => :bottom, :bottom => :top + :west => :east, :east => :west, + :south => :north, :north => :south, + :bottom => :top, :top => :bottom ) # Define functions that return unique send and recv MPI tags for each side. 
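(For reference, a worked example of these tag helpers: with RANK_DIGITS = 3,
rank 2 sending its west halo to rank 1 computes west_send_tag(2, 1), i.e.
parse(Int, "002" * "001" * "1") == 20011, since side_id[:west] == 1. Rank 1
receiving into its east halo computes east_recv_tag(1, 2), which uses the
opposite side's id, side_id[:west], and so yields the same 20011. Matching
tags are what pair each Isend with the intended Recv!.)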
@@ -58,11 +58,11 @@ function fill_halo_regions!(c::AbstractArray, bcs, arch::AbstractMultiArchitectu barrier = Event(device(child_architecture(arch))) - east_event, west_event = fill_east_and_west_halos!(c, bcs.east, bcs.west, arch, barrier, grid, c_location, args...) - north_event, south_event = fill_north_and_south_halos!(c, bcs.north, bcs.south, arch, barrier, grid, c_location, args...) - top_event, bottom_event = fill_top_and_bottom_halos!(c, bcs.top, bcs.bottom, arch, barrier, grid, c_location, args...) + west_event, east_event = fill_west_and_east_halos!(c, bcs.west, bcs.east, arch, barrier, grid, c_location, args...) + south_event, north_event = fill_south_and_north_halos!(c, bcs.south, bcs.north, arch, barrier, grid, c_location, args...) + bottom_event, top_event = fill_bottom_and_top_halos!(c, bcs.bottom, bcs.top, arch, barrier, grid, c_location, args...) - events = [east_event, west_event, north_event, south_event, top_event, bottom_event] + events = [west_event, east_event, south_event, north_event, bottom_event, top_event] events = filter(e -> e isa Event, events) wait(device(child_architecture(arch)), MultiEvent(Tuple(events))) @@ -70,12 +70,12 @@ function fill_halo_regions!(c::AbstractArray, bcs, arch::AbstractMultiArchitectu end ##### -##### fill_east_and_west_halos! } -##### fill_north_and_south_halos! } for non-communicating boundary conditions (fallback) -##### fill_top_and_bottom_halos! } +##### fill_west_and_east_halos! } +##### fill_south_and_north_halos! } for non-communicating boundary conditions (fallback) +##### fill_bottom_and_top_halos! } ##### -for (side, opposite_side) in zip([:east, :north, :top], [:west, :south, :bottom]) +for (side, opposite_side) in zip([:west, :south, :bottom], [:east, :north, :top]) fill_both_halos! = Symbol("fill_$(side)_and_$(opposite_side)_halos!") fill_side_halo! = Symbol("fill_$(side)_halo!") fill_opposite_side_halo! = Symbol("fill_$(opposite_side)_halo!") @@ -90,12 +90,12 @@ for (side, opposite_side) in zip([:east, :north, :top], [:west, :south, :bottom] end ##### -##### fill_east_and_west_halos! } -##### fill_north_and_south_halos! } for when both halos are communicative -##### fill_top_and_bottom_halos! } +##### fill_west_and_east_halos! } +##### fill_south_and_north_halos! } for when both halos are communicative +##### fill_bottom_and_top_halos! } ##### -for (side, opposite_side) in zip([:east, :north, :top], [:west, :south, :bottom]) +for (side, opposite_side) in zip([:west, :south, :bottom], [:east, :north, :top]) fill_both_halos! 
= Symbol("fill_$(side)_and_$(opposite_side)_halos!") send_side_halo = Symbol("send_$(side)_halo") send_opposite_side_halo = Symbol("send_$(opposite_side)_halo") From 3992e473858a4f3c85d914fd2f23fd147fd623de Mon Sep 17 00:00:00 2001 From: "ali.hh.ramadhan@gmail.com" Date: Tue, 9 Mar 2021 23:32:39 -0500 Subject: [PATCH 098/100] Strong scaling benchmark for incompressible model --- benchmark/Manifest.toml | 63 +++++++++++++------ benchmark/Project.toml | 1 + .../strong_scaling_incompressible_model.jl | 39 ++++++++++++ ...ong_scaling_incompressible_model_single.jl | 40 ++++++++++++ 4 files changed, 123 insertions(+), 20 deletions(-) create mode 100644 benchmark/strong_scaling_incompressible_model.jl create mode 100644 benchmark/strong_scaling_incompressible_model_single.jl diff --git a/benchmark/Manifest.toml b/benchmark/Manifest.toml index 11edda056a..f40874ac4e 100644 --- a/benchmark/Manifest.toml +++ b/benchmark/Manifest.toml @@ -2,15 +2,14 @@ [[AbstractFFTs]] deps = ["LinearAlgebra"] -git-tree-sha1 = "8ed9de2f1b1a9b1dee48582ad477c6e67b83eb2c" +git-tree-sha1 = "485ee0867925449198280d4af84bdb46a2a404d0" uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" -version = "1.0.0" +version = "1.0.1" [[AbstractTrees]] -deps = ["Markdown"] -git-tree-sha1 = "33e450545eaf7699da1a6e755f9ea65f14077a45" +git-tree-sha1 = "03e0550477d86222521d254b741d470ba17ea0b5" uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" -version = "0.3.3" +version = "0.3.4" [[Adapt]] deps = ["LinearAlgebra"] @@ -31,9 +30,9 @@ uuid = "ab4f0b2a-ad5b-11e8-123f-65d77653426b" version = "0.1.0" [[BSON]] -git-tree-sha1 = "2878972c4bc17d9c8d26d48d9ef00fcfe1899e7a" +git-tree-sha1 = "db18b5ea04686f73d269e10bdb241947c40d7d6f" uuid = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" -version = "0.3.0" +version = "0.3.2" [[Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" @@ -67,6 +66,12 @@ git-tree-sha1 = "de4f08843c332d355852721adb1592bce7924da3" uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" version = "0.9.29" +[[CodecZlib]] +deps = ["TranscodingStreams", "Zlib_jll"] +git-tree-sha1 = "ded953804d019afa9a3f98981d99b33e3db7b6da" +uuid = "944b1d66-785c-5afd-91f1-9de20f533193" +version = "0.7.0" + [[Compat]] deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] git-tree-sha1 = "919c7f3151e79ff196add81d7f4e45d91bbf420b" @@ -178,6 +183,12 @@ git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" uuid = "82899510-4779-5014-852e-03e436cf321d" version = "1.0.0" +[[JLD2]] +deps = ["CodecZlib", "DataStructures", "MacroTools", "Mmap", "Pkg", "Printf", "Requires", "UUIDs"] +git-tree-sha1 = "b8343a7f96591404ade118b3a7014e1a52062465" +uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +version = "0.4.2" + [[JLLWrappers]] git-tree-sha1 = "a431f5f2ca3f4feef3bd7a5e94b8b8d4f2f647a0" uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" @@ -248,9 +259,9 @@ uuid = "a63ad114-7e13-5084-954f-fe012c677804" [[NNlib]] deps = ["ChainRulesCore", "Compat", "LinearAlgebra", "Pkg", "Requires", "Statistics"] -git-tree-sha1 = "df42d0816edfc24f5b82a728f46381613c4dff79" +git-tree-sha1 = "5ce2e4b2bfe3811811e7db4b6a148439806fd2f8" uuid = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" -version = "0.7.14" +version = "0.7.16" [[OrderedCollections]] git-tree-sha1 = "4fa2ba51070ec13fcc7517db714445b4ab986bdf" @@ -259,9 +270,9 @@ version = "1.4.0" [[Parsers]] deps = ["Dates"] -git-tree-sha1 = 
"50c9a9ed8c714945e01cd53a21007ed3865ed714" +git-tree-sha1 = "223a825cccef2228f3fdbf2ecc7ca93363059073" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "1.0.15" +version = "1.0.16" [[Pkg]] deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] @@ -274,10 +285,10 @@ uuid = "32113eaa-f34f-5b0d-bd6c-c81e245fc73d" version = "0.2.10" [[PooledArrays]] -deps = ["DataAPI"] -git-tree-sha1 = "0e8f5c428a41a81cd71f76d76f2fc3415fe5a676" +deps = ["DataAPI", "Future"] +git-tree-sha1 = "cde4ce9d6f33219465b55162811d8de8139c0414" uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" -version = "1.1.0" +version = "1.2.1" [[PrettyTables]] deps = ["Crayons", "Formatting", "Markdown", "Reexport", "Tables"] @@ -310,9 +321,9 @@ version = "1.0.0" [[Requires]] deps = ["UUIDs"] -git-tree-sha1 = "cfbac6c1ed70c002ec6361e7fd334f02820d6419" +git-tree-sha1 = "4036a3bd08ac7e968e27c203d45f5fff15020621" uuid = "ae029012-a4dd-5104-9daa-d747884805df" -version = "1.1.2" +version = "1.1.3" [[SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" @@ -367,9 +378,9 @@ version = "1.0.0" [[Tables]] deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"] -git-tree-sha1 = "a716dde43d57fa537a19058d044b495301ba6565" +git-tree-sha1 = "f03fc113290ee7726b173fc7ea661260d204b3f2" uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" -version = "1.3.2" +version = "1.4.0" [[TerminalLoggers]] deps = ["LeftChildRightSiblingTrees", "Logging", "Markdown", "Printf", "ProgressLogging", "UUIDs"] @@ -383,9 +394,15 @@ uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [[TimerOutputs]] deps = ["Printf"] -git-tree-sha1 = "3318281dd4121ecf9713ce1383b9ace7d7476fdd" +git-tree-sha1 = "32cdbe6cd2d214c25a0b88f985c9e0092877c236" uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" -version = "0.5.7" +version = "0.5.8" + +[[TranscodingStreams]] +deps = ["Random", "Test"] +git-tree-sha1 = "7c53c35547de1c5b9d46a4797cf6d8253807108c" +uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" +version = "0.9.5" [[UUIDs]] deps = ["Random", "SHA"] @@ -393,3 +410,9 @@ uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" [[Unicode]] uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[Zlib_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "320228915c8debb12cb434c59057290f0834dbf6" +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.11+18" diff --git a/benchmark/Project.toml b/benchmark/Project.toml index 4861b22f9e..86bb462f92 100644 --- a/benchmark/Project.toml +++ b/benchmark/Project.toml @@ -8,6 +8,7 @@ BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" +JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" PkgBenchmark = "32113eaa-f34f-5b0d-bd6c-c81e245fc73d" PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" diff --git a/benchmark/strong_scaling_incompressible_model.jl b/benchmark/strong_scaling_incompressible_model.jl new file mode 100644 index 0000000000..7ae1d8cb45 --- /dev/null +++ b/benchmark/strong_scaling_incompressible_model.jl @@ -0,0 +1,39 @@ +using JLD2 +using BenchmarkTools +using Benchmarks + +# Benchmark parameters + +Nx = 128 +Ny = 128 +Nz = Nx + +ranks = (1, 2, 4) + +# Run and collect benchmarks + +print_system_info() + +for r in ranks + @info "Benchmarking distributed incompressible model strong scaling [N=($Nx, $Ny, $Nz), ranks=$r]..." 
+ julia = Base.julia_cmd() + run(`mpiexec -np $r $julia --project strong_scaling_incompressible_model_single.jl $Nx $Ny $Nz`) +end + +suite = BenchmarkGroup(["size", "ranks"]) +for r in ranks + jldopen("strong_scaling_incompressible_model_$r.jld2", "r") do file + suite[((Nx, Ny, Nz), r)] = file["trial"] + end +end + +# Summarize benchmarks + +df = benchmarks_dataframe(suite) +sort!(df, :ranks) +benchmarks_pretty_table(df, title="Incompressible model strong scaling benchmark") + +suite_Δ = speedups_suite(suite, base_case=((Nx, Ny, Nz), 1)) +df_Δ = speedups_dataframe(suite_Δ) +sort!(df_Δ, :ranks) +benchmarks_pretty_table(df_Δ, title="Incompressible model strong scaling speedup") diff --git a/benchmark/strong_scaling_incompressible_model_single.jl b/benchmark/strong_scaling_incompressible_model_single.jl new file mode 100644 index 0000000000..28ab30e60d --- /dev/null +++ b/benchmark/strong_scaling_incompressible_model_single.jl @@ -0,0 +1,40 @@ +using Logging +using MPI +using JLD2 +using BenchmarkTools + +using Oceananigans +using Oceananigans.Distributed +using Benchmarks + +Logging.global_logger(OceananigansLogger()) + +MPI.Init() +comm = MPI.COMM_WORLD + +R = MPI.Comm_size(comm) + +Nx = parse(Int, ARGS[1]) +Ny = parse(Int, ARGS[2]) +Nz = parse(Int, ARGS[3]) + +@info "Setting up distributed incompressible model with N=($Nx, $Ny, $Nz) grid points on $R rank(s)..." + +topo = (Periodic, Periodic, Periodic) +distributed_grid = RegularRectilinearGrid(topology=topo, size=(Nx, Ny, Nz), extent=(1, 1, 1)) +arch = MultiCPU(grid=distributed_grid, ranks=(1, R, 1)) +model = DistributedIncompressibleModel(architecture=arch, grid=distributed_grid) + +@info "Warming up distributed incompressible model..." + +time_step!(model, 1) # warmup + +@info "Benchmarking distributed incompressible model..." + +trial = @benchmark begin + @sync_gpu time_step!($model, 1) +end samples=10 + +jldopen("strong_scaling_incompressible_model_$R.jld2", "w") do file + file["trial"] = trial +end From b3a75b8b94f0c068fdad147f691fa58f9afd440a Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Wed, 10 Mar 2021 01:18:14 -0500 Subject: [PATCH 099/100] Slightly better strong scaling benchmark --- benchmark/strong_scaling_incompressible_model.jl | 6 +++--- benchmark/strong_scaling_incompressible_model_single.jl | 7 ++++++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/benchmark/strong_scaling_incompressible_model.jl b/benchmark/strong_scaling_incompressible_model.jl index 7ae1d8cb45..3b24cb272a 100644 --- a/benchmark/strong_scaling_incompressible_model.jl +++ b/benchmark/strong_scaling_incompressible_model.jl @@ -4,11 +4,11 @@ using Benchmarks # Benchmark parameters -Nx = 128 -Ny = 128 +Nx = 256 +Ny = 256 Nz = Nx -ranks = (1, 2, 4) +ranks = (1, 2, 4, 8, 16) # Run and collect benchmarks diff --git a/benchmark/strong_scaling_incompressible_model_single.jl b/benchmark/strong_scaling_incompressible_model_single.jl index 28ab30e60d..9e5d3aa4c5 100644 --- a/benchmark/strong_scaling_incompressible_model_single.jl +++ b/benchmark/strong_scaling_incompressible_model_single.jl @@ -12,6 +12,7 @@ Logging.global_logger(OceananigansLogger()) MPI.Init() comm = MPI.COMM_WORLD +local_rank = MPI.Comm_rank(comm) R = MPI.Comm_size(comm) Nx = parse(Int, ARGS[1]) @@ -33,8 +34,12 @@ time_step!(model, 1) # warmup trial = @benchmark begin @sync_gpu time_step!($model, 1) + MPI.Barrier(comm) end samples=10 -jldopen("strong_scaling_incompressible_model_$R.jld2", "w") do file +@info "Rank $local_rank is done benchmarking!" 
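(Two notes on the benchmark scripts above. First, each rank count is benchmarked
in its own mpiexec launch: an MPI world size is fixed for the lifetime of a
process and MPI.Init() cannot be called twice, so the driver shells out to fresh
julia processes rather than looping over rank counts in-process. Second, the
MPI.Barrier(comm) inside the benchmarked expression keeps the ranks in lockstep,
so each sample reflects the slowest rank rather than letting fast ranks race
ahead.)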
+ +jldopen("strong_scaling_incompressible_model_$(R)_$local_rank.jld2", "w") do file file["trial"] = trial end + From ab3e539789119f91f0ead1b7ea610e89dc527ef6 Mon Sep 17 00:00:00 2001 From: ali-ramadhan Date: Wed, 10 Mar 2021 01:20:20 -0500 Subject: [PATCH 100/100] Update [compat] entries --- Project.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Project.toml b/Project.toml index 855bc1e7d2..d9e42a09d0 100644 --- a/Project.toml +++ b/Project.toml @@ -35,9 +35,11 @@ FFTW = "^1" Glob = "1.3" JLD2 = "^0.2, ^0.3, 0.4" KernelAbstractions = "^0.3, 0.4, 0.5" +MPI = "0.16" NCDatasets = "^0.10, ^0.11" OffsetArrays = "^1.4" OrderedCollections = "^1.1" +PencilFFTs = "0.12" SafeTestsets = "0.0.1" SeawaterPolynomials = "^0.2" StructArrays = "0.4, 0.5"
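For reference, a hypothetical driver-side sketch for collecting the per-rank
trial files written above (assumes a finished run on four ranks; reducing to the
slowest rank is a deliberate choice, since a distributed time step is only as
fast as its slowest rank):

    using JLD2, BenchmarkTools

    R = 4
    trials = [jldopen(f -> f["trial"], "strong_scaling_incompressible_model_$(R)_$(r).jld2", "r")
              for r in 0:R-1]

    slowest = maximum(minimum(t).time for t in trials)  # fastest sample per rank, in nanoseconds
    @info "One time step on $R ranks takes $(slowest / 1e9) seconds on the slowest rank"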