Skip to content

Commit

Permalink
use blocking synchronize to reduce poll waiting
Browse files: browse the repository at this point in the history
  • Loading branch information
simonbyrne committed Oct 19, 2023
1 parent 9c22047 commit 6a59fc8
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions src/Spaces/dss_cuda.jl
Original file line number Diff line number Diff line change
Expand Up @@ -427,13 +427,13 @@ function fill_send_buffer!(::ClimaComms.CUDADevice, dss_buffer::DSSBuffer)
if nsend > 0
nitems = nsend * nlevels * nfid
nthreads, nblocks = _configure_threadblock(nitems)
CUDA.synchronize() # CUDA MPI uses a separate stream. This will synchronize across streams
CUDA.synchronize(;blocking=true) # CUDA MPI uses a separate stream. This will synchronize across streams
@cuda threads = (nthreads) blocks = (nblocks) fill_send_buffer_kernel!(
send_data,
send_buf_idx,
pperimeter_data,
)
CUDA.synchronize() # CUDA MPI uses a separate stream. This will synchronize across streams
CUDA.synchronize(;blocking=true) # CUDA MPI uses a separate stream. This will synchronize across streams
end
return nothing
end
Expand Down Expand Up @@ -468,13 +468,13 @@ function load_from_recv_buffer!(::ClimaComms.CUDADevice, dss_buffer::DSSBuffer)
if nrecv > 0
nitems = nrecv * nlevels * nfid
nthreads, nblocks = _configure_threadblock(nitems)
CUDA.synchronize()
CUDA.synchronize(;blocking=true)
@cuda threads = (nthreads) blocks = (nblocks) load_from_recv_buffer_kernel!(
pperimeter_data,
recv_data,
recv_buf_idx,
)
CUDA.synchronize()
CUDA.synchronize(;blocking=true)
end
return nothing
end
Expand Down

0 comments on commit 6a59fc8

Please sign in to comment.