Move buildkite to new central
Sbozzolo committed Apr 23, 2024
1 parent b61c5cf commit dd18e6a
Showing 7 changed files with 105 additions and 191 deletions.
18 changes: 0 additions & 18 deletions .buildkite/JuliaProject.toml

This file was deleted.

18 changes: 9 additions & 9 deletions .buildkite/pipeline.yml
@@ -1,25 +1,25 @@
agents:
queue: central
queue: new-central
slurm_mem: 8G
modules: julia/1.9.3 cuda/12.2 ucx/1.14.1_cuda-12.2 openmpi/4.1.5_cuda-12.2 hdf5/1.12.2-ompi415 nsight-systems/2023.2.1
modules: climacommon/2024_03_18

env:
JULIA_LOAD_PATH: "${JULIA_LOAD_PATH}:${BUILDKITE_BUILD_CHECKOUT_PATH}/.buildkite"
JULIA_CUDA_USE_BINARYBUILDER: false
JULIA_CUDA_MEMORY_POOL: none
OPENBLAS_NUM_THREADS: 1

steps:
- label: "initialize"
key: "initialize"
command:
- echo "--- Instantiate project"
- "julia --project -e 'using Pkg; Pkg.instantiate(;verbose=true); Pkg.precompile(;strict=true)'"
- "julia --project -e 'using Pkg; Pkg.add(\"CUDA\"); Pkg.add(\"MPI\"); Pkg.instantiate(;verbose=true); Pkg.precompile(;strict=true)'"
# force the initialization of the CUDA runtime as it is lazily loaded by default
- "julia --project -e 'using CUDA; CUDA.precompile_runtime()'"
- "julia --project -e 'using Pkg; Pkg.status()'"

- "julia --project=test -e 'using Pkg; Pkg.develop(;path=\".\"); Pkg.instantiate(;verbose=true); Pkg.precompile(;strict=true)'"

agents:
slurm_gpus: 1
slurm_cpus_per_task: 8
env:
JULIA_NUM_PRECOMPILE_TASKS: 8
@@ -29,7 +29,7 @@ steps:
- label: ":computer: tests"
key: "cpu_tests"
command:
- julia --project -e 'using Pkg; Pkg.test()'
- julia --project=test test/runtests.jl
env:
CLIMACOMMS_TEST_DEVICE: CPU
agents:
@@ -39,7 +39,7 @@ steps:
- label: ":computer: threaded tests"
key: "cpu_threaded_tests"
command:
- julia --threads 8 --project -e 'using Pkg; Pkg.test()'
- julia --threads 8 --project=test test/runtests.jl
env:
CLIMACOMMS_TEST_DEVICE: CPU
agents:
@@ -48,7 +48,7 @@ steps:
- label: ":flower_playing_cards: tests"
key: "gpu_tests"
command:
- julia --project -e 'using Pkg; Pkg.test()'
- julia --project=test test/runtests.jl
env:
CLIMACOMMS_TEST_DEVICE: CUDA
agents:
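For context, the initialize step above replaces the single `Pkg.instantiate` call with a dedicated test environment that the later steps run via `--project=test`. A minimal sketch of reproducing the test-environment half of that step locally, assuming the repository root is the current directory and that a test/Project.toml exists (as `--project=test` implies):

# Local reproduction of the CI initialize step (sketch, not part of this commit).
using Pkg

Pkg.activate("test")              # the environment later used via --project=test
Pkg.develop(path = ".")           # develop the local ClimaComms checkout into it
Pkg.instantiate(verbose = true)   # install the test dependencies
Pkg.precompile(strict = true)     # fail early on precompilation errors

# The suite then runs the same way the pipeline does, e.g.
#   julia --threads 8 --project=test test/runtests.jl
# with CLIMACOMMS_TEST_DEVICE=CPU or CUDA selecting the device under test.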
5 changes: 1 addition & 4 deletions docs/make.jl
@@ -1,9 +1,6 @@
using Documenter, ClimaComms

format = Documenter.HTML(
prettyurls = !isempty(get(ENV, "CI", "")),
collapselevel = 1,
)
format = Documenter.HTML(prettyurls = !isempty(get(ENV, "CI", "")), collapselevel = 1)
makedocs(
sitename = "ClimaComms.jl",
warnonly = true,
95 changes: 21 additions & 74 deletions ext/ClimaCommsMPIExt.jl
@@ -5,8 +5,7 @@ import MPI
import ClimaComms
import ClimaComms: MPICommsContext

ClimaComms.MPICommsContext(device = ClimaComms.device()) =
ClimaComms.MPICommsContext(device, MPI.COMM_WORLD)
ClimaComms.MPICommsContext(device = ClimaComms.device()) = ClimaComms.MPICommsContext(device, MPI.COMM_WORLD)

function init(ctx::MPICommsContext)
if !MPI.Initialized()
@@ -15,14 +14,12 @@ function init(ctx::MPICommsContext)
# TODO: Generalize this to arbitrary accelerators
if ctx.device isa ClimaComms.CUDADevice
if !MPI.has_cuda()
error("MPI implementation is not built with CUDA-aware interface")
error(
"MPI implementation is not built with CUDA-aware interface. If your MPI is not OpenMPI, you have to set JULIA_MPI_HAS_CUDA to `true`",
)
end
# assign GPUs based on local rank
local_comm = MPI.Comm_split_type(
ctx.mpicomm,
MPI.COMM_TYPE_SHARED,
MPI.Comm_rank(ctx.mpicomm),
)
local_comm = MPI.Comm_split_type(ctx.mpicomm, MPI.COMM_TYPE_SHARED, MPI.Comm_rank(ctx.mpicomm))
ClimaComms._assign_device(ctx.device, MPI.Comm_rank(local_comm))
MPI.free(local_comm)
end
@@ -37,26 +34,20 @@ ClimaComms.nprocs(ctx::MPICommsContext) = MPI.Comm_size(ctx.mpicomm)

ClimaComms.barrier(ctx::MPICommsContext) = MPI.Barrier(ctx.mpicomm)

ClimaComms.reduce(ctx::MPICommsContext, val, op) =
MPI.Reduce(val, op, 0, ctx.mpicomm)
ClimaComms.reduce(ctx::MPICommsContext, val, op) = MPI.Reduce(val, op, 0, ctx.mpicomm)

ClimaComms.reduce!(ctx::MPICommsContext, sendbuf, recvbuf, op) =
MPI.Reduce!(sendbuf, recvbuf, op, ctx.mpicomm; root = 0)

ClimaComms.reduce!(ctx::MPICommsContext, sendrecvbuf, op) =
MPI.Reduce!(sendrecvbuf, op, ctx.mpicomm; root = 0)
ClimaComms.reduce!(ctx::MPICommsContext, sendrecvbuf, op) = MPI.Reduce!(sendrecvbuf, op, ctx.mpicomm; root = 0)

ClimaComms.allreduce(ctx::MPICommsContext, sendbuf, op) =
MPI.Allreduce(sendbuf, op, ctx.mpicomm)
ClimaComms.allreduce(ctx::MPICommsContext, sendbuf, op) = MPI.Allreduce(sendbuf, op, ctx.mpicomm)

ClimaComms.allreduce!(ctx::MPICommsContext, sendbuf, recvbuf, op) =
MPI.Allreduce!(sendbuf, recvbuf, op, ctx.mpicomm)
ClimaComms.allreduce!(ctx::MPICommsContext, sendbuf, recvbuf, op) = MPI.Allreduce!(sendbuf, recvbuf, op, ctx.mpicomm)

ClimaComms.allreduce!(ctx::MPICommsContext, sendrecvbuf, op) =
MPI.Allreduce!(sendrecvbuf, op, ctx.mpicomm)
ClimaComms.allreduce!(ctx::MPICommsContext, sendrecvbuf, op) = MPI.Allreduce!(sendrecvbuf, op, ctx.mpicomm)

ClimaComms.bcast(ctx::MPICommsContext, object) =
MPI.bcast(object, ctx.mpicomm; root = 0)
ClimaComms.bcast(ctx::MPICommsContext, object) = MPI.bcast(object, ctx.mpicomm; root = 0)

function ClimaComms.gather(ctx::MPICommsContext, array)
dims = size(array)
@@ -71,8 +62,7 @@ function ClimaComms.gather(ctx::MPICommsContext, array)
MPI.Gatherv!(array, recvbuf, 0, ctx.mpicomm)
end

ClimaComms.abort(ctx::MPICommsContext, status::Int) =
MPI.Abort(ctx.mpicomm, status)
ClimaComms.abort(ctx::MPICommsContext, status::Int) = MPI.Abort(ctx.mpicomm, status)
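
The wrappers above map the ClimaComms collectives directly onto MPI, and init (further up) additionally pins one GPU per node-local rank when the context carries a CUDADevice. A minimal usage sketch, not part of this commit; ClimaComms.mypid is assumed here to be the usual 1-based process id. Launch with, e.g., mpiexec -n 4 julia --project script.jl:

# Usage sketch for the MPI context and the collectives wrapped above.
using ClimaComms, MPI    # loading MPI activates the ClimaCommsMPIExt extension

ctx = ClimaComms.MPICommsContext()   # defaults: ClimaComms.device(), MPI.COMM_WORLD
ClimaComms.init(ctx)                 # initializes MPI; assigns a GPU per local rank on CUDA devices
pid = ClimaComms.mypid(ctx)          # assumed 1-based rank id
np  = ClimaComms.nprocs(ctx)

total  = ClimaComms.allreduce(ctx, Float64(pid), +)   # every rank receives 1 + 2 + ... + np
onroot = ClimaComms.reduce(ctx, Float64(pid), +)      # same sum, but only on the root (pid 1)
msg    = ClimaComms.bcast(ctx, pid == 1 ? "hello from the root" : nothing)
ClimaComms.barrier(ctx)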


# We could probably do something fancier here?
@@ -128,9 +118,7 @@ function graph_context(
recv_lengths,
recv_pids,
::Type{GCT},
) where {
GCT <: Union{MPISendRecvGraphContext, MPIPersistentSendRecvGraphContext},
}
) where {GCT <: Union{MPISendRecvGraphContext, MPIPersistentSendRecvGraphContext}}
@assert length(send_pids) == length(send_lengths)
@assert length(recv_pids) == length(recv_lengths)

@@ -155,36 +143,15 @@
end
recv_ranks = Cint[pid - 1 for pid in recv_pids]
recv_reqs = MPI.UnsafeMultiRequest(length(recv_ranks))
args = (
ctx,
tag,
send_bufs,
send_ranks,
send_reqs,
recv_bufs,
recv_ranks,
recv_reqs,
)
args = (ctx, tag, send_bufs, send_ranks, send_reqs, recv_bufs, recv_ranks, recv_reqs)
if GCT == MPIPersistentSendRecvGraphContext
# Allocate a persistent receive request
for n in 1:length(recv_bufs)
MPI.Recv_init(
recv_bufs[n],
ctx.mpicomm,
recv_reqs[n];
source = recv_ranks[n],
tag = tag,
)
MPI.Recv_init(recv_bufs[n], ctx.mpicomm, recv_reqs[n]; source = recv_ranks[n], tag = tag)
end
# Allocate a persistent send request
for n in 1:length(send_bufs)
MPI.Send_init(
send_bufs[n],
ctx.mpicomm,
send_reqs[n];
dest = send_ranks[n],
tag = tag,
)
MPI.Send_init(send_bufs[n], ctx.mpicomm, send_reqs[n]; dest = send_ranks[n], tag = tag)
end
MPIPersistentSendRecvGraphContext(args...)
else
@@ -212,46 +179,26 @@ ClimaComms.graph_context(
persistent ? MPIPersistentSendRecvGraphContext : MPISendRecvGraphContext,
)

function ClimaComms.start(
ghost::MPISendRecvGraphContext;
dependencies = nothing,
)
function ClimaComms.start(ghost::MPISendRecvGraphContext; dependencies = nothing)
if !all(MPI.isnull, ghost.recv_reqs)
error("Must finish() before next start()")
end
# post receives
for n in 1:length(ghost.recv_bufs)
MPI.Irecv!(
ghost.recv_bufs[n],
ghost.recv_ranks[n],
ghost.tag,
ghost.ctx.mpicomm,
ghost.recv_reqs[n],
)
MPI.Irecv!(ghost.recv_bufs[n], ghost.recv_ranks[n], ghost.tag, ghost.ctx.mpicomm, ghost.recv_reqs[n])
end
# post sends
for n in 1:length(ghost.send_bufs)
MPI.Isend(
ghost.send_bufs[n],
ghost.send_ranks[n],
ghost.tag,
ghost.ctx.mpicomm,
ghost.send_reqs[n],
)
MPI.Isend(ghost.send_bufs[n], ghost.send_ranks[n], ghost.tag, ghost.ctx.mpicomm, ghost.send_reqs[n])
end
end

function ClimaComms.start(
ghost::MPIPersistentSendRecvGraphContext;
dependencies = nothing,
)
function ClimaComms.start(ghost::MPIPersistentSendRecvGraphContext; dependencies = nothing)
MPI.Startall(ghost.recv_reqs) # post receives
MPI.Startall(ghost.send_reqs) # post sends
end

function ClimaComms.progress(
ghost::Union{MPISendRecvGraphContext, MPIPersistentSendRecvGraphContext},
)
function ClimaComms.progress(ghost::Union{MPISendRecvGraphContext, MPIPersistentSendRecvGraphContext})
if isdefined(MPI, :MPI_ANY_SOURCE) # < v0.20
MPI.Iprobe(MPI.MPI_ANY_SOURCE, ghost.tag, ghost.ctx.mpicomm)
else # >= v0.20
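
Together with ClimaComms.finish (not shown in this excerpt), the graph contexts above follow a start/progress/finish lifecycle for halo exchanges. A hedged sketch of driving it; the flat send/recv buffers with per-neighbor lengths and pids, and the persistent keyword, are inferred from the truncated signatures rather than quoted from the source. Run with mpiexec -n 2 julia --project sketch.jl:

# Ghost-exchange lifecycle sketch (assumed argument layout; two-rank job).
using ClimaComms, MPI

ctx = ClimaComms.MPICommsContext()
ClimaComms.init(ctx)
other = ClimaComms.mypid(ctx) == 1 ? 2 : 1          # the single neighbor in a 2-rank run

send_buf = fill(Float64(ClimaComms.mypid(ctx)), 3)  # three values destined for `other`
recv_buf = zeros(3)

ghost = ClimaComms.graph_context(
    ctx,
    send_buf, [3], [other],   # send buffer, per-neighbor lengths, neighbor pids
    recv_buf, [3], [other];   # receive buffer, lengths, pids
    persistent = true,        # assumed keyword selecting MPIPersistentSendRecvGraphContext
)

ClimaComms.start(ghost)     # post the receives and sends
ClimaComms.progress(ghost)  # drive MPI progress while overlapping local work
ClimaComms.finish(ghost)    # wait for completion; required before the next start()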