Skip to content

Commit

Permalink
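Fix some pipelines (set CLIMACOMMS_CONTEXT / CLIMACOMMS_DEVICE in Buildkite env blocks, replacing CLIMACORE_DISTRIBUTED)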
Browse files (browse the repository at this point in the history)
  • Loading branch information
charleskawczynski committed Jun 5, 2024
1 parent 61d2ac8 commit de081f9
Show file tree
Hide file tree
Showing 6 changed files with 120 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .buildkite/comparison/pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ cat << EOM
- "$job_id/scaling_data_${nprocs}_processes.jld2"
- "$job_id/report.*.nsys-rep"
env:
CLIMACORE_DISTRIBUTED: "MPI"
CLIMACOMMS_CONTEXT: "MPI"
NPROCS: $nprocs
agents:
config: cpu
Expand Down
43 changes: 43 additions & 0 deletions .buildkite/gpu_pipeline/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ steps:
--config_file ${GPU_CONFIG_PATH}target_gpu_implicit_baroclinic_wave.yml
--job_id target_gpu_implicit_baroclinic_wave
artifact_paths: "target_gpu_implicit_baroclinic_wave/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1
slurm_cpus_per_task: 4
Expand All @@ -71,6 +73,8 @@ steps:
--config_file ${GPU_CONFIG_PATH}gpu_hs_rhoe_equil_55km_nz63_0M.yml
--job_id gpu_hs_rhoe_equil_55km_nz63_0M
artifact_paths: "gpu_hs_rhoe_equil_55km_nz63_0M/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1
slurm_cpus_per_task: 4
Expand All @@ -87,6 +91,9 @@ steps:
--config_file ${GPU_CONFIG_PATH}gpu_hs_rhoe_equil_55km_nz63_0M_4process.yml
--job_id gpu_hs_rhoe_equil_55km_nz63_0M_4process
artifact_paths: "gpu_hs_rhoe_equil_55km_nz63_0M_4process/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
CLIMACOMMS_CONTEXT: "MPI"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
Expand All @@ -104,6 +111,9 @@ steps:
--config_file ${GPU_CONFIG_PATH}target_gpu_implicit_baroclinic_wave_4process.yml
--job_id target_gpu_implicit_baroclinic_wave_4process
artifact_paths: "target_gpu_implicit_baroclinic_wave_4process/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
CLIMACOMMS_CONTEXT: "MPI"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
Expand All @@ -123,6 +133,9 @@ steps:
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ss_1process.yml
--job_id gpu_aquaplanet_dyamond_ss_1process
artifact_paths: "gpu_aquaplanet_dyamond_ss_1process/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
CLIMACOMMS_CONTEXT: "MPI"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
Expand All @@ -140,6 +153,9 @@ steps:
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ss_2process.yml
--job_id gpu_aquaplanet_dyamond_ss_2process
artifact_paths: "gpu_aquaplanet_dyamond_ss_2process/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
CLIMACOMMS_CONTEXT: "MPI"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
Expand All @@ -157,6 +173,9 @@ steps:
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ss_4process.yml
--job_id gpu_aquaplanet_dyamond_ss_4process
artifact_paths: "gpu_aquaplanet_dyamond_ss_4process/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
CLIMACOMMS_CONTEXT: "MPI"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
Expand All @@ -173,6 +192,8 @@ steps:
- >
julia --color=yes --project=examples post_processing/plot_gpu_strong_scaling.jl gpu_aquaplanet_dyamond_ss
artifact_paths: "gpu_aquaplanet_dyamond_ss/*"
env:
CLIMACOMMS_CONTEXT: "MPI"
agents:
slurm_cpus_per_task: 1
slurm_ntasks: 1
Expand All @@ -190,6 +211,9 @@ steps:
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_1process.yml
--job_id gpu_aquaplanet_dyamond_ws_1process
artifact_paths: "gpu_aquaplanet_dyamond_ws_1process/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
CLIMACOMMS_CONTEXT: "MPI"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
Expand All @@ -207,6 +231,9 @@ steps:
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_2process.yml
--job_id gpu_aquaplanet_dyamond_ws_2process
artifact_paths: "gpu_aquaplanet_dyamond_ws_2process/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
CLIMACOMMS_CONTEXT: "MPI"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
Expand All @@ -224,6 +251,9 @@ steps:
--config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_4process.yml
--job_id gpu_aquaplanet_dyamond_ws_4process
artifact_paths: "gpu_aquaplanet_dyamond_ws_4process/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
CLIMACOMMS_CONTEXT: "MPI"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
Expand All @@ -240,6 +270,9 @@ steps:
- >
julia --color=yes --project=examples post_processing/plot_gpu_weak_scaling.jl gpu_aquaplanet_dyamond_ws
artifact_paths: "gpu_aquaplanet_dyamond_ws/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
CLIMACOMMS_CONTEXT: "MPI"
agents:
slurm_cpus_per_task: 1
slurm_ntasks: 1
Expand All @@ -259,6 +292,9 @@ steps:
--config_file ${MODEL_CONFIG_PATH}aquaplanet_diagedmf.yml
--job_id gpu_aquaplanet_diagedmf
artifact_paths: "gpu_aquaplanet_diagedmf/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
CLIMACOMMS_CONTEXT: "MPI"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
Expand All @@ -272,6 +308,8 @@ steps:
--config_file ${MODEL_CONFIG_PATH}aquaplanet_diagedmf.yml
--job_id gpu_aquaplanet_diagedmf_benchmark
artifact_paths: "gpu_aquaplanet_diagedmf_benchmark/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1

Expand All @@ -287,6 +325,9 @@ steps:
--config_file ${MODEL_CONFIG_PATH}aquaplanet_progedmf.yml
--job_id gpu_aquaplanet_progedmf
artifact_paths: "gpu_aquaplanet_progedmf/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
CLIMACOMMS_CONTEXT: "MPI"
agents:
slurm_gpus_per_task: 1
slurm_cpus_per_task: 4
Expand All @@ -300,5 +341,7 @@ steps:
--config_file ${MODEL_CONFIG_PATH}aquaplanet_progedmf.yml
--job_id gpu_aquaplanet_progedmf_benchmark
artifact_paths: "gpu_aquaplanet_progedmf_benchmark/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1
6 changes: 6 additions & 0 deletions .buildkite/longruns/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ steps:
# command:
# - srun julia --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME
# artifact_paths: "$$JOB_NAME/output_active/*"
# env:
# CLIMACOMMS_CONTEXT: "MPI"
# agents:
# slurm_ntasks: 32
# slurm_mem_per_cpu: 32GB
Expand All @@ -58,6 +60,8 @@ steps:
command:
- srun julia --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME
artifact_paths: "$$JOB_NAME/output_active/*"
env:
CLIMACOMMS_CONTEXT: "MPI"
agents:
slurm_ntasks: 32
slurm_nodes: 2
Expand All @@ -69,6 +73,8 @@ steps:
command:
- srun julia --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME
artifact_paths: "$$JOB_NAME/output_active/*"
env:
CLIMACOMMS_CONTEXT: "MPI"
agents:
slurm_ntasks: 64
slurm_nodes: 4
Expand Down
34 changes: 32 additions & 2 deletions .buildkite/longruns_gpu/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ steps:
command:
- srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME
artifact_paths: "$$JOB_NAME/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1
slurm_time: 12:00:00
Expand All @@ -56,6 +58,8 @@ steps:
command:
- srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME
artifact_paths: "$$JOB_NAME/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1
slurm_time: 12:00:00
Expand All @@ -66,6 +70,8 @@ steps:
command:
- srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME
artifact_paths: "$$JOB_NAME/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1
slurm_time: 12:00:00
Expand All @@ -76,6 +82,8 @@ steps:
command:
- srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME
artifact_paths: "$$JOB_NAME/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1
slurm_time: 12:00:00
Expand All @@ -86,6 +94,8 @@ steps:
command:
- srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME
artifact_paths: "$$JOB_NAME/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1
slurm_time: 12:00:00
Expand All @@ -97,7 +107,8 @@ steps:
- srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME
artifact_paths: "$$JOB_NAME/output_active/*"
env:
CLIMACORE_DISTRIBUTED: "MPI"
CLIMACOMMS_CONTEXT: "MPI"
CLIMACOMMS_DEVICE: "CUDA"
JOB_NAME: "longrun_hs_rhoe_equil_55km_nz63_0M_deepatmos"
agents:
slurm_gpus: 1
Expand All @@ -111,7 +122,8 @@ steps:
- srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME
artifact_paths: "$$JOB_NAME/output_active/*"
env:
CLIMACORE_DISTRIBUTED: "MPI"
CLIMACOMMS_CONTEXT: "MPI"
CLIMACOMMS_DEVICE: "CUDA"
JOB_NAME: "longrun_aquaplanet_rhoe_equil_55km_nz63_gray_0M"
agents:
slurm_gpus: 1
Expand All @@ -121,6 +133,8 @@ steps:
command:
- srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME
artifact_paths: "$$JOB_NAME/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1
slurm_time: 12:00:00
Expand All @@ -131,6 +145,8 @@ steps:
command:
- srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME
artifact_paths: "$$JOB_NAME/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1
slurm_time: 12:00:00
Expand All @@ -141,6 +157,8 @@ steps:
command:
- srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME
artifact_paths: "$$JOB_NAME/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1
slurm_time: 12:00:00
Expand All @@ -151,6 +169,8 @@ steps:
command:
- srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME
artifact_paths: "$$JOB_NAME/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1
slurm_time: 12:00:00
Expand All @@ -161,6 +181,8 @@ steps:
command:
- srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME
artifact_paths: "$$JOB_NAME/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1
slurm_time: 6:00:00
Expand All @@ -171,6 +193,8 @@ steps:
command:
- srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME
artifact_paths: "$$JOB_NAME/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1
slurm_time: 6:00:00
Expand All @@ -181,6 +205,8 @@ steps:
command:
- srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME
artifact_paths: "$$JOB_NAME/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1
slurm_time: 12:00:00
Expand All @@ -195,6 +221,8 @@ steps:
command:
- srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME
artifact_paths: "$$JOB_NAME/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1
slurm_time: 12:00:00
Expand All @@ -209,6 +237,8 @@ steps:
command:
- srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME
artifact_paths: "$$JOB_NAME/output_active/*"
env:
CLIMACOMMS_DEVICE: "CUDA"
agents:
slurm_gpus: 1
slurm_time: 12:00:00
Expand Down
Loading

0 comments on commit de081f9

Please sign in to comment.