From 86cde6a6bc311627e8d6ea6ceebc8f222c5f1de0 Mon Sep 17 00:00:00 2001 From: Charles Kawczynski Date: Wed, 5 Jun 2024 15:53:37 -0400 Subject: [PATCH] Fix some pipelines --- .buildkite/comparison/pipeline.sh | 2 +- .buildkite/gpu_pipeline/pipeline.yml | 43 ++++++++++++++++++++++++++ .buildkite/longruns/pipeline.yml | 6 ++++ .buildkite/longruns_gpu/pipeline.yml | 34 ++++++++++++++++++-- .buildkite/pipeline.yml | 46 ++++++++++++++++++++++------ .buildkite/scaling/pipeline.sh | 2 +- Project.toml | 2 +- docs/Manifest.toml | 2 +- examples/Manifest.toml | 2 +- perf/Manifest.toml | 2 +- 10 files changed, 124 insertions(+), 17 deletions(-) diff --git a/.buildkite/comparison/pipeline.sh b/.buildkite/comparison/pipeline.sh index 371aa2ef42..193da65cc2 100755 --- a/.buildkite/comparison/pipeline.sh +++ b/.buildkite/comparison/pipeline.sh @@ -88,7 +88,7 @@ cat << EOM - "$job_id/scaling_data_${nprocs}_processes.jld2" - "$job_id/report.*.nsys-rep" env: - CLIMACORE_DISTRIBUTED: "MPI" + CLIMACOMMS_CONTEXT: "MPI" NPROCS: $nprocs agents: config: cpu diff --git a/.buildkite/gpu_pipeline/pipeline.yml b/.buildkite/gpu_pipeline/pipeline.yml index 816c07802e..aa8c96d3d1 100644 --- a/.buildkite/gpu_pipeline/pipeline.yml +++ b/.buildkite/gpu_pipeline/pipeline.yml @@ -56,6 +56,8 @@ steps: --config_file ${GPU_CONFIG_PATH}target_gpu_implicit_baroclinic_wave.yml --job_id target_gpu_implicit_baroclinic_wave artifact_paths: "target_gpu_implicit_baroclinic_wave/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_cpus_per_task: 4 @@ -71,6 +73,8 @@ steps: --config_file ${GPU_CONFIG_PATH}gpu_hs_rhoe_equil_55km_nz63_0M.yml --job_id gpu_hs_rhoe_equil_55km_nz63_0M artifact_paths: "gpu_hs_rhoe_equil_55km_nz63_0M/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_cpus_per_task: 4 @@ -87,6 +91,9 @@ steps: --config_file ${GPU_CONFIG_PATH}gpu_hs_rhoe_equil_55km_nz63_0M_4process.yml --job_id gpu_hs_rhoe_equil_55km_nz63_0M_4process artifact_paths: "gpu_hs_rhoe_equil_55km_nz63_0M_4process/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_gpus_per_task: 1 slurm_cpus_per_task: 4 @@ -104,6 +111,9 @@ steps: --config_file ${GPU_CONFIG_PATH}target_gpu_implicit_baroclinic_wave_4process.yml --job_id target_gpu_implicit_baroclinic_wave_4process artifact_paths: "target_gpu_implicit_baroclinic_wave_4process/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_gpus_per_task: 1 slurm_cpus_per_task: 4 @@ -123,6 +133,9 @@ steps: --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ss_1process.yml --job_id gpu_aquaplanet_dyamond_ss_1process artifact_paths: "gpu_aquaplanet_dyamond_ss_1process/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_gpus_per_task: 1 slurm_cpus_per_task: 4 @@ -140,6 +153,9 @@ steps: --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ss_2process.yml --job_id gpu_aquaplanet_dyamond_ss_2process artifact_paths: "gpu_aquaplanet_dyamond_ss_2process/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_gpus_per_task: 1 slurm_cpus_per_task: 4 @@ -157,6 +173,9 @@ steps: --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ss_4process.yml --job_id gpu_aquaplanet_dyamond_ss_4process artifact_paths: "gpu_aquaplanet_dyamond_ss_4process/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_gpus_per_task: 1 slurm_cpus_per_task: 4 @@ -173,6 +192,8 @@ steps: - > julia --color=yes --project=examples post_processing/plot_gpu_strong_scaling.jl gpu_aquaplanet_dyamond_ss artifact_paths: "gpu_aquaplanet_dyamond_ss/*" + env: + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_cpus_per_task: 1 slurm_ntasks: 1 @@ -190,6 +211,9 @@ steps: --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_1process.yml --job_id gpu_aquaplanet_dyamond_ws_1process artifact_paths: "gpu_aquaplanet_dyamond_ws_1process/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_gpus_per_task: 1 slurm_cpus_per_task: 4 @@ -207,6 +231,9 @@ steps: --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_2process.yml --job_id gpu_aquaplanet_dyamond_ws_2process artifact_paths: "gpu_aquaplanet_dyamond_ws_2process/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_gpus_per_task: 1 slurm_cpus_per_task: 4 @@ -224,6 +251,9 @@ steps: --config_file ${GPU_CONFIG_PATH}gpu_aquaplanet_dyamond_ws_4process.yml --job_id gpu_aquaplanet_dyamond_ws_4process artifact_paths: "gpu_aquaplanet_dyamond_ws_4process/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_gpus_per_task: 1 slurm_cpus_per_task: 4 @@ -240,6 +270,9 @@ steps: - > julia --color=yes --project=examples post_processing/plot_gpu_weak_scaling.jl gpu_aquaplanet_dyamond_ws artifact_paths: "gpu_aquaplanet_dyamond_ws/*" + env: + CLIMACOMMS_DEVICE: "CUDA" + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_cpus_per_task: 1 slurm_ntasks: 1 @@ -259,6 +292,9 @@ steps: --config_file ${MODEL_CONFIG_PATH}aquaplanet_diagedmf.yml --job_id gpu_aquaplanet_diagedmf artifact_paths: "gpu_aquaplanet_diagedmf/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_gpus_per_task: 1 slurm_cpus_per_task: 4 @@ -272,6 +308,8 @@ steps: --config_file ${MODEL_CONFIG_PATH}aquaplanet_diagedmf.yml --job_id gpu_aquaplanet_diagedmf_benchmark artifact_paths: "gpu_aquaplanet_diagedmf_benchmark/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 @@ -287,6 +325,9 @@ steps: --config_file ${MODEL_CONFIG_PATH}aquaplanet_progedmf.yml --job_id gpu_aquaplanet_progedmf artifact_paths: "gpu_aquaplanet_progedmf/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_gpus_per_task: 1 slurm_cpus_per_task: 4 @@ -300,5 +341,7 @@ steps: --config_file ${MODEL_CONFIG_PATH}aquaplanet_progedmf.yml --job_id gpu_aquaplanet_progedmf_benchmark artifact_paths: "gpu_aquaplanet_progedmf_benchmark/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 diff --git a/.buildkite/longruns/pipeline.yml b/.buildkite/longruns/pipeline.yml index 932c83779d..19947ee43c 100644 --- a/.buildkite/longruns/pipeline.yml +++ b/.buildkite/longruns/pipeline.yml @@ -47,6 +47,8 @@ steps: # command: # - srun julia --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME # artifact_paths: "$$JOB_NAME/output_active/*" + # env: + # CLIMACOMMS_CONTEXT: "MPI" # agents: # slurm_ntasks: 32 # slurm_mem_per_cpu: 32GB @@ -58,6 +60,8 @@ steps: command: - srun julia --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME artifact_paths: "$$JOB_NAME/output_active/*" + env: + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_ntasks: 32 slurm_nodes: 2 @@ -69,6 +73,8 @@ steps: command: - srun julia --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME artifact_paths: "$$JOB_NAME/output_active/*" + env: + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_ntasks: 64 slurm_nodes: 4 diff --git a/.buildkite/longruns_gpu/pipeline.yml b/.buildkite/longruns_gpu/pipeline.yml index fb0e4bfb2a..0df4f145b0 100644 --- a/.buildkite/longruns_gpu/pipeline.yml +++ b/.buildkite/longruns_gpu/pipeline.yml @@ -46,6 +46,8 @@ steps: command: - srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME artifact_paths: "$$JOB_NAME/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_time: 12:00:00 @@ -56,6 +58,8 @@ steps: command: - srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME artifact_paths: "$$JOB_NAME/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_time: 12:00:00 @@ -66,6 +70,8 @@ steps: command: - srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME artifact_paths: "$$JOB_NAME/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_time: 12:00:00 @@ -76,6 +82,8 @@ steps: command: - srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME artifact_paths: "$$JOB_NAME/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_time: 12:00:00 @@ -86,6 +94,8 @@ steps: command: - srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME artifact_paths: "$$JOB_NAME/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_time: 12:00:00 @@ -97,7 +107,8 @@ steps: - srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME artifact_paths: "$$JOB_NAME/output_active/*" env: - CLIMACORE_DISTRIBUTED: "MPI" + CLIMACOMMS_CONTEXT: "MPI" + CLIMACOMMS_DEVICE: "CUDA" JOB_NAME: "longrun_hs_rhoe_equil_55km_nz63_0M_deepatmos" agents: slurm_gpus: 1 @@ -111,7 +122,8 @@ steps: - srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME artifact_paths: "$$JOB_NAME/output_active/*" env: - CLIMACORE_DISTRIBUTED: "MPI" + CLIMACOMMS_CONTEXT: "MPI" + CLIMACOMMS_DEVICE: "CUDA" JOB_NAME: "longrun_aquaplanet_rhoe_equil_55km_nz63_gray_0M" agents: slurm_gpus: 1 @@ -121,6 +133,8 @@ steps: command: - srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME artifact_paths: "$$JOB_NAME/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_time: 12:00:00 @@ -131,6 +145,8 @@ steps: command: - srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME artifact_paths: "$$JOB_NAME/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_time: 12:00:00 @@ -141,6 +157,8 @@ steps: command: - srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME artifact_paths: "$$JOB_NAME/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_time: 12:00:00 @@ -151,6 +169,8 @@ steps: command: - srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME artifact_paths: "$$JOB_NAME/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_time: 12:00:00 @@ -161,6 +181,8 @@ steps: command: - srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME artifact_paths: "$$JOB_NAME/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_time: 6:00:00 @@ -171,6 +193,8 @@ steps: command: - srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME artifact_paths: "$$JOB_NAME/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_time: 6:00:00 @@ -181,6 +205,8 @@ steps: command: - srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME artifact_paths: "$$JOB_NAME/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_time: 12:00:00 @@ -195,6 +221,8 @@ steps: command: - srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME artifact_paths: "$$JOB_NAME/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_time: 12:00:00 @@ -209,6 +237,8 @@ steps: command: - srun julia --color=yes --project=examples examples/hybrid/driver.jl --config_file $CONFIG_PATH/$$JOB_NAME.yml --job_id $$JOB_NAME artifact_paths: "$$JOB_NAME/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_time: 12:00:00 diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 03927d7dba..32f044fe9e 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -306,7 +306,7 @@ steps: agents: slurm_mem: 20GB slurm_constraint: icelake|cascadelake|skylake|epyc - + - label: ":computer: aquaplanet (ρe_tot) nonequilmoist allsky radiation monin_obukhov varying insolation high top 1-moment" command: > julia --color=yes --project=examples examples/hybrid/driver.jl @@ -376,7 +376,7 @@ steps: --job_id mpi_make_restart artifact_paths: "mpi_make_restart/output_active/*" env: - CLIMACORE_DISTRIBUTED: "MPI" + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_ntasks: 2 slurm_mem: 16G @@ -392,7 +392,7 @@ steps: --job_id restart_mpi_baroclinic_wave_rhoe artifact_paths: "restart_mpi_baroclinic_wave_rhoe/output_active/*" env: - CLIMACORE_DISTRIBUTED: "MPI" + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_ntasks: 2 slurm_mem: 16G @@ -408,7 +408,7 @@ steps: --job_id mpi_sphere_aquaplanet_rhoe_equilmoist_clearsky artifact_paths: "mpi_sphere_aquaplanet_rhoe_equilmoist_clearsky/output_active/*" env: - CLIMACORE_DISTRIBUTED: "MPI" + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_ntasks: 2 slurm_mem: 16GB @@ -421,7 +421,7 @@ steps: --job_id prep_remap artifact_paths: "prep_remap/output_active/*" env: - CLIMACORE_DISTRIBUTED: "MPI" + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_ntasks: 2 slurm_mem: 16G @@ -455,7 +455,7 @@ steps: --job_id test_mpi_io artifact_paths: "test_mpi_io/output_active/*" env: - CLIMACORE_DISTRIBUTED: "MPI" + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_ntasks: 2 slurm_mem: 16G @@ -643,7 +643,7 @@ steps: artifact_paths: "prognostic_edmfx_bomex_column/output_active/*" agents: slurm_mem: 20GB - + - label: ":genie: Prognostic EDMFX Bomex in a column (implicit)" command: > julia --color=yes --project=examples examples/hybrid/driver.jl @@ -688,7 +688,7 @@ steps: artifact_paths: "prognostic_edmfx_trmm_column_0M/output_active/*" agents: slurm_mem: 20GB - + - label: ":genie: Prognostic EDMFX GCM driven in a column" command: > julia --color=yes --project=examples examples/hybrid/driver.jl @@ -726,6 +726,8 @@ steps: --config_file $CONFIG_PATH/gpu_baroclinic_wave_rhoe.yml --job_id gpu_baroclinic_wave_rhoe artifact_paths: "gpu_implicit_barowave_ref/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_mem: 16G slurm_gpus: 1 @@ -754,6 +756,8 @@ steps: --config_file ${GPU_CONFIG_PATH}/target_gpu_implicit_baroclinic_wave.yml --job_id target_gpu_implicit_baroclinic_wave artifact_paths: "target_gpu_implicit_baroclinic_wave/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_mem: 32G @@ -770,7 +774,7 @@ steps: --job_id target_gpu_implicit_baroclinic_wave_4process artifact_paths: "target_gpu_implicit_baroclinic_wave_4process/output_active/*" env: - CLIMACORE_DISTRIBUTED: "MPI" + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_gpus_per_task: 1 slurm_cpus_per_task: 4 @@ -785,6 +789,8 @@ steps: --config_file $CONFIG_PATH/central_gpu_hs_rhoe_equil_55km_nz63_0M.yml --job_id central_gpu_hs_rhoe_equil_55km_nz63_0M artifact_paths: "central_gpu_hs_rhoe_equil_55km_nz63_0M/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_mem: 16G @@ -797,6 +803,8 @@ steps: --config_file $CONFIG_PATH/central_cloud_diag_gpu_hs_rhoe_equil_55km_nz63_0M.yml --job_id central_cloud_diag_gpu_hs_rhoe_equil_55km_nz63_0M artifact_paths: "central_cloud_diag_gpu_hs_rhoe_equil_55km_nz63_0M/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_mem: 16G @@ -810,6 +818,8 @@ steps: --config_file ${CONFIG_PATH}/gpu_aquaplanet_dyamond.yml --job_id gpu_aquaplanet_dyamond artifact_paths: "gpu_aquaplanet_dyamond/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 @@ -819,6 +829,8 @@ steps: --config_file $CONFIG_PATH/gpu_diagnostic_edmfx_aquaplanet.yml --job_id gpu_diagnostic_edmfx_aquaplanet artifact_paths: "gpu_diagnostic_edmfx_aquaplanet/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_mem: 20G @@ -829,6 +841,8 @@ steps: --config_file $CONFIG_PATH/gpu_prognostic_edmfx_aquaplanet.yml --job_id gpu_prognostic_edmfx_aquaplanet artifact_paths: "gpu_prognostic_edmfx_aquaplanet/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 slurm_mem: 20G @@ -844,6 +858,8 @@ steps: --config_file $PERF_CONFIG_PATH/gpu_implicit_barowave_wrt_h_elem.yml --job_id gpu_implicit_barowave_wrt_h_elem artifact_paths: "gpu_implicit_barowave_wrt_h_elem/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 @@ -854,6 +870,8 @@ steps: --config_file $PERF_CONFIG_PATH/gpu_implicit_barowave.yml --job_id gpu_implicit_barowave artifact_paths: "gpu_implicit_barowave/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_gpus: 1 @@ -864,6 +882,8 @@ steps: --config_file $PERF_CONFIG_PATH/gpu_implicit_barowave_moist.yml --job_id gpu_implicit_barowave_moist artifact_paths: "gpu_implicit_barowave_moist/output_active/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_mem: 16G slurm_gpus: 1 @@ -889,6 +909,8 @@ steps: julia --color=yes --project=perf perf/benchmark.jl --config_file $PERF_CONFIG_PATH/bm_perf_target.yml --job_id bm_perf_target_gpu + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_mem: 24GB slurm_gpus: 1 @@ -907,6 +929,8 @@ steps: julia --color=yes --project=perf perf/benchmark.jl --config_file ${CONFIG_PATH}/aquaplanet_progedmf.yml --job_id bm_diag_edmf_gpu + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_mem: 24GB slurm_gpus: 1 @@ -916,6 +940,8 @@ steps: julia --color=yes --project=perf perf/benchmark.jl --config_file ${CONFIG_PATH}/aquaplanet_progedmf.yml --job_id bm_prog_edmf_gpu + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_mem: 24GB slurm_gpus: 1 @@ -1011,6 +1037,8 @@ steps: --config_file $PERF_CONFIG_PATH/flame_gpu_implicit_barowave_moist.yml --job_id flame_gpu_implicit_barowave_moist artifact_paths: "flame_gpu_implicit_barowave_moist/*" + env: + CLIMACOMMS_DEVICE: "CUDA" agents: slurm_mem: 48GB slurm_gpus: 1 diff --git a/.buildkite/scaling/pipeline.sh b/.buildkite/scaling/pipeline.sh index b3446202d9..bf186b0e3e 100755 --- a/.buildkite/scaling/pipeline.sh +++ b/.buildkite/scaling/pipeline.sh @@ -158,7 +158,7 @@ cat << EOM - "${job_id}/scaling_data_${nprocs}_processes.jld2" - "${job_id}-nsys.tar.gz" env: - CLIMACORE_DISTRIBUTED: "MPI" + CLIMACOMMS_CONTEXT: "MPI" agents: slurm_time: $time EOM diff --git a/Project.toml b/Project.toml index fb1262ead6..97ab802e38 100644 --- a/Project.toml +++ b/Project.toml @@ -50,7 +50,7 @@ ArgParse = "1" ArtifactWrappers = "0.2" Artifacts = "1" AtmosphericProfilesLibrary = "0.1" -ClimaComms = "0.6" +ClimaComms = "0.6.1" ClimaCore = "0.14.6" ClimaDiagnostics = "0.2" ClimaParams = "0.10.4" diff --git a/docs/Manifest.toml b/docs/Manifest.toml index ec1f475dc2..30ab675795 100644 --- a/docs/Manifest.toml +++ b/docs/Manifest.toml @@ -289,7 +289,7 @@ weakdeps = ["SparseArrays"] deps = ["Adapt", "ArgParse", "ArtifactWrappers", "Artifacts", "AtmosphericProfilesLibrary", "ClimaComms", "ClimaCore", "ClimaDiagnostics", "ClimaParams", "ClimaTimeSteppers", "ClimaUtilities", "CloudMicrophysics", "Colors", "Dates", "Dierckx", "DiffEqBase", "Distributions", "DocStringExtensions", "FastGaussQuadrature", "Insolation", "Interpolations", "IntervalSets", "Krylov", "LinearAlgebra", "Logging", "NCDatasets", "NVTX", "Pkg", "Printf", "RRTMGP", "Random", "RootSolvers", "SciMLBase", "StaticArrays", "Statistics", "StatsBase", "SurfaceFluxes", "Thermodynamics", "YAML"] path = ".." uuid = "b2c96348-7fb7-4fe0-8da9-78d88439e717" -version = "0.24.2" +version = "0.24.3" [[deps.ClimaComms]] git-tree-sha1 = "55785e915f4fda85dcad4609aa5e53131151986b" diff --git a/examples/Manifest.toml b/examples/Manifest.toml index e7d2d2d95e..79d478558f 100644 --- a/examples/Manifest.toml +++ b/examples/Manifest.toml @@ -317,7 +317,7 @@ version = "0.5.2" deps = ["Adapt", "ArgParse", "ArtifactWrappers", "Artifacts", "AtmosphericProfilesLibrary", "ClimaComms", "ClimaCore", "ClimaDiagnostics", "ClimaParams", "ClimaTimeSteppers", "ClimaUtilities", "CloudMicrophysics", "Colors", "Dates", "Dierckx", "DiffEqBase", "Distributions", "DocStringExtensions", "FastGaussQuadrature", "Insolation", "Interpolations", "IntervalSets", "Krylov", "LinearAlgebra", "Logging", "NCDatasets", "NVTX", "Pkg", "Printf", "RRTMGP", "Random", "RootSolvers", "SciMLBase", "StaticArrays", "Statistics", "StatsBase", "SurfaceFluxes", "Thermodynamics", "YAML"] path = ".." uuid = "b2c96348-7fb7-4fe0-8da9-78d88439e717" -version = "0.24.2" +version = "0.24.3" [[deps.ClimaComms]] git-tree-sha1 = "55785e915f4fda85dcad4609aa5e53131151986b" diff --git a/perf/Manifest.toml b/perf/Manifest.toml index 0b6b41e5e1..c79a237dd6 100644 --- a/perf/Manifest.toml +++ b/perf/Manifest.toml @@ -328,7 +328,7 @@ version = "0.5.2" deps = ["Adapt", "ArgParse", "ArtifactWrappers", "Artifacts", "AtmosphericProfilesLibrary", "ClimaComms", "ClimaCore", "ClimaDiagnostics", "ClimaParams", "ClimaTimeSteppers", "ClimaUtilities", "CloudMicrophysics", "Colors", "Dates", "Dierckx", "DiffEqBase", "Distributions", "DocStringExtensions", "FastGaussQuadrature", "Insolation", "Interpolations", "IntervalSets", "Krylov", "LinearAlgebra", "Logging", "NCDatasets", "NVTX", "Pkg", "Printf", "RRTMGP", "Random", "RootSolvers", "SciMLBase", "StaticArrays", "Statistics", "StatsBase", "SurfaceFluxes", "Thermodynamics", "YAML"] path = ".." uuid = "b2c96348-7fb7-4fe0-8da9-78d88439e717" -version = "0.24.2" +version = "0.24.3" [[deps.ClimaComms]] git-tree-sha1 = "55785e915f4fda85dcad4609aa5e53131151986b"