From d9393a82d2b13422ebbc985d55a1aae88a905c71 Mon Sep 17 00:00:00 2001 From: LenkaNovak Date: Tue, 3 Oct 2023 14:17:11 -0700 Subject: [PATCH] add dyamond add nocouple run clean rm slurm_mem_per_cpu rm slurm_mem fix atmos config path add atmos toml path incr slurm_mem up pip try flag fix rm limiter usage clean clean clean try 140d race build_history fix maxlog --- .buildkite/longruns/pipeline.yml | 50 ++++++++++++------- .../longrun_configs/amip_longrun_target.yml | 6 +-- config/longrun_configs/amip_n1_shortrun.yml | 2 +- .../amip_n1_shortrun_nocouple.yml | 24 +++++++++ config/longrun_configs/amip_n2_shortrun.yml | 2 +- config/longrun_configs/amip_n32_shortrun.yml | 2 +- config/longrun_configs/amip_n64_shortrun.yml | 2 +- config/longrun_configs/amip_n8_shortrun.yml | 2 +- config/longrun_configs/dyamond_target.yml | 14 ++++++ .../slabplanet_default_longrun.yml | 2 +- config/model_configs/interactive_debug.yml | 2 +- experiments/AMIP/modular/cli_options.jl | 2 + .../components/atmosphere/climaatmos_init.jl | 22 +++++--- 13 files changed, 95 insertions(+), 37 deletions(-) create mode 100644 config/longrun_configs/amip_n1_shortrun_nocouple.yml create mode 100644 config/longrun_configs/dyamond_target.yml diff --git a/.buildkite/longruns/pipeline.yml b/.buildkite/longruns/pipeline.yml index 2e68b73db2..ee25252f10 100644 --- a/.buildkite/longruns/pipeline.yml +++ b/.buildkite/longruns/pipeline.yml @@ -63,18 +63,21 @@ steps: slurm_nodes: 1 slurm_mem_per_cpu: 16G - # - label: "MPI AMIP FINE: longrun" # unstable after 6 months - # key: "amip_longrun_fine" - # command: "mpiexec julia --color=yes --project=experiments/AMIP/modular/ experiments/AMIP/modular/coupler_driver_modular.jl --run_name amip_longrun_fine --coupled true --anim true --surface_setup PrescribedSurface --moist equil --vert_diff true --rad gray --z_elem 50 --dz_top 3000 --dz_bottom 30 --h_elem 16 --kappa_4 1e16 --z_stretch false --rayleigh_sponge true --alpha_rayleigh_uh 0 --alpha_rayleigh_w 10 --dt_cpl 150 --dt 150secs --dt_rad 1hours --FLOAT_TYPE Float64 --energy_check false --mode_name amip --t_end 140days --dt_save_to_sol 10days --mono_surface false --precip_model 0M" - # artifact_paths: "experiments/AMIP/modular/output/amip/amip_longrun_fine_artifacts/*" - # env: - # CLIMACORE_DISTRIBUTED: "MPI" - # BUILD_HISTORY_HANDLE: "" - # agents: - # slurm_ntasks_per_node: 16 - # slurm_nodes: 4 - # slurm_mem_per_cpu: 16G + # DYAMOND AMIP: 1 day (convection resolving) + - label: "MPI AMIP SUPERFINE: dyamond_target" + key: "dyamond_target" + command: "mpiexec julia --color=yes --project=experiments/AMIP/modular/ experiments/AMIP/modular/coupler_driver_modular.jl --config_file $CONFIG_PATH/dyamond_target.yml" + artifact_paths: "experiments/AMIP/modular/output/amip/dyamond_target_artifacts/*" + env: + CLIMACORE_DISTRIBUTED: "MPI" + BUILD_HISTORY_HANDLE: "" + agents: + slurm_ntasks_per_node: 16 + slurm_nodes: 4 + slurm_mem_per_cpu: 16G + + # mid-resolution AMIP: longrun (140 days) - label: "MPI AMIP FINE: target longrun" key: "amip_longrun_target" command: "mpiexec julia --color=yes --project=experiments/AMIP/modular/ experiments/AMIP/modular/coupler_driver_modular.jl --config_file $CONFIG_PATH/amip_longrun_target.yml" @@ -87,7 +90,7 @@ steps: slurm_nodes: 4 slurm_mem_per_cpu: 16G - # MPI performance scaling (10 days) + # mid-resolution AMIP: MPI performance scaling (10 days) - label: "MPI AMIP FINE: n64" key: "mpi_amip_fine_n64" command: "mpiexec julia --color=yes --project=experiments/AMIP/modular/ experiments/AMIP/modular/coupler_driver_modular.jl --config_file $CONFIG_PATH/amip_n64_shortrun.yml" @@ -147,6 +150,17 @@ steps: slurm_nodes: 1 slurm_mem_per_cpu: 16G + - label: "MPI AMIP FINE: n1 no couple" # sim time = Δt_cpl (~ benchmarking with standalone models) + key: "mpi_amip_fine_n1_nocouple" + command: "julia --color=yes --project=experiments/AMIP/modular/ experiments/AMIP/modular/coupler_driver_modular.jl --config_file $CONFIG_PATH/amip_n1_shortrun_nocouple.yml" + artifact_paths: "experiments/AMIP/modular/output/amip/amip_n1_shortrun_nocouple_artifacts/*" + env: + BUILD_HISTORY_HANDLE: "" + agents: + slurm_ntasks_per_node: 1 + slurm_nodes: 1 + slurm_mem_per_cpu: 16G + # mpi_amip_fine_n1 flame graph report (NB: arguments passed from the ci pipeline.yml) - label: ":rocket: performance: flame graph diff: perf_target_amip_n1_shortrun" command: "julia --color=yes --project=perf perf/flame_diff.jl --config_file $PERF_CONFIG_PATH/perf_diff_target_amip_n1_shortrun.yml" @@ -154,14 +168,14 @@ steps: agents: slurm_ntasks_per_node: 1 slurm_nodes: 1 - slurm_mem_per_cpu: 16GB + slurm_mem_per_cpu: 16G - wait # plot job performance history - label: ":chart_with_downwards_trend: build history" command: - - build_history staging # name of branch to plot + - build_history main # name of branch to plot artifact_paths: - "build_history.html" @@ -175,10 +189,8 @@ steps: command: - slack-upload -c "#coupler-report" -f experiments/AMIP/modular/output/slabplanet/slabplanet_default_longrun_artifacts/total_energy_log_bucket.png -m png -n slab_coarse_log -x "Slabplanet energy conservation (log error)" - slack-upload -c "#coupler-report" -f experiments/AMIP/modular/output/slabplanet/slabplanet_default_longrun_artifacts/total_energy_bucket.png -m png -n slab_coarse -x "Slabplanet energy conservation" - - # - label: ":envelope: Slack report: AMIP fine benchmark" - # command: - # - slack-upload -c "#coupler-report" -f experiments/AMIP/modular/output/amip/amip_longrun_fine_artifacts/amip_paperplots.png -m png -n amip_fine -x "AMIP Longrun" + - slack-upload -c "#coupler-report" -f experiments/AMIP/modular/output/slabplanet/slabplanet_default_longrun_artifacts/total_water_log_bucket.png -m png -n slab_coarse_w_log -x "Slabplanet water conservation (log error)" + - slack-upload -c "#coupler-report" -f experiments/AMIP/modular/output/slabplanet/slabplanet_default_longrun_artifacts/total_water_bucket.png -m png -n slab_coarse_w -x "Slabplanet water conservation" - label: ":envelope: Slack report: target AMIP" command: @@ -187,4 +199,4 @@ steps: - label: ":envelope: Slack report: Flame Diff" command: - slack-upload -c "#coupler-report" -f perf/output/perf_diff_target_amip_n1_shortrun/flame_diff.html -m png -n amip_fine_flamegraphdiff -x "AMIP Longrun FlameGraphDiff" - - slack-upload -c "#coupler-report" -f perf/output/perf_diff_target_amip_n1_shortrun/flame_diff_self_count.html -m png -n amip_fine_flamegraphdiffself -x "AMIP Longrun FlameGraphDiffSelf" + - slack-upload -c "#coupler-report" -f perf/output/perf_diff_target_amip_n1_shortrun/flame_diff_self_count.html -m png -n amip_fine_flamegraphdiffself -x "AMIP Longrun FlameGraphDiffSelf" \ No newline at end of file diff --git a/config/longrun_configs/amip_longrun_target.yml b/config/longrun_configs/amip_longrun_target.yml index b204b8cd5a..1b2ff5114b 100644 --- a/config/longrun_configs/amip_longrun_target.yml +++ b/config/longrun_configs/amip_longrun_target.yml @@ -14,9 +14,9 @@ h_elem: 12 kappa_4: 3e16 rayleigh_sponge: true alpha_rayleigh_uh: 0 -dt: "150secs" -t_end: "100days" # TODO this has been decreased from 140 days to avoid instability +dt: "100secs" +t_end: "100days" # TODO this has been decreased from 140 days to avoid instability #460 job_id: "amip_longrun_target" dt_save_to_sol: "5days" dt_save_to_disk: "1days" -apply_limiter: true +apply_limiter: false diff --git a/config/longrun_configs/amip_n1_shortrun.yml b/config/longrun_configs/amip_n1_shortrun.yml index f86d664a47..ecf8d4ef2b 100644 --- a/config/longrun_configs/amip_n1_shortrun.yml +++ b/config/longrun_configs/amip_n1_shortrun.yml @@ -20,5 +20,5 @@ mode_name: "amip" t_end: "1days" dt_save_to_sol: "100days" mono_surface: false -apply_limiter: true +apply_limiter: false precip_model: "0M" diff --git a/config/longrun_configs/amip_n1_shortrun_nocouple.yml b/config/longrun_configs/amip_n1_shortrun_nocouple.yml new file mode 100644 index 0000000000..52d3a3a96d --- /dev/null +++ b/config/longrun_configs/amip_n1_shortrun_nocouple.yml @@ -0,0 +1,24 @@ +job_id: "amip_n1_shortrun_nocouple" +run_name: "amip_n1_shortrun_nocouple" +moist: "equil" +vert_diff: "true" +rad: "gray" +z_elem: 50 +dz_top: 3000 +dz_bottom: 30 +h_elem: 16 +kappa_4: 1e16 +z_stretch: false +rayleigh_sponge: true +alpha_rayleigh_uh: 0 +alpha_rayleigh_w: 10 +dt_cpl: 86200 +dt: "150secs" +dt_rad: "1hours" +energy_check: false +mode_name: "amip" +t_end: "1days" +dt_save_to_sol: "100days" +mono_surface: false +apply_limiter: false +precip_model: "0M" diff --git a/config/longrun_configs/amip_n2_shortrun.yml b/config/longrun_configs/amip_n2_shortrun.yml index 73f913a4c7..aa22779415 100644 --- a/config/longrun_configs/amip_n2_shortrun.yml +++ b/config/longrun_configs/amip_n2_shortrun.yml @@ -20,5 +20,5 @@ mode_name: "amip" t_end: "1days" dt_save_to_sol: "100days" mono_surface: false -apply_limiter: true +apply_limiter: false precip_model: "0M" diff --git a/config/longrun_configs/amip_n32_shortrun.yml b/config/longrun_configs/amip_n32_shortrun.yml index a05992480b..69a24cb706 100644 --- a/config/longrun_configs/amip_n32_shortrun.yml +++ b/config/longrun_configs/amip_n32_shortrun.yml @@ -20,5 +20,5 @@ mode_name: "amip" t_end: "10days" dt_save_to_sol: "100days" mono_surface: false -apply_limiter: true +apply_limiter: false precip_model: "0M" diff --git a/config/longrun_configs/amip_n64_shortrun.yml b/config/longrun_configs/amip_n64_shortrun.yml index e4a28456d7..580755cab7 100644 --- a/config/longrun_configs/amip_n64_shortrun.yml +++ b/config/longrun_configs/amip_n64_shortrun.yml @@ -20,5 +20,5 @@ mode_name: "amip" t_end: "10days" dt_save_to_sol: "100days" mono_surface: false -apply_limiter: true +apply_limiter: false precip_model: "0M" diff --git a/config/longrun_configs/amip_n8_shortrun.yml b/config/longrun_configs/amip_n8_shortrun.yml index 5ba0e0f7cb..ba9f3a41b4 100644 --- a/config/longrun_configs/amip_n8_shortrun.yml +++ b/config/longrun_configs/amip_n8_shortrun.yml @@ -20,5 +20,5 @@ mode_name: "amip" t_end: "10days" dt_save_to_sol: "100days" mono_surface: false -apply_limiter: true +apply_limiter: false precip_model: "0M" diff --git a/config/longrun_configs/dyamond_target.yml b/config/longrun_configs/dyamond_target.yml new file mode 100644 index 0000000000..bd574b1cc0 --- /dev/null +++ b/config/longrun_configs/dyamond_target.yml @@ -0,0 +1,14 @@ +run_name: "dyamond_target" +anim: true +dt_cpl: 50 +energy_check: false +mode_name: "amip" +mono_surface: false +t_end: "1days" +job_id: "dyamond_target" +dt_save_to_sol: "0.5days" +dt_save_to_disk: "0.5days" +turb_flux_partition: "CombinedStateFluxes" +atmos_config_file: "config/longrun_configs/longrun_aquaplanet_dyamond.yml" +atmos_toml_file: "toml/longrun_aquaplanet_dyamond.toml" +monthly_checkpoint: false diff --git a/config/longrun_configs/slabplanet_default_longrun.yml b/config/longrun_configs/slabplanet_default_longrun.yml index c093b94ac1..e635c8b447 100644 --- a/config/longrun_configs/slabplanet_default_longrun.yml +++ b/config/longrun_configs/slabplanet_default_longrun.yml @@ -12,5 +12,5 @@ mono_surface: true h_elem: 6 precip_model: "0M" anim: true -apply_limiter: true +apply_limiter: false job_id: "slabplanet_default_longrun" diff --git a/config/model_configs/interactive_debug.yml b/config/model_configs/interactive_debug.yml index fd4761e497..db0ba1bfbc 100644 --- a/config/model_configs/interactive_debug.yml +++ b/config/model_configs/interactive_debug.yml @@ -14,4 +14,4 @@ dt_save_restart: "5days" precip_model: "0M" run_name: "interactive_debug_run" job_id: "interactive_debug_run" -monthly_checkpoint: true +monthly_checkpoint: true \ No newline at end of file diff --git a/experiments/AMIP/modular/cli_options.jl b/experiments/AMIP/modular/cli_options.jl index f6b83656ae..bdc66d342c 100644 --- a/experiments/AMIP/modular/cli_options.jl +++ b/experiments/AMIP/modular/cli_options.jl @@ -59,6 +59,8 @@ function argparse_settings() default = "PrescribedSurface" "--atmos_config_file" help = "A yaml file used to set the atmospheric model configuration. If nothing is specified, the default configuration is used." + "--atmos_toml_file" + help = "A toml file used to overwrite the atmospheric model parameters. If nothing is specified, the default parameters are used." # ClimaLSM specific "--land_albedo_type" help = "Access land surface albedo information from data file. [`function`, `map_static`, `map_temporal`]" diff --git a/experiments/AMIP/modular/components/atmosphere/climaatmos_init.jl b/experiments/AMIP/modular/components/atmosphere/climaatmos_init.jl index 08768a8dab..ce7d8ffdc7 100644 --- a/experiments/AMIP/modular/components/atmosphere/climaatmos_init.jl +++ b/experiments/AMIP/modular/components/atmosphere/climaatmos_init.jl @@ -28,19 +28,25 @@ Returns the specified atmospheric configuration (`atmos_config_dict`) overwitten in the coupler dictionary (`config_dict`). """ function get_atmos_config(coupler_dict) - atmos_file = coupler_dict["atmos_config_file"] + atmos_config_file = coupler_dict["atmos_config_file"] # override default or specified configs with coupler arguments, and set the correct atmos config_file - if isnothing(atmos_file) + if isnothing(atmos_config_file) @info "Using Atmos default configuration" - merge(CA.default_config_dict(), coupler_dict, Dict("config_file" => atmos_file)) + atmos_config = merge(CA.default_config_dict(), coupler_dict, Dict("config_file" => atmos_config_file)) else - @info "Using Atmos configuration from $atmos_file" - merge( - CA.override_default_config(joinpath(pkgdir(CA), atmos_file)), + @info "Using Atmos configuration from $atmos_config_file" + atmos_config = merge( + CA.override_default_config(joinpath(pkgdir(CA), atmos_config_file)), coupler_dict, - Dict("config_file" => atmos_file), + Dict("config_file" => atmos_config_file), ) end + atmos_toml_file = coupler_dict["atmos_toml_file"] + if !isnothing(atmos_toml_file) + @info "Overwriting Atmos parameters from $atmos_toml_file" + atmos_config = merge(atmos_config, Dict("toml" => [joinpath(pkgdir(CA), atmos_toml_file)])) + end + return atmos_config end function atmos_init(::Type{FT}, atmos_config_dict::Dict) where {FT} @@ -53,7 +59,7 @@ function atmos_init(::Type{FT}, atmos_config_dict::Dict) where {FT} face_space = axes(Y.f.u₃) spaces = (; center_space = center_space, face_space = face_space) if :ρe_int in propertynames(Y.c) - @warn("Running with ρe_int in coupled mode is not tested yet.") + @warn("Running with ρe_int in coupled mode is not tested yet.", maxlog = 1) end # set initial fluxes to zero