Skip to content

Commit

Permalink
Check that AtmosModel is the same across restarts
Browse files Browse the repository at this point in the history
This commit stores the hash of the AtmosModel instance used to prepare a
simulation as an attribute of the checkpoint HDF5 files. On restart, the stored
hash is compared with the hash of the current AtmosModel, and the restart is
rejected if the two hashes differ.

A test is added to check that this mechanism works. The test produces a
checkpoint and then tries to restart from it with a configuration that is
identical except for one changed value (insolation); the restart is expected
to fail with an error.
  • Loading branch information
Sbozzolo committed Sep 18, 2024
1 parent dadeeb0 commit 448d485
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 3 deletions.
8 changes: 7 additions & 1 deletion src/callbacks/callbacks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,13 @@ NVTX.@annotate function save_state_to_disk_func(integrator, output_dir)
output_file = joinpath(output_dir, "day$day.$sec.hdf5")
comms_ctx = ClimaComms.context(integrator.u.c)
hdfwriter = InputOutput.HDF5Writer(output_file, comms_ctx)
InputOutput.HDF5.write_attribute(hdfwriter.file, "time", t) # TODO: a better way to write metadata
# TODO: a better way to write metadata
InputOutput.HDF5.write_attribute(hdfwriter.file, "time", t)
InputOutput.HDF5.write_attribute(
hdfwriter.file,
"atmos_model_hash",
hash(p.atmos),
)
InputOutput.write!(hdfwriter, Y, "Y")
Base.close(hdfwriter)
return nothing
Expand Down
15 changes: 13 additions & 2 deletions src/solver/type_getters.jl
Original file line number Diff line number Diff line change
Expand Up @@ -291,13 +291,20 @@ function get_spaces_restart(Y)
return (; center_space, face_space)
end

function get_state_restart(config::AtmosConfig, restart_file)
"""
    get_state_restart(config::AtmosConfig, restart_file, atmos_model_hash)

Read the state `Y` and the start time from the HDF5 checkpoint `restart_file`,
after verifying that the checkpoint was written with the same `AtmosModel`
(up to its hash) as the current run.

# Arguments
- `config`: configuration; its `comms_ctx` is used to open the checkpoint.
- `restart_file`: path of the checkpoint file; must not be `nothing`.
- `atmos_model_hash`: hash of the `AtmosModel` of the current run, compared
  against the `"atmos_model_hash"` attribute stored in the checkpoint.

# Returns
The tuple `(Y, t_start)`, where `Y` is the field stored under `"Y"` and
`t_start` is the value of the `"time"` attribute.

# Throws
- `AssertionError` if `restart_file` is `nothing`.
- `ErrorException` if the checkpoint has no `"atmos_model_hash"` attribute, or
  if the stored hash differs from `atmos_model_hash`.
"""
function get_state_restart(config::AtmosConfig, restart_file, atmos_model_hash)
    (; comms_ctx) = config

    @assert !isnothing(restart_file)
    reader = InputOutput.HDF5Reader(restart_file, comms_ctx)
    try
        # Checkpoints that do not carry the attribute cannot be verified;
        # report that explicitly instead of surfacing a low-level HDF5 error.
        if !haskey(
            InputOutput.HDF5.attributes(reader.file),
            "atmos_model_hash",
        )
            error(
                "Restart file $(restart_file) does not contain the attribute `atmos_model_hash`, so its AtmosModel cannot be verified",
            )
        end

        # Validate the model before reading the (potentially large) state.
        atmos_model_hash_in_restart =
            InputOutput.HDF5.read_attribute(reader.file, "atmos_model_hash")
        if atmos_model_hash_in_restart != atmos_model_hash
            error(
                "Restart file $(restart_file) was constructed with a different AtmosModel",
            )
        end

        Y = InputOutput.read_field(reader, "Y")
        t_start = InputOutput.HDF5.read_attribute(reader.file, "time")
        return (Y, t_start)
    finally
        # Release the file handle on success and on every error path.
        Base.close(reader)
    end
end

Expand Down Expand Up @@ -664,7 +671,11 @@ function get_simulation(config::AtmosConfig)

if sim_info.restart
s = @timed_str begin
(Y, t_start) = get_state_restart(config, sim_info.restart_file)
(Y, t_start) = get_state_restart(
config,
sim_info.restart_file,
hash(atmos),
)
spaces = get_spaces_restart(Y)
end
@info "Allocating Y: $s"
Expand Down
18 changes: 18 additions & 0 deletions test/restart.jl
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ end
"turbconv" => turb_conv,
"perturb_initstate" => false,
"dt" => "1secs",
"insolation" => "timevarying",
"rad" => "allskywithclear",
"surface_setup" => "DefaultMoninObukhov",
"implicit_diffusion" => true,
"t_end" => "3secs",
"dt_save_state_to_disk" => "1secs",
"enable_diagnostics" => false,
Expand Down Expand Up @@ -184,6 +188,20 @@ end
simulation_restarted2.integrator.p;
name = "integrator.p",
)

# Test that we can catch an Atmos model changing across restarts
config_different = CA.AtmosConfig(
merge(
test_dict,
Dict(
"restart_file" => restart_file,
"insolation" => "rcemipii",
),
),
job_id = job_id * "_different",
)
@test_throws ErrorException CA.get_simulation(config_different)

# end
# end
# end
Expand Down

0 comments on commit 448d485

Please sign in to comment.