Skip to content

Commit

Permalink
More reproducibility fixes
Browse files Browse the repository at this point in the history
Update reproducibility_tests/test_mse.jl

Fix

Fix hopefully last edge case
  • Loading branch information
charleskawczynski committed Jan 3, 2025
1 parent c555c8d commit bbcc3b2
Show file tree
Hide file tree
Showing 4 changed files with 465 additions and 138 deletions.
30 changes: 26 additions & 4 deletions reproducibility_tests/reproducibility_tools.jl
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,8 @@ function reproducibility_results(
:no_comparable_dirs,
)

data_file_references = map(p -> joinpath(p, reference_filename), dirs)
data_file_references =
map(p -> joinpath(p, job_id, reference_filename), dirs)

# foreach(x->maybe_extract(x), data_file_references)

Expand All @@ -203,6 +204,11 @@ function reproducibility_results(
dict_reference = dict_reference_solution,
)
end
if debug_reproducibility()
println("------ end of reproducibility_results")
@show computed_mses
println("------")
end
return (dirs, computed_mses, :successful_comparison)
end

Expand Down Expand Up @@ -275,8 +281,9 @@ function export_reproducibility_results(
skip,
)

commit_shas = readdir(save_dir)
for (computed_mse, dir) in zip(computed_mses, dirs)
commit_hash = basename(dirname(dir))
commit_hash = commit_sha_from_dir(commit_shas, dir)
computed_mse_file =
joinpath(repro_dir, "computed_mse_$commit_hash.json")

Expand All @@ -287,6 +294,15 @@ function export_reproducibility_results(
return (data_file_computed, computed_mses, dirs, how)
end

function commit_sha_from_json_file(file)
filename = basename(file)
if startswith(filename, "computed_mse_") && endswith(filename, ".json")
return replace(filename, "computed_mse_" => "", ".json" => "")
else
error("File $file does not follow correct format.")
end
end

import ClimaReproducibilityTests as CRT
using Test
import PrettyTables
Expand Down Expand Up @@ -367,6 +383,11 @@ function report_reproducibility_results(

for computed_mse in computed_mses
all_reproducible = true
if debug_reproducibility()
println("---- in report_reproducibility_results")
@show computed_mse
println("----")
end
for (var, reproducible) in CRT.test_mse(; computed_mse)
if !reproducible
all_reproducible = false
Expand Down Expand Up @@ -420,8 +441,9 @@ function report_reproducibility_results(
table_data = hcat(sources, summary_statuses)
PrettyTables.pretty_table(io, table_data; header, crop = :none)

n_comparisons = length(computed_mses)
println(io, "Summary:")
println(io, " n_comparisons = $(length(computed_mses))")
println(io, " n_comparisons = $n_comparisons")
println(io, " n_times_reproducible = $n_times_reproducible")
println(io, " n_times_not_reproducible = $n_times_not_reproducible")
println(io, " n_passes = $n_passes")
Expand All @@ -440,7 +462,7 @@ function report_reproducibility_results(
elseif test_broken_report_flakiness
return :not_yet_reproducible
else
if n_passes n_pass_limit
if n_passes n_pass_limit || n_passes == n_comparisons
return :reproducible
else
return :not_reproducible
Expand Down
86 changes: 41 additions & 45 deletions reproducibility_tests/reproducibility_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,9 @@ assist our understanding and reasoning, we let's assume that there are two state
## state 2: data is saved for future reference
- `commit_hash/job_id/output_dir/`
- `commit_hash/job_id/output_dir/reproducibility_bundle/`
- `commit_hash/job_id/output_dir/reproducibility_bundle/ref_counter.jl`
- `commit_hash/job_id/output_dir/reproducibility_bundle/prog_state.hdf5`
- `commit_hash/job_id/reproducibility_bundle/`
- `commit_hash/job_id/reproducibility_bundle/ref_counter.jl`
- `commit_hash/job_id/reproducibility_bundle/prog_state.hdf5`
- `commit_hash/reproducibility_bundle/ref_counter.jl`
- `commit_hash/reproducibility_bundle/job_id/`
Expand Down Expand Up @@ -458,12 +457,12 @@ print_dir_tree(dir) = print_dir_tree(stdout, dir)
print_dir_tree(io::IO, dir) = println(io, string_dir_tree(dir))

function string_dir_tree(dir)
s = "Files in `$dir`\n:"
s = "Files in `$dir`:\n"
for (root, _, files) in walkdir(dir)
for file in files
f = joinpath(root, file)
isfile(f) || continue # rm symlink folders (included but not files)
s *= " $f\n"
s *= " $(replace(f, dir => ""))\n"
end
end
return s
Expand All @@ -480,8 +479,7 @@ end
ref_counter_PR = read_ref_counter(ref_counter_file_PR),
skip = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) != "climaatmos-ci",
dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main",
commit = get(ENV, "BUILDKITE_COMMIT", nothing),
n_hash_characters = 7,
commit = get_commit_sha(),
repro_folder = "reproducibility_bundle",
strip_folder = strip_output_active_path,
)
Expand Down Expand Up @@ -519,8 +517,7 @@ function move_data_to_save_dir(;
ref_counter_PR = read_ref_counter(ref_counter_file_PR),
skip = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) != "climaatmos-ci",
dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main",
commit = get(ENV, "BUILDKITE_COMMIT", nothing),
n_hash_characters = 7,
commit = get_commit_sha(),
repro_folder = "reproducibility_bundle",
strip_folder = strip_output_active_path,
)
Expand All @@ -541,36 +538,20 @@ function move_data_to_save_dir(;
dirs_src,
dest_root,
commit,
n_hash_characters,
repro_folder,
strip_folder,
)
if debug_reproducibility()
@show repro_folder
@show dirs_src
@show dest_root
@show files_dest
@show files_src
@show isfile.(files_src)
println("******")
foreach(print_dir_tree, dirs_src)
println("******")
print_dir_tree(dest_root)
println("******")
end
for (src, dest) in zip(files_src, files_dest)
@show src
@show dest
@assert isfile(src)
mkpath(dirname(dest))
mv(src, dest; force = true)
end
dest_repro = destination_directory(;
dest_root,
commit,
n_hash_characters,
repro_folder,
)
dest_repro = destination_directory(; dest_root, commit, repro_folder)
ref_counter_file_main = joinpath(dest_repro, "ref_counter.jl")
debug_reproducibility() &&
@info "Repro: moving $ref_counter_file_PR to $ref_counter_file_main"
Expand All @@ -590,14 +571,40 @@ function move_data_to_save_dir(;
end
end

"""
get_commit_sha(;
n_hash_characters = 7,
commit = get(ENV, "BUILDKITE_COMMIT", nothing)
)
Returns a string of the commit hash.
"""
get_commit_sha(;
n_hash_characters = 7,
commit = get(ENV, "BUILDKITE_COMMIT", nothing),
) = return commit[1:min(n_hash_characters, length(commit))]

function commit_sha_from_dir(commit_shas, dir)
while true
if isempty(dir)
error("Unfound commit sha.")
else
b = basename(dir)
if b in commit_shas || any(x -> occursin(b, x), commit_shas)
return b
else
dir = dirname(dir)
end
end
end
end

"""
save_dir_transform(
src;
job_id,
dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main",
commit = get(ENV, "BUILDKITE_COMMIT", nothing),
n_hash_characters = 7,
commit = get_commit_sha(),
repro_folder = "reproducibility_bundle",
strip_folder = strip_output_active_path,
)
Expand All @@ -607,25 +614,18 @@ Returns the output file, to be saved, given:
- `job_id` the job ID
- `dest_root` the destination root directory
- `commit` the commit hash
- `n_hash_characters` truncates the commit hash to given number of characters
- `repro_folder` reproducibility folder
- `strip_folder` function to strip folders in output path
"""
function save_dir_transform(
src;
job_id,
dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main",
commit = get(ENV, "BUILDKITE_COMMIT", nothing),
n_hash_characters = 7,
commit = get_commit_sha(),
repro_folder = "reproducibility_bundle",
strip_folder = strip_output_active_path,
)
dest_repro = destination_directory(;
dest_root,
commit,
n_hash_characters,
repro_folder,
)
dest_repro = destination_directory(; dest_root, commit, repro_folder)
src_filename = basename(src)
dst = joinpath(dest_repro, job_id, src_filename)
return strip_output_active_path(dst)
Expand All @@ -634,26 +634,22 @@ end
"""
destination_directory(;
dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main",
commit = get(ENV, "BUILDKITE_COMMIT", nothing),
n_hash_characters = 7,
commit = get_commit_sha(),
repro_folder = "reproducibility_bundle",
)
Return the reproducibility destination directory:
`root/commit_sha/repro_folder`, given:
- `dest_root` the destination root directory
- `commit` the commit hash
- `n_hash_characters` truncates the commit hash to given number of characters
- `repro_folder` reproducibility folder
"""
function destination_directory(;
dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main",
commit = get(ENV, "BUILDKITE_COMMIT", nothing),
n_hash_characters = 7,
commit = get_commit_sha(),
repro_folder = "reproducibility_bundle",
)
commit_sha = commit[1:min(n_hash_characters, length(commit))]
return joinpath(dest_root, commit_sha, repro_folder)
return joinpath(dest_root, commit, repro_folder)
end

"""
Expand Down
19 changes: 12 additions & 7 deletions reproducibility_tests/test_mse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,10 @@ include(joinpath(@__DIR__, "reproducibility_tools.jl"))

debug = true
repro_dir = joinpath(out_dir, "reproducibility_bundle")
computed_mse_filenames =
map(filter(default_is_mse_file, readdir(repro_dir))) do x
joinpath(repro_dir, x)
end
if isempty(computed_mse_filenames)
computed_mse_files = map(filter(default_is_mse_file, readdir(repro_dir))) do x
joinpath(repro_dir, x)
end
if isempty(computed_mse_files)
@warn "No reproducibility tests performed, due to non-existent comparable data."
debug && @show readdir(out_dir)
debug && @show readdir(repro_dir)
Expand Down Expand Up @@ -61,10 +60,16 @@ if isempty(computed_mse_filenames)
else
@testset "Reproducibility tests" begin
commit_hashes =
map(x -> basename(dirname(dirname(x))), computed_mse_filenames)
map(x -> commit_sha_from_json_file(x), computed_mse_files)
computed_mses = map(x -> parse_file_json(x), computed_mse_files)
if debug_reproducibility()
println("------ in test_mse.jl")
@show computed_mses
println("------")
end
results = report_reproducibility_results(
commit_hashes,
computed_mse_filenames;
computed_mses;
test_broken_report_flakiness,
)

Expand Down
Loading

0 comments on commit bbcc3b2

Please sign in to comment.