Skip to content

Commit

Permalink
More reproducibility fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
charleskawczynski committed Jan 2, 2025
1 parent c555c8d commit 00f001f
Show file tree
Hide file tree
Showing 3 changed files with 422 additions and 129 deletions.
6 changes: 4 additions & 2 deletions reproducibility_tests/reproducibility_tools.jl
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,8 @@ function reproducibility_results(
:no_comparable_dirs,
)

data_file_references = map(p -> joinpath(p, reference_filename), dirs)
data_file_references =
map(p -> joinpath(p, job_id, reference_filename), dirs)

# foreach(x->maybe_extract(x), data_file_references)

Expand Down Expand Up @@ -275,8 +276,9 @@ function export_reproducibility_results(
skip,
)

commit_shas = readdir(save_dir)
for (computed_mse, dir) in zip(computed_mses, dirs)
commit_hash = basename(dirname(dir))
commit_hash = commit_sha_from_dir(commit_shas, dir)
computed_mse_file =
joinpath(repro_dir, "computed_mse_$commit_hash.json")

Expand Down
86 changes: 41 additions & 45 deletions reproducibility_tests/reproducibility_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,9 @@ assist our understanding and reasoning, let's assume that there are two state
## state 2: data is saved for future reference
- `commit_hash/job_id/output_dir/`
- `commit_hash/job_id/output_dir/reproducibility_bundle/`
- `commit_hash/job_id/output_dir/reproducibility_bundle/ref_counter.jl`
- `commit_hash/job_id/output_dir/reproducibility_bundle/prog_state.hdf5`
- `commit_hash/job_id/reproducibility_bundle/`
- `commit_hash/job_id/reproducibility_bundle/ref_counter.jl`
- `commit_hash/job_id/reproducibility_bundle/prog_state.hdf5`
- `commit_hash/reproducibility_bundle/ref_counter.jl`
- `commit_hash/reproducibility_bundle/job_id/`
Expand Down Expand Up @@ -458,12 +457,12 @@ print_dir_tree(dir) = print_dir_tree(stdout, dir)
"""
    print_dir_tree(io::IO, dir)

Write the listing built by `string_dir_tree(dir)` to `io`, followed by a
newline.
"""
function print_dir_tree(io::IO, dir)
    listing = string_dir_tree(dir)
    println(io, listing)
    return nothing
end

function string_dir_tree(dir)
s = "Files in `$dir`\n:"
s = "Files in `$dir`:\n"
for (root, _, files) in walkdir(dir)
for file in files
f = joinpath(root, file)
isfile(f) || continue # rm symlink folders (included but not files)
s *= " $f\n"
s *= " $(replace(f, dir => ""))\n"
end
end
return s
Expand All @@ -480,8 +479,7 @@ end
ref_counter_PR = read_ref_counter(ref_counter_file_PR),
skip = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) != "climaatmos-ci",
dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main",
commit = get(ENV, "BUILDKITE_COMMIT", nothing),
n_hash_characters = 7,
commit = get_commit_sha(),
repro_folder = "reproducibility_bundle",
strip_folder = strip_output_active_path,
)
Expand Down Expand Up @@ -519,8 +517,7 @@ function move_data_to_save_dir(;
ref_counter_PR = read_ref_counter(ref_counter_file_PR),
skip = get(ENV, "BUILDKITE_PIPELINE_SLUG", nothing) != "climaatmos-ci",
dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main",
commit = get(ENV, "BUILDKITE_COMMIT", nothing),
n_hash_characters = 7,
commit = get_commit_sha(),
repro_folder = "reproducibility_bundle",
strip_folder = strip_output_active_path,
)
Expand All @@ -541,36 +538,20 @@ function move_data_to_save_dir(;
dirs_src,
dest_root,
commit,
n_hash_characters,
repro_folder,
strip_folder,
)
if debug_reproducibility()
@show repro_folder
@show dirs_src
@show dest_root
@show files_dest
@show files_src
@show isfile.(files_src)
println("******")
foreach(print_dir_tree, dirs_src)
println("******")
print_dir_tree(dest_root)
println("******")
end
for (src, dest) in zip(files_src, files_dest)
@show src
@show dest
@assert isfile(src)
mkpath(dirname(dest))
mv(src, dest; force = true)
end
dest_repro = destination_directory(;
dest_root,
commit,
n_hash_characters,
repro_folder,
)
dest_repro = destination_directory(; dest_root, commit, repro_folder)
ref_counter_file_main = joinpath(dest_repro, "ref_counter.jl")
debug_reproducibility() &&
@info "Repro: moving $ref_counter_file_PR to $ref_counter_file_main"
Expand All @@ -590,14 +571,40 @@ function move_data_to_save_dir(;
end
end

"""
    get_commit_sha(;
        n_hash_characters = 7,
        commit = get(ENV, "BUILDKITE_COMMIT", nothing)
    )

Return the commit hash `commit`, truncated to at most `n_hash_characters`
leading characters.

# Keywords
- `n_hash_characters`: maximum number of leading characters to keep
- `commit`: the commit hash; defaults to the value of the `BUILDKITE_COMMIT`
  environment variable

# Throws
- `ArgumentError` if `commit` is `nothing`, i.e. no commit was passed and
  `BUILDKITE_COMMIT` is not set
"""
function get_commit_sha(;
    n_hash_characters = 7,
    commit = get(ENV, "BUILDKITE_COMMIT", nothing),
)
    # Fail with an actionable message instead of a `MethodError` from
    # `length(nothing)` when the environment variable is missing.
    isnothing(commit) && throw(
        ArgumentError(
            "No commit hash available: pass `commit` or set the `BUILDKITE_COMMIT` environment variable.",
        ),
    )
    # `first` counts characters, so it never produces an invalid string index
    # and naturally handles hashes shorter than `n_hash_characters`.
    return first(commit, n_hash_characters)
end

"""
    commit_sha_from_dir(commit_shas, dir)

Walk up the path `dir`, from its last component towards the root, and return
the first path component that equals, or is a substring of, one of the
entries in `commit_shas`.

# Arguments
- `commit_shas`: collection of known commit-sha strings
- `dir`: path expected to contain a commit-sha component

# Throws
- `ErrorException` if no component of `dir` matches any entry of
  `commit_shas`
"""
function commit_sha_from_dir(commit_shas, dir)
    path = dir
    while !isempty(path)
        component = basename(path)
        # `basename` is empty for a trailing separator or the filesystem root.
        # An empty string is a substring of every sha, so it must never be
        # accepted as a match.
        if !isempty(component) &&
           any(sha -> occursin(component, sha), commit_shas)
            return component
        end
        parent = dirname(path)
        # `dirname("/") == "/"`: stop at the fixed point instead of looping
        # forever on absolute paths.
        parent == path && break
        path = parent
    end
    error("Unfound commit sha.")
end

"""
save_dir_transform(
src;
job_id,
dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main",
commit = get(ENV, "BUILDKITE_COMMIT", nothing),
n_hash_characters = 7,
commit = get_commit_sha(),
repro_folder = "reproducibility_bundle",
strip_folder = strip_output_active_path,
)
Expand All @@ -607,25 +614,18 @@ Returns the output file, to be saved, given:
- `job_id` the job ID
- `dest_root` the destination root directory
- `commit` the commit hash
- `n_hash_characters` truncates the commit hash to given number of characters
- `repro_folder` reproducibility folder
- `strip_folder` function to strip folders in output path
"""
function save_dir_transform(
src;
job_id,
dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main",
commit = get(ENV, "BUILDKITE_COMMIT", nothing),
n_hash_characters = 7,
commit = get_commit_sha(),
repro_folder = "reproducibility_bundle",
strip_folder = strip_output_active_path,
)
dest_repro = destination_directory(;
dest_root,
commit,
n_hash_characters,
repro_folder,
)
dest_repro = destination_directory(; dest_root, commit, repro_folder)
src_filename = basename(src)
dst = joinpath(dest_repro, job_id, src_filename)
return strip_output_active_path(dst)
Expand All @@ -634,26 +634,22 @@ end
"""
destination_directory(;
dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main",
commit = get(ENV, "BUILDKITE_COMMIT", nothing),
n_hash_characters = 7,
commit = get_commit_sha(),
repro_folder = "reproducibility_bundle",
)
Return the reproducibility destination directory:
`root/commit_sha/repro_folder`, given:
- `dest_root` the destination root directory
- `commit` the commit hash
- `n_hash_characters` truncates the commit hash to given number of characters
- `repro_folder` reproducibility folder
"""
function destination_directory(;
dest_root = "/central/scratch/esm/slurm-buildkite/climaatmos-main",
commit = get(ENV, "BUILDKITE_COMMIT", nothing),
n_hash_characters = 7,
commit = get_commit_sha(),
repro_folder = "reproducibility_bundle",
)
commit_sha = commit[1:min(n_hash_characters, length(commit))]
return joinpath(dest_root, commit_sha, repro_folder)
return joinpath(dest_root, commit, repro_folder)
end

"""
Expand Down
Loading

0 comments on commit 00f001f

Please sign in to comment.