Skip to content

Commit

Permalink
Initial attempt at loading build stats into db
Browse files Browse the repository at this point in the history
  • Loading branch information
Zentrik committed Jun 20, 2024
1 parent d9a56d4 commit a1ee572
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 4 deletions.
6 changes: 2 additions & 4 deletions buildkite_logs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ function process_commit!(artifact_size_df, pstat_df, aid, sha, branch, init_metr
end

for (timing_series, time) in timings
push!(pstat_df, (aid=aid, series=init_metric_to_series_id[timing_series], value=time))
push!(pstat_df, (aid=aid, series=init_metric_to_series_id(timing_series), value=time))
end
end

Expand All @@ -100,9 +100,7 @@ end
sha = "0a491e00a1f38b814ca173bd7d9bffeadde65738"
branch = "master"

init_metric_to_series_id = Dict(x => x for x in ["inputs", "major", "elapsed", "system", "user", "avgtext", "outputs", "avgdata", "swaps", "minor", "maxresident"])

process_commit!(artifact_size_df, pstat_df, aid, sha, branch, init_metric_to_series_id)
process_commit!(artifact_size_df, pstat_df, aid, sha, branch, identity)
@test artifact_size_df == DataFrame(aid=[aid, aid, aid], component=["julia", "sys.so", "libjulia.so"], size=[9478, 197751633, 199055])
@test pstat_df == DataFrame(aid=[aid, aid, aid, aid, aid, aid, aid, aid, aid, aid, aid], series=["elapsed", "system", "user", "outputs", "minor", "swaps", "maxresident", "major", "avgtext", "avgdata", "inputs"], value=[[0.13, 0.13, 0.14], [0.07, 0.06, 0.07], [0.26, 0.28, 0.28], [0.0, 0.0, 0.0], [20532.0, 20531.0, 20598.0], [0.0, 0.0, 0.0], [180252.0, 180360.0, 180400.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]])
end
Expand Down
2 changes: 2 additions & 0 deletions database/schema.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ These are the tables that we use, `upload_nanosoldier_to_db` should always have
1 │ artifact
3 │ pstat

artifact_size

11 │ benchmark
pull_request_build
16 │ pstat_series
Expand Down
40 changes: 40 additions & 0 deletions upload_nanosoldier_to_db.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ using HTTP, JSON3

const headers = nothing

# Pick the middle element of `x` after sorting.
#
# `x` itself is left untouched (a sorted copy is made). The element returned sits at
# index `length(x) ÷ 2 + 1` of that sorted copy, so for an even number of elements the
# upper of the two central values is returned; no averaging is performed.
function median(x)
    ordered = sort(x)
    middle = div(length(x), 2) + 1
    return ordered[middle]
end
function process_benchmark_archive!(df, path, next_artifact_id, db, benchmark_to_pstat_series_id; return_group_only=false)
println("Processing $path...")
mktempdir() do dir
Expand Down Expand Up @@ -114,6 +115,17 @@ function process_benchmark_archive!(df, path, next_artifact_id, db, benchmark_to
next_artifact_id[] += 1

DBInterface.execute(db, "INSERT INTO artifact (id, name, date, type) VALUES ($(artifact_row.id), '$(artifact_row.name)', $(artifact_row.date), '$(artifact_row.type)')")

# if artifact_row.type == "master"
# artifact_size_df = DataFrame()
# pstat_df = DataFrame()
# process_commit!(artifact_size_df, pstat_df, artifact_row.id, artifact_row.name, "master", identity)
# SQLite.load!(artifact_size_df, db, "artifact_size")
# for row in eachrow(pstat_df)
# metric = row.series in ("minor", "major") ? "$(row.series)-pagefaults" : row.series
# push_metric_to_pstat!(df, db, "init", "median-$metric", artifact_row.id, median(row.value), benchmark_to_pstat_series_id)
# end
# end
else
artifact_id = artifact_query[1, "id"]
end
Expand Down Expand Up @@ -661,3 +673,31 @@ function create_tags_db(db_path)
db = SQLite.DB(db_path)
@time SQLite.load!(df, db, "tags"; replace=true)
end

# Inserting into pstat under different metrics is undesirable as the ui only shows one metric at a time
# function process_log_commit(commit)
# db = SQLite.DB("/media/rag/NVME/Code/rustc-perf-db/julia.db")

# commit = "a14cc38512b6daab6b8417ebb8a64fc794ff89cc"
# artifact_query = DBInterface.execute(db, "SELECT * FROM artifact WHERE name='$(commit)' LIMIT 1") |> DataFrame

# pstat_series_table = DBInterface.execute(db, "SELECT * FROM pstat_series") |> DataFrame
# # need to transform into vectors as indexing into df is extremely slow
# names_col = pstat_series_table[:, "crate"]
# metrics_col = pstat_series_table[:, "metric"]
# pstat_series_id_column = pstat_series_table[:, "id"]

# benchmark_to_pstat_series_id = Dict((name, metric) => id for (id, name, metric) in zip(pstat_series_id_column, names_col, metrics_col))

# artifact_size_df = DataFrame()
# pstat_df = DataFrame()
# process_commit!(artifact_size_df, pstat_df, artifact_query[1, :id], commit, "master", identity)
# SQLite.load!(artifact_size_df, db, "artifact_size")

# df = DataFrame()
# for row in eachrow(pstat_df)
# metric = row.series in ("minor", "major") ? "$(row.series)-pagefaults" : row.series
# push_metric_to_pstat!(df, db, "init", "median-$metric", artifact_query[1, :id], median(row.value), benchmark_to_pstat_series_id)
# end
# SQLite.load!(df, db, "pstat")
# end

0 comments on commit a1ee572

Please sign in to comment.