From 183f14e84e236a001fa98ee5eca456e518425d24 Mon Sep 17 00:00:00 2001 From: Brett Coon Date: Fri, 15 Mar 2024 15:38:38 -0700 Subject: [PATCH] i6686-opcodemix-intervals: Add OpcodeMix Intervals (#6706) Implements interval support for the opcode_mix analyzer. Adds snapshot support, primarily targeting instruction-count based snapshots. The most common opcodes and opcode categories are printed for each interval. Fixes #6686 --- ...fline-interval-opcode-mix-output.templatex | 12 ++ clients/drcachesim/tools/opcode_mix.cpp | 111 +++++++++++++++++- clients/drcachesim/tools/opcode_mix.h | 28 +++++ suite/tests/CMakeLists.txt | 3 + 4 files changed, 153 insertions(+), 1 deletion(-) create mode 100644 clients/drcachesim/tests/offline-interval-opcode-mix-output.templatex diff --git a/clients/drcachesim/tests/offline-interval-opcode-mix-output.templatex b/clients/drcachesim/tests/offline-interval-opcode-mix-output.templatex new file mode 100644 index 00000000000..2071186a068 --- /dev/null +++ b/clients/drcachesim/tests/offline-interval-opcode-mix-output.templatex @@ -0,0 +1,12 @@ +Hello, world! + *Opcode mix tool results: + *[0-9]* : total executed instructions + *[1-9][0-9]* : *[a-z][ a-z]* + *[1-9][0-9]* : *[a-z][ a-z]* +.* +Printing unmerged per-shard interval results: +There were [0-9]* intervals created. +ID:1 ending at instruction 10000 has [0-9]* opcodes and [0-9]* categories. 
+ *\[1\] Opcode: [ a-z]* \([0-9]*\) Count=[0-9]* PKI=[0-9\.]* + *\[2\] Opcode: [ a-z]* \([0-9]*\) Count=[0-9]* PKI=[0-9\.]* +.* diff --git a/clients/drcachesim/tools/opcode_mix.cpp b/clients/drcachesim/tools/opcode_mix.cpp index 8ac7ad25a17..995c810a578 100644 --- a/clients/drcachesim/tools/opcode_mix.cpp +++ b/clients/drcachesim/tools/opcode_mix.cpp @@ -42,6 +42,7 @@ #include #include +#include #include #include #include @@ -282,7 +283,9 @@ opcode_mix_t::get_category_names(uint category) const uint max_mask = 0x80000000; for (uint mask = 0x1; mask <= max_mask; mask <<= 1) { if (TESTANY(mask, category)) { - category_name += " "; + if (category_name.length() > 0) { + category_name += " "; + } category_name += instr_get_category_name(static_cast(mask)); } @@ -338,5 +341,111 @@ opcode_mix_t::print_results() return true; } +opcode_mix_t::interval_state_snapshot_t * +opcode_mix_t::generate_interval_snapshot(uint64_t interval_id) +{ + return generate_shard_interval_snapshot(&serial_shard_, interval_id); +} + +opcode_mix_t::interval_state_snapshot_t * +opcode_mix_t::generate_shard_interval_snapshot(void *shard_data, uint64_t interval_id) +{ + assert(shard_data != nullptr); + auto &shard = *reinterpret_cast(shard_data); + auto *snap = new snapshot_t; + snap->opcode_counts_ = shard.opcode_counts; + snap->category_counts_ = shard.category_counts; + return snap; +} + +bool +opcode_mix_t::finalize_interval_snapshots( + std::vector &interval_snapshots) +{ + // Loop through snapshots in reverse order, subtracting the *earlier* + // snapshot's cumulative values from this snapshot's cumulative values, to get + // deltas. The first snapshot needs no updates, obviously. 
+ for (int i = static_cast(interval_snapshots.size()) - 1; i > 0; --i) { + auto &this_snap = *reinterpret_cast(interval_snapshots[i]); + auto &prior_snap = *reinterpret_cast(interval_snapshots[i - 1]); + for (auto &opc_count : this_snap.opcode_counts_) { + opc_count.second -= prior_snap.opcode_counts_[opc_count.first]; + } + for (auto &cat_count : this_snap.category_counts_) { + cat_count.second -= prior_snap.category_counts_[cat_count.first]; + } + } + return true; +} + +opcode_mix_t::interval_state_snapshot_t * +opcode_mix_t::combine_interval_snapshots( + const std::vector latest_shard_snapshots, + uint64_t interval_end_timestamp) +{ + snapshot_t *super_snap = new snapshot_t; + for (const interval_state_snapshot_t *base_snap : latest_shard_snapshots) { + const auto *snap = reinterpret_cast(base_snap); + // Skip nullptrs and snapshots from different intervals. + if (snap == nullptr || + snap->get_interval_end_timestamp() != interval_end_timestamp) { + continue; + } + for (const auto opc_count : snap->opcode_counts_) { + super_snap->opcode_counts_[opc_count.first] += opc_count.second; + } + for (const auto cat_count : snap->category_counts_) { + super_snap->category_counts_[cat_count.first] += cat_count.second; + } + } + return super_snap; +} + +bool +opcode_mix_t::print_interval_results( + const std::vector &interval_snapshots) +{ + // Number of opcodes and categories to print per interval. 
+ constexpr int PRINT_TOP_N = 3; + std::cerr << "There were " << interval_snapshots.size() << " intervals created.\n"; + for (auto *base_snap : interval_snapshots) { + const auto *snap = reinterpret_cast(base_snap); + std::cerr << "ID:" << snap->get_interval_id() << " ending at instruction " + << snap->get_instr_count_cumulative() << " has " + << snap->opcode_counts_.size() << " opcodes" + << " and " << snap->category_counts_.size() << " categories.\n"; + std::vector> sorted(snap->opcode_counts_.begin(), + snap->opcode_counts_.end()); + std::sort(sorted.begin(), sorted.end(), cmp_val); + for (int i = 0; i < PRINT_TOP_N && i < static_cast(sorted.size()); ++i) { + std::cerr << " [" << i + 1 << "]" + << " Opcode: " << decode_opcode_name(sorted[i].first) << " (" + << sorted[i].first << ")" + << " Count=" << sorted[i].second << " PKI=" + << sorted[i].second * 1000.0 / snap->get_instr_count_delta() + << "\n"; + } + std::vector> sorted_cats(snap->category_counts_.begin(), + snap->category_counts_.end()); + std::sort(sorted_cats.begin(), sorted_cats.end(), cmp_val); + for (int i = 0; i < PRINT_TOP_N && i < static_cast(sorted_cats.size()); + ++i) { + std::cerr << " [" << i + 1 << "]" + << " Category=" << get_category_names(sorted_cats[i].first) + << " Count=" << sorted_cats[i].second << " PKI=" + << sorted_cats[i].second * 1000.0 / snap->get_instr_count_delta() + << "\n"; + } + } + return true; +} + +bool +opcode_mix_t::release_interval_snapshot(interval_state_snapshot_t *interval_snapshot) +{ + delete interval_snapshot; + return true; +} + } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/tools/opcode_mix.h b/clients/drcachesim/tools/opcode_mix.h index 09619ff935b..526f27cdd43 100644 --- a/clients/drcachesim/tools/opcode_mix.h +++ b/clients/drcachesim/tools/opcode_mix.h @@ -82,6 +82,26 @@ class opcode_mix_t : public analysis_tool_t { std::string parallel_shard_error(void *shard_data) override; + // Interval support. 
+ interval_state_snapshot_t * + generate_interval_snapshot(uint64_t interval_id) override; + interval_state_snapshot_t * + combine_interval_snapshots( + const std::vector latest_shard_snapshots, + uint64_t interval_end_timestamp) override; + bool + print_interval_results( + const std::vector &interval_snapshots) override; + bool + release_interval_snapshot(interval_state_snapshot_t *interval_snapshot) override; + interval_state_snapshot_t * + generate_shard_interval_snapshot(void *shard_data, uint64_t interval_id) override; + + // Convert the captured cumulative snapshots to deltas. + bool + finalize_interval_snapshots( + std::vector &interval_snapshots) override; + protected: std::string get_category_names(uint category); @@ -107,6 +127,14 @@ class opcode_mix_t : public analysis_tool_t { uint category; }; + class snapshot_t : public interval_state_snapshot_t { + public: + // Snapshot the counts as cumulative stats, and then convert them to deltas in + // finalize_interval_snapshots(). Printed interval results are all deltas. + std::unordered_map opcode_counts_; + std::unordered_map category_counts_; + }; + struct worker_data_t { std::unordered_map opcode_data_cache; }; diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index 7c4e0c73074..9855b91fe77 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4081,6 +4081,9 @@ if (BUILD_CLIENTS) torunonly_drcacheoff(interval-instr-count-output ${ci_shared_app} "" "@-simulator_type@basic_counts@-interval_instr_count@10000" "") + torunonly_drcacheoff(interval-opcode-mix-output ${ci_shared_app} "" + "@-simulator_type@opcode_mix@-interval_instr_count@10000" "") + # As for the online test, we check that only 1 thread is in the final trace. torunonly_drcacheoff(max-global client.annotation-concurrency # Include function tracing to sanity test combining with delay and max.