diff --git a/api/docs/release.dox b/api/docs/release.dox index 5acf8fb5e9b..2d4b4414caa 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -198,6 +198,8 @@ Further non-compatibility-affecting changes include: - Added instr_is_opnd_store_source(). - Added kernel context switch sequence injection support to the drmemtrace scheduler. - Added dr_running_under_dynamorio(). + - Added the instr_get_category_name() API, which returns the name of a + #dr_instr_category_t value as a const char *. - Added #dynamorio::drmemtrace::TRACE_MARKER_TYPE_VECTOR_LENGTH marker to indicate the current vector length for architectures with a hardware defined or runtime changeable vector length (such as AArch64's SVE scalable vectors). diff --git a/clients/drcachesim/tests/offline-opcode_categories.templatex b/clients/drcachesim/tests/offline-opcode_categories.templatex new file mode 100644 index 00000000000..a321ef549b5 --- /dev/null +++ b/clients/drcachesim/tests/offline-opcode_categories.templatex @@ -0,0 +1,32 @@ +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Hello world! +Opcode mix tool results: + 133 : total executed instructions + 34 : mov + 17 : mov + 17 : syscall + 16 : sub + 16 : cmp + 16 : jnz + 16 : lea + 1 : and + + 4 : sets of categories + 51 : move + 33 : branch + 33 : math + 16 : load diff --git a/clients/drcachesim/tools/opcode_mix.cpp b/clients/drcachesim/tools/opcode_mix.cpp index 18743ff8a35..8ac7ad25a17 100644 --- a/clients/drcachesim/tools/opcode_mix.cpp +++ b/clients/drcachesim/tools/opcode_mix.cpp @@ -189,7 +189,7 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) decode_pc = const_cast(memref.instr.encoding); if (memref.instr.encoding_is_new) { // The code may have changed: invalidate the cache. 
- shard->worker->opcode_cache.erase(trace_pc); + shard->worker->opcode_data_cache.erase(trace_pc); } } else { // Legacy trace support where we need the binaries. @@ -221,9 +221,11 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) } } int opcode; - auto cached_opcode = shard->worker->opcode_cache.find(trace_pc); - if (cached_opcode != shard->worker->opcode_cache.end()) { - opcode = cached_opcode->second; + uint category; + auto cached_opcode_category = shard->worker->opcode_data_cache.find(trace_pc); + if (cached_opcode_category != shard->worker->opcode_data_cache.end()) { + opcode = cached_opcode_category->second.opcode; + category = cached_opcode_category->second.category; } else { instr_t instr; instr_init(dcontext_.dcontext, &instr); @@ -236,10 +238,12 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) return false; } opcode = instr_get_opcode(&instr); - shard->worker->opcode_cache[trace_pc] = opcode; + category = instr_get_category(&instr); + shard->worker->opcode_data_cache[trace_pc] = opcode_data_t(opcode, category); instr_free(dcontext_.dcontext, &instr); } ++shard->opcode_counts[opcode]; + ++shard->category_counts[category]; return true; } @@ -263,7 +267,35 @@ opcode_mix_t::process_memref(const memref_t &memref) static bool cmp_val(const std::pair &l, const std::pair &r) { - return (l.second > r.second); + return (l.second > r.second) || (l.second == r.second && l.first < r.first); +} + +std::string +opcode_mix_t::get_category_names(uint category) +{ + std::string category_name; + if (category == DR_INSTR_CATEGORY_UNCATEGORIZED) { + category_name += instr_get_category_name(DR_INSTR_CATEGORY_UNCATEGORIZED); + return category_name; + } + + const uint max_mask = 0x80000000; + for (uint mask = 0x1; mask <= max_mask; mask <<= 1) { + if (TESTANY(mask, category)) { + category_name += " "; + category_name += + instr_get_category_name(static_cast(mask)); + } + + /* + * Stop at the top bit: another shift would wrap a 32-bit mask to 0. 
+ */ + if (mask == max_mask) { + break; + } + } + + return category_name; } bool @@ -278,6 +310,9 @@ opcode_mix_t::print_results() for (const auto &keyvals : shard.second->opcode_counts) { total.opcode_counts[keyvals.first] += keyvals.second; } + for (const auto &keyvals : shard.second->category_counts) { + total.category_counts[keyvals.first] += keyvals.second; + } } } std::cerr << TOOL_NAME << " results:\n"; @@ -289,6 +324,17 @@ opcode_mix_t::print_results() std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9) << decode_opcode_name(keyvals.first) << "\n"; } + std::cerr << "\n"; + std::cerr << std::setw(15) << total.category_counts.size() + << " : sets of categories\n"; + std::vector> sorted_category_counts( + total.category_counts.begin(), total.category_counts.end()); + std::sort(sorted_category_counts.begin(), sorted_category_counts.end(), cmp_val); + for (const auto &keyvals : sorted_category_counts) { + std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9) + << get_category_names(keyvals.first) << "\n"; + } + return true; } diff --git a/clients/drcachesim/tools/opcode_mix.h b/clients/drcachesim/tools/opcode_mix.h index 613e6a43992..09619ff935b 100644 --- a/clients/drcachesim/tools/opcode_mix.h +++ b/clients/drcachesim/tools/opcode_mix.h @@ -39,6 +39,7 @@ #include #include #include +#include #include #include "dr_api.h" // Must be before trace_entry.h from analysis_tool.h. 
@@ -82,8 +83,32 @@ class opcode_mix_t : public analysis_tool_t { parallel_shard_error(void *shard_data) override; protected: + std::string + get_category_names(uint category); + + struct opcode_data_t { + opcode_data_t() + : opcode(OP_INVALID) + , category(DR_INSTR_CATEGORY_UNCATEGORIZED) + { + } + opcode_data_t(int opcode, uint category) + : opcode(opcode) + , category(category) + { + } + int opcode; + /* + * The category field is a uint instead of a dr_instr_category_t because + * multiple category bits can be set when an instruction belongs to more + * than one category. We assume that 32 bits (i.e., 32 categories) are enough + * to be future-proof. + */ + uint category; + }; + struct worker_data_t { - std::unordered_map opcode_cache; + std::unordered_map opcode_data_cache; }; struct shard_data_t { @@ -103,6 +128,7 @@ class opcode_mix_t : public analysis_tool_t { worker_data_t *worker; int64_t instr_count; std::unordered_map opcode_counts; + std::unordered_map category_counts; std::string error; app_pc last_trace_module_start; size_t last_trace_module_size; diff --git a/core/ir/instr_api.h b/core/ir/instr_api.h index e5175b7b1b6..5b6017d4a88 100644 --- a/core/ir/instr_api.h +++ b/core/ir/instr_api.h @@ -1911,6 +1911,8 @@ instr_is_rep_string_op(instr_t *instr); /** * Indicates which category the instruction corresponds to. + * Update instr_get_category_name() in core/ir/instr_shared.c + * when adding new categories to this enum. */ typedef enum { DR_INSTR_CATEGORY_UNCATEGORIZED = 0x0, /**< Uncategorized. */ @@ -1937,6 +1939,15 @@ typedef enum { DR_FP_MATH, /**< Performs arithmetic or conditional operations. */ } dr_fp_type_t; +DR_API +/** + * Assumes \p category is a DR_INSTR_CATEGORY_ constant. + * See #dr_instr_category_t. + * Returns the name of \p category as a string, or "" for any other value. + */ +const char * +instr_get_category_name(dr_instr_category_t category); + DR_API /** * Returns true iff \p instr is a floating point instruction. 
diff --git a/core/ir/instr_shared.c b/core/ir/instr_shared.c index 32cf9841f96..30f0f4b1aea 100644 --- a/core/ir/instr_shared.c +++ b/core/ir/instr_shared.c @@ -470,6 +470,25 @@ instr_get_category(instr_t *instr) /* in rest of file, directly de-reference for performance (PR 622253) */ #define instr_get_category inlined_instr_get_category +const char * +instr_get_category_name(dr_instr_category_t category) +{ + switch (category) { + case DR_INSTR_CATEGORY_UNCATEGORIZED: return "uncategorized"; + case DR_INSTR_CATEGORY_FP: return "fp"; + case DR_INSTR_CATEGORY_LOAD: return "load"; + case DR_INSTR_CATEGORY_STORE: return "store"; + case DR_INSTR_CATEGORY_BRANCH: return "branch"; + case DR_INSTR_CATEGORY_SIMD: return "simd"; + case DR_INSTR_CATEGORY_STATE: return "state"; + case DR_INSTR_CATEGORY_MOVE: return "move"; + case DR_INSTR_CATEGORY_CONVERT: return "convert"; + case DR_INSTR_CATEGORY_MATH: return "math"; + case DR_INSTR_CATEGORY_OTHER: return "other"; + default: return ""; /* Not one of the constants handled above. */ + } +} + static inline void instr_being_modified(instr_t *instr, bool raw_bits_valid) { diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index ab457b418fa..12d363d9a5a 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4254,6 +4254,9 @@ if (BUILD_CLIENTS) "@-simulator_type@func_view" "only_5") endif (NOT RISCV64) if (DR_HOST_X86 AND DR_HOST_X64 AND LINUX) + torunonly_drcacheoff(opcode_categories allasm_x86_64 "" + "@-simulator_type@opcode_mix" "") + # Requires sudo to access pagemap. # XXX: Should we not enable this outside of the Github suite where we know # we have passwordless sudo? 
The pause for a password may cause problems diff --git a/suite/tests/api/drdecode_x86.c b/suite/tests/api/drdecode_x86.c index fc53a4e3e31..1027d2233b1 100644 --- a/suite/tests/api/drdecode_x86.c +++ b/suite/tests/api/drdecode_x86.c @@ -34,14 +34,16 @@ #include "configure.h" #include "dr_api.h" +#include "tools.h" #include #include +#include #define GD GLOBAL_DCONTEXT -#define ASSERT(x) \ - ((void)((!(x)) ? (printf("ASSERT FAILURE: %s:%d: %s\n", __FILE__, __LINE__, #x), \ - abort(), 0) \ +#define ASSERT(x) \ + ((void)((!(x)) ? (print("ASSERT FAILURE: %s:%d: %s\n", __FILE__, __LINE__, #x), \ + abort(), 0) \ : 0)) #define BUFFER_SIZE_BYTES(buf) sizeof(buf) @@ -155,13 +157,25 @@ test_noalloc(void) */ } -#define CHECK_CATEGORY(dcontext, instr, pc, category) \ - ASSERT(instr_encode(dcontext, instr, pc) - pc < BUFFER_SIZE_ELEMENTS(pc)); \ - instr_reset(dcontext, instr); \ - instr_set_operands_valid(instr, true); \ - ASSERT(decode(dcontext, pc, instr) != NULL); \ - ASSERT(instr_get_category(instr) == category); \ - instr_destroy(dcontext, instr); +#define CHECK_CATEGORY(dcontext, instr, pc, categories, category_names) \ + do { \ + byte *instr_encoded_pc = instr_encode(dcontext, instr, pc); \ + ASSERT(instr_encoded_pc - pc < BUFFER_SIZE_ELEMENTS(pc)); \ + instr_reset(dcontext, instr); \ + instr_set_operands_valid(instr, true); \ + byte *instr_decoded_pc = decode(dcontext, pc, instr); \ + ASSERT(instr_decoded_pc != NULL); \ + for (int i = 0; i < BUFFER_SIZE_ELEMENTS(categories); ++i) { \ + if (categories[i] == DR_INSTR_CATEGORY_UNCATEGORIZED) { \ + ASSERT(instr_get_category(instr) == categories[i]); \ + } else { \ + ASSERT(TESTANY(categories[i], instr_get_category(instr))); \ + } \ + ASSERT(strcmp(instr_get_category_name(categories[i]), \ + category_names[i]) == 0); /* Exact match, not just a prefix. */ \ + } \ + instr_destroy(dcontext, instr); \ + } while (0) /* No ';' here: each use supplies its own. 
*/ static void test_categories(void) @@ -172,17 +186,36 @@ test_categories(void) /* 55 OP_mov_ld */ instr = 
XINST_CREATE_load(GD, opnd_create_reg(DR_REG_XAX), OPND_CREATE_MEMPTR(DR_REG_XAX, 42)); - CHECK_CATEGORY(GD, instr, buf, DR_INSTR_CATEGORY_LOAD); + const dr_instr_category_t categories_load[] = { DR_INSTR_CATEGORY_LOAD }; + const char *category_names_load[] = { "load" }; + CHECK_CATEGORY(GD, instr, buf, categories_load, category_names_load); /* 14 OP_cmp */ instr = XINST_CREATE_cmp(GD, opnd_create_reg(DR_REG_EAX), opnd_create_reg(DR_REG_EAX)); - CHECK_CATEGORY(GD, instr, buf, DR_INSTR_CATEGORY_MATH); + const dr_instr_category_t categories_cmp[] = { DR_INSTR_CATEGORY_MATH }; + const char *category_names_cmp[] = { "math" }; + CHECK_CATEGORY(GD, instr, buf, categories_cmp, category_names_cmp); /* 46 OP_jmp */ instr_t *after_callee = INSTR_CREATE_label(GD); instr = XINST_CREATE_jump(GD, opnd_create_instr(after_callee)); - CHECK_CATEGORY(GD, instr, buf, DR_INSTR_CATEGORY_BRANCH); + const dr_instr_category_t categories_jmp[] = { DR_INSTR_CATEGORY_BRANCH }; + const char *category_names_jmp[] = { "branch" }; + CHECK_CATEGORY(GD, instr, buf, categories_jmp, category_names_jmp); + + /* OP_fwait */ + instr = INSTR_CREATE_fwait(GD); + const dr_instr_category_t categories_fwait[] = { DR_INSTR_CATEGORY_FP, + DR_INSTR_CATEGORY_STATE }; + const char *category_names_fwait[] = { "fp", "state" }; + CHECK_CATEGORY(GD, instr, buf, categories_fwait, category_names_fwait); + + /* OP_in */ + instr = INSTR_CREATE_in_1(GD); + const dr_instr_category_t categories_in[] = { DR_INSTR_CATEGORY_UNCATEGORIZED }; + const char *category_names_in[] = { "uncategorized" }; + CHECK_CATEGORY(GD, instr, buf, categories_in, category_names_in); } static void @@ -241,7 +274,7 @@ main() test_store_source(); - printf("done\n"); + print("done\n"); return 0; }