From b9441b340bdd9977f74c06257bffde559b8d1835 Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Thu, 30 Nov 2023 21:54:56 -0500 Subject: [PATCH] i#5505 kernel tracing: Add syscall instr encodings (#6479) Adds encodings for kernel system call instructions to the trace in raw2trace. Kernel system call traces are decoded using libipt, which also provides the instruction encodings. We add support to drir_t to write these encodings to a new buffer, which is re-used for all dynamic instances of that instr, even across multiple system call traces. Fixes taken/not-taken detection for conditional branches in the syscall trace. Adds support in the syscall_mix tool to also report the count of traces for each system call. Adds sysnum to system call trace start and end markers to achieve this. Ran all Intel-PT tests locally: ``` $ ctest -VV -R 'SUDO' ... The following tests passed: code_api|client.drpttracer_SUDO-test code_api|tool.drcachesim.phys_SUDO # not really PT. Just included because of ctest -R. code_api|tool.drcachesim.phys-threads_SUDO # not really PT. Just included because of ctest -R. code_api|tool.drcacheoff.phys_SUDO # not really PT. Just included because of ctest -R. code_api|tool.drcacheoff.kernel.simple_SUDO code_api|tool.drcacheoff.kernel.opcode-mix_SUDO code_api|tool.drcacheoff.kernel.syscall-mix_SUDO 100% tests passed, 0 tests failed out of 7 ``` Found some flakiness due to #6486 in local runs of the kernel sudo tests, which will be addressed separately. 
Issue: #5505 --- clients/drcachesim/common/trace_entry.h | 6 +- clients/drcachesim/drpt2trace/drir.h | 73 ++++++++++++++++++- clients/drcachesim/drpt2trace/drpt2trace.cpp | 7 +- clients/drcachesim/drpt2trace/ir2trace.cpp | 23 ++++-- clients/drcachesim/drpt2trace/ir2trace.h | 2 +- clients/drcachesim/drpt2trace/pt2ir.cpp | 26 ++----- clients/drcachesim/drpt2trace/pt2ir.h | 2 +- .../drcachesim/drpt2trace/test_simple.expect | 14 ---- .../drpt2trace/test_simple.templatex | 14 ++++ clients/drcachesim/reader/reader.cpp | 3 +- .../tests/offline-kernel-opcode-mix.templatex | 6 ++ .../offline-kernel-syscall-mix.templatex | 6 ++ .../tests/offline-syscall-mix.templatex | 2 +- .../drcachesim/tests/syscall-mix.templatex | 2 +- clients/drcachesim/tools/syscall_mix.cpp | 39 ++++++++-- clients/drcachesim/tools/syscall_mix.h | 1 + clients/drcachesim/tools/view.cpp | 6 +- clients/drcachesim/tracer/raw2trace.cpp | 54 ++++++++++---- clients/drcachesim/tracer/raw2trace.h | 1 + suite/tests/CMakeLists.txt | 15 ++-- 20 files changed, 224 insertions(+), 78 deletions(-) delete mode 100644 clients/drcachesim/drpt2trace/test_simple.expect create mode 100644 clients/drcachesim/drpt2trace/test_simple.templatex create mode 100644 clients/drcachesim/tests/offline-kernel-opcode-mix.templatex create mode 100644 clients/drcachesim/tests/offline-kernel-syscall-mix.templatex diff --git a/clients/drcachesim/common/trace_entry.h b/clients/drcachesim/common/trace_entry.h index 166c5f32055..794472ed1a2 100644 --- a/clients/drcachesim/common/trace_entry.h +++ b/clients/drcachesim/common/trace_entry.h @@ -531,12 +531,14 @@ typedef enum { TRACE_MARKER_TYPE_MAYBE_BLOCKING_SYSCALL, /** - * Indicates a point in the trace where a syscall's kernel trace starts. + * Indicates a point in the trace where a syscall's kernel trace starts. The value + * of the marker is set to the syscall number. */ TRACE_MARKER_TYPE_SYSCALL_TRACE_START, /** - * Indicates a point in the trace where a syscall's trace end. 
+ * Indicates a point in the trace where a syscall's trace ends. The value of the + * marker is set to the syscall number. */ TRACE_MARKER_TYPE_SYSCALL_TRACE_END, diff --git a/clients/drcachesim/drpt2trace/drir.h b/clients/drcachesim/drpt2trace/drir.h index eb4b133f20b..a8f177a423c 100644 --- a/clients/drcachesim/drpt2trace/drir.h +++ b/clients/drcachesim/drpt2trace/drir.h @@ -41,6 +41,10 @@ #include "dr_api.h" #include "utils.h" +#include +#include +#include + namespace dynamorio { namespace drmemtrace { @@ -62,8 +66,10 @@ class drir_t { } } + // Appends the given instr to the internal ilist, and records (replaces if + // one already exists) the given encoding for the orig_pc. void - append(instr_t *instr) + append(instr_t *instr, app_pc orig_pc, int instr_length, uint8_t *encoding) { ASSERT(drcontext_ != nullptr, "drir_t: invalid drcontext_"); ASSERT(ilist_ != nullptr, "drir_t: invalid ilist_"); @@ -72,23 +78,88 @@ class drir_t { return; } instrlist_append(ilist_, instr); + record_encoding(orig_pc, instr_length, encoding); } + // Returns the opaque pointer to the dcontext_t used to construct this + // object. void * get_drcontext() { return drcontext_; } + // Returns the instrlist_t of instrs accumulated so far. instrlist_t * get_ilist() { return ilist_; } + // Clears the instrs accumulated in the ilist. Note that this does + // not clear the encodings accumulated. + void + clear_ilist() + { + instrlist_clear(drcontext_, ilist_); + } + + // Returns the address of the encoding recorded for the given orig_pc. + // Encodings are persisted across clear_ilist() calls, so we will + // return the same decode_pc for the same orig_pc unless a new encoding + // is added for the same orig_pc. 
+ app_pc + get_decode_pc(app_pc orig_pc) + { + if (decode_pc_.find(orig_pc) == decode_pc_.end()) { + return nullptr; + } + return decode_pc_[orig_pc].first; + } + private: void *drcontext_; instrlist_t *ilist_; +#define SYSCALL_PT_ENCODING_BUF_SIZE (1024 * 1024) + // For each original app pc key, this stores a pair value: the first + // element is the address where the encoding is stored for the instruction + // at that app pc, the second element is the length of the encoding. + std::unordered_map> decode_pc_; + // A vector of buffers of size SYSCALL_PT_ENCODING_BUF_SIZE. Each buffer + // stores some encoded instructions back-to-back. Note that each element + // in the buffer is a single byte, so one instr's encoding occupies possibly + // multiple consecutive elements. + // We allocate new memory to store kernel instruction encodings in + // increments of SYSCALL_PT_ENCODING_BUF_SIZE. We do not treat this like a + // cache and clear previously stored encodings because we want to ensure + // decode_pc uniqueness to callers of get_decode_pc. + std::vector> instr_encodings_; + // Next available offset into instr_encodings_.back(). + size_t next_encoding_offset_ = 0; + + void + record_encoding(app_pc orig_pc, int instr_len, uint8_t *encoding) + { + auto it = decode_pc_.find(orig_pc); + // We record the encoding only if we don't already have the same encoding for + // the given orig_pc. + if (it != decode_pc_.end() && + // We confirm that the instruction encoding has not changed. Just in case + // the kernel is doing JIT. 
+ it->second.second == instr_len && + memcmp(it->second.first, encoding, it->second.second) == 0) { + return; + } + if (instr_encodings_.empty() || + next_encoding_offset_ + instr_len >= SYSCALL_PT_ENCODING_BUF_SIZE) { + instr_encodings_.emplace_back(new uint8_t[SYSCALL_PT_ENCODING_BUF_SIZE]); + next_encoding_offset_ = 0; + } + app_pc encode_pc = &instr_encodings_.back()[next_encoding_offset_]; + memcpy(encode_pc, encoding, instr_len); + decode_pc_[orig_pc] = std::make_pair(encode_pc, instr_len); + next_encoding_offset_ += instr_len; + } }; } // namespace drmemtrace diff --git a/clients/drcachesim/drpt2trace/drpt2trace.cpp b/clients/drcachesim/drpt2trace/drpt2trace.cpp index e1961c3e5fa..654740376d1 100644 --- a/clients/drcachesim/drpt2trace/drpt2trace.cpp +++ b/clients/drcachesim/drpt2trace/drpt2trace.cpp @@ -461,7 +461,8 @@ main(int argc, const char *argv[]) uint8_t *pt_data = pt_raw_buffer.data(); size_t pt_data_size = pt_raw_buffer.size(); - pt2ir_convert_status_t status = ptconverter->convert(pt_data, pt_data_size, drir); + pt2ir_convert_status_t status = + ptconverter->convert(pt_data, pt_data_size, &drir); if (status != PT2IR_CONV_SUCCESS) { std::cerr << CLIENT_NAME << ": failed to convert PT raw trace to DR IR." << "[error status: " << status << "]" << std::endl; @@ -521,7 +522,7 @@ main(int argc, const char *argv[]) /* Convert the PT Data to DR IR. */ pt2ir_convert_status_t status = - ptconverter->convert(pt_data, pt_data_size, drir); + ptconverter->convert(pt_data, pt_data_size, &drir); if (status != PT2IR_CONV_SUCCESS) { std::cerr << CLIENT_NAME << ": failed to convert PT raw trace to DR IR." << "[error status: " << status << "]" << std::endl; @@ -542,7 +543,7 @@ main(int argc, const char *argv[]) /* Convert the DR IR to trace entries. 
*/ std::vector entries; ir2trace_convert_status_t ir2trace_convert_status = - ir2trace_t::convert(drir, entries); + ir2trace_t::convert(&drir, entries); if (ir2trace_convert_status != IR2TRACE_CONV_SUCCESS) { std::cerr << CLIENT_NAME << ": failed to convert DR IR to trace entries." << "[error status: " << ir2trace_convert_status << "]" << std::endl; diff --git a/clients/drcachesim/drpt2trace/ir2trace.cpp b/clients/drcachesim/drpt2trace/ir2trace.cpp index 62e36621343..57e27e931d1 100644 --- a/clients/drcachesim/drpt2trace/ir2trace.cpp +++ b/clients/drcachesim/drpt2trace/ir2trace.cpp @@ -55,16 +55,19 @@ namespace drmemtrace { #define ERRMSG_HEADER "[drpt2ir] " ir2trace_convert_status_t -ir2trace_t::convert(DR_PARAM_IN drir_t &drir, +ir2trace_t::convert(DR_PARAM_IN drir_t *drir, DR_PARAM_INOUT std::vector &trace, DR_PARAM_IN int verbosity) { - if (drir.get_ilist() == NULL) { + if (drir == nullptr || drir->get_ilist() == NULL) { return IR2TRACE_CONV_ERROR_INVALID_PARAMETER; } - instr_t *instr = instrlist_first(drir.get_ilist()); + instr_t *instr = instrlist_first(drir->get_ilist()); + bool prev_was_repstr = false; while (instr != NULL) { trace_entry_t entry = {}; + entry.size = instr_length(GLOBAL_DCONTEXT, instr); + entry.addr = reinterpret_cast(instr_get_app_pc(instr)); if (!trace.empty() && trace.back().type == TRACE_TYPE_INSTR_CONDITIONAL_JUMP) { if (instr_get_prev(instr) == nullptr || @@ -87,6 +90,7 @@ ir2trace_t::convert(DR_PARAM_IN drir_t &drir, */ entry.type = TRACE_TYPE_INSTR; if (instr_opcode_valid(instr)) { + bool cur_is_repstr = false; if (instr_is_call_direct(instr)) { entry.type = TRACE_TYPE_INSTR_DIRECT_CALL; } else if (instr_is_call_indirect(instr)) { @@ -103,15 +107,20 @@ ir2trace_t::convert(DR_PARAM_IN drir_t &drir, } else if (instr_get_opcode(instr) == OP_sysenter) { entry.type = TRACE_TYPE_INSTR_SYSENTER; } else if (instr_is_rep_string_op(instr)) { - entry.type = TRACE_TYPE_INSTR_MAYBE_FETCH; + cur_is_repstr = true; + if (prev_was_repstr) { + 
entry.type = TRACE_TYPE_INSTR_MAYBE_FETCH; + } else { + prev_was_repstr = true; + } + } + if (!cur_is_repstr) { + prev_was_repstr = false; } } else { VPRINT(1, "Trying to convert an invalid instruction.\n"); } - entry.size = instr_length(GLOBAL_DCONTEXT, instr); - entry.addr = (uintptr_t)instr_get_app_pc(instr); - trace.push_back(entry); instr = instr_get_next(instr); diff --git a/clients/drcachesim/drpt2trace/ir2trace.h b/clients/drcachesim/drpt2trace/ir2trace.h index 18458dc8e3c..13444881f22 100644 --- a/clients/drcachesim/drpt2trace/ir2trace.h +++ b/clients/drcachesim/drpt2trace/ir2trace.h @@ -92,7 +92,7 @@ class ir2trace_t { * error code. */ static ir2trace_convert_status_t - convert(DR_PARAM_IN drir_t &drir, DR_PARAM_INOUT std::vector &trace, + convert(DR_PARAM_IN drir_t *drir, DR_PARAM_INOUT std::vector &trace, DR_PARAM_IN int verbosity = 0); }; diff --git a/clients/drcachesim/drpt2trace/pt2ir.cpp b/clients/drcachesim/drpt2trace/pt2ir.cpp index 4a5ad88f293..13ea7221daf 100644 --- a/clients/drcachesim/drpt2trace/pt2ir.cpp +++ b/clients/drcachesim/drpt2trace/pt2ir.cpp @@ -257,13 +257,13 @@ pt2ir_t::init(DR_PARAM_IN pt2ir_config_t &pt2ir_config, DR_PARAM_IN int verbosit pt2ir_convert_status_t pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_size, - DR_PARAM_INOUT drir_t &drir) + DR_PARAM_INOUT drir_t *drir) { if (!pt2ir_initialized_) { return PT2IR_CONV_ERROR_NOT_INITIALIZED; } - if (pt_data == nullptr || pt_data_size <= 0) { + if (pt_data == nullptr || pt_data_size <= 0 || drir == nullptr) { return PT2IR_CONV_ERROR_INVALID_INPUT; } @@ -379,24 +379,14 @@ pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_ } /* Use drdecode to decode insn(pt_insn) to instr_t. 
*/ - instr_t *instr = instr_create(drir.get_drcontext()); - instr_init(drir.get_drcontext(), instr); + instr_t *instr = instr_create(drir->get_drcontext()); + instr_init(drir->get_drcontext(), instr); instr_set_isa_mode(instr, insn.mode == ptem_32bit ? DR_ISA_IA32 : DR_ISA_AMD64); - bool instr_valid = false; - if (decode(drir.get_drcontext(), insn.raw, instr) != nullptr) - instr_valid = true; - instr_set_translation(instr, (app_pc)insn.ip); - instr_allocate_raw_bits(drir.get_drcontext(), instr, insn.size); - if (!instr_valid) { - /* The decode() function will not correctly identify the raw bits for - * invalid instruction. So we need to set the raw bits of instr manually. - */ - instr_free_raw_bits(drir.get_drcontext(), instr); - instr_set_raw_bits(instr, insn.raw, insn.size); - instr_allocate_raw_bits(drir.get_drcontext(), instr, insn.size); + app_pc instr_ip = reinterpret_cast(insn.ip); + if (decode_from_copy(drir->get_drcontext(), insn.raw, instr_ip, instr) == + nullptr) { #ifdef DEBUG - /* Print the invalid instruction‘s PC and raw bytes in DEBUG builds. */ if (verbosity_ >= 1) { fprintf(stderr, @@ -409,7 +399,7 @@ pt2ir_t::convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_ } #endif } - drir.append(instr); + drir->append(instr, instr_ip, insn.size, insn.raw); } } return PT2IR_CONV_SUCCESS; diff --git a/clients/drcachesim/drpt2trace/pt2ir.h b/clients/drcachesim/drpt2trace/pt2ir.h index a1e2f49f01e..02ec9a0a4f6 100644 --- a/clients/drcachesim/drpt2trace/pt2ir.h +++ b/clients/drcachesim/drpt2trace/pt2ir.h @@ -365,7 +365,7 @@ class pt2ir_t { */ pt2ir_convert_status_t convert(DR_PARAM_IN const uint8_t *pt_data, DR_PARAM_IN size_t pt_data_size, - DR_PARAM_INOUT drir_t &drir); + DR_PARAM_INOUT drir_t *drir); private: /* Diagnose converting errors and output diagnostic results. 
diff --git a/clients/drcachesim/drpt2trace/test_simple.expect b/clients/drcachesim/drpt2trace/test_simple.expect deleted file mode 100644 index c55837c430b..00000000000 --- a/clients/drcachesim/drpt2trace/test_simple.expect +++ /dev/null @@ -1,14 +0,0 @@ -TAG 0x0000000000000000 - +0 L2 b8 01 00 00 00 mov $0x00000001 -> %eax - +5 L2 bf 01 00 00 00 mov $0x00000001 -> %edi - +10 L2 48 be 00 20 40 00 00 mov $0x0000000000402000 -> %rsi - 00 00 00 - +20 L2 ba 0e 00 00 00 mov $0x0000000e -> %edx - +25 L2 0f 05 syscall -> %rcx %r11 - +27 L2 b8 3c 00 00 00 mov $0x0000003c -> %eax - +32 L2 bf 00 00 00 00 mov $0x00000000 -> %edi - +37 L2 0f 05 syscall -> %rcx %r11 -END 0x0000000000000000 - -Number of Instructions: 8 -Number of Trace Entries: 8 diff --git a/clients/drcachesim/drpt2trace/test_simple.templatex b/clients/drcachesim/drpt2trace/test_simple.templatex new file mode 100644 index 00000000000..7dc05ea81df --- /dev/null +++ b/clients/drcachesim/drpt2trace/test_simple.templatex @@ -0,0 +1,14 @@ +TAG 0x0000000000000000 + \+0 L3 .* mov \$0x00000001 -> %eax + \+5 L3 .* mov \$0x00000001 -> %edi + \+10 L3 .* \$0x0000000000402000 -> %rsi + .* + \+20 L3 .* mov \$0x0000000e -> %edx + \+25 L3 .* syscall -> %rcx %r11 + \+27 L3 .* mov \$0x0000003c -> %eax + \+32 L3 .* mov \$0x00000000 -> %edi + \+37 L3 .* syscall -> %rcx %r11 +END 0x0000000000000000 +.* +Number of Instructions: 8 +Number of Trace Entries: 8 diff --git a/clients/drcachesim/reader/reader.cpp b/clients/drcachesim/reader/reader.cpp index 1cd157ab44a..783d2a44bb8 100644 --- a/clients/drcachesim/reader/reader.cpp +++ b/clients/drcachesim/reader/reader.cpp @@ -321,8 +321,7 @@ reader_t::process_input_entry() version_ = cur_ref_.marker.marker_value; else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE) { filetype_ = cur_ref_.marker.marker_value; - if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, filetype_) && - !TESTANY(OFFLINE_FILE_TYPE_KERNEL_SYSCALLS, filetype_)) { + if (TESTANY(OFFLINE_FILE_TYPE_ENCODINGS, 
filetype_)) { expect_no_encodings_ = false; } } else if (cur_ref_.marker.marker_type == TRACE_MARKER_TYPE_CACHE_LINE_SIZE) diff --git a/clients/drcachesim/tests/offline-kernel-opcode-mix.templatex b/clients/drcachesim/tests/offline-kernel-opcode-mix.templatex new file mode 100644 index 00000000000..ce75a56588d --- /dev/null +++ b/clients/drcachesim/tests/offline-kernel-opcode-mix.templatex @@ -0,0 +1,6 @@ +Hello, world! +Opcode mix tool results: +.*: total executed instructions +.* +.*: .*clac +.* diff --git a/clients/drcachesim/tests/offline-kernel-syscall-mix.templatex b/clients/drcachesim/tests/offline-kernel-syscall-mix.templatex new file mode 100644 index 00000000000..8582336ef38 --- /dev/null +++ b/clients/drcachesim/tests/offline-kernel-syscall-mix.templatex @@ -0,0 +1,6 @@ +Hello, world! +Syscall mix tool results: + syscall count : syscall_num +.* + syscall trace count : syscall_num +.* diff --git a/clients/drcachesim/tests/offline-syscall-mix.templatex b/clients/drcachesim/tests/offline-syscall-mix.templatex index 231b6bb93d2..a02a9ca3e1e 100644 --- a/clients/drcachesim/tests/offline-syscall-mix.templatex +++ b/clients/drcachesim/tests/offline-syscall-mix.templatex @@ -1,4 +1,4 @@ Hello, world! Syscall mix tool results: - count : syscall_num + syscall count : syscall_num ( *[1-9][0-9]* : *[0-9]*.*)+ diff --git a/clients/drcachesim/tests/syscall-mix.templatex b/clients/drcachesim/tests/syscall-mix.templatex index 59ae08aad9e..e58b2ec7da2 100644 --- a/clients/drcachesim/tests/syscall-mix.templatex +++ b/clients/drcachesim/tests/syscall-mix.templatex @@ -1,5 +1,5 @@ Hello, world! 
---- ---- Syscall mix tool results: - count : syscall_num + syscall count : syscall_num ( *[1-9][0-9]* : *[0-9]*.*)+ diff --git a/clients/drcachesim/tools/syscall_mix.cpp b/clients/drcachesim/tools/syscall_mix.cpp index 2885e37e364..a18c6f1e5ed 100644 --- a/clients/drcachesim/tools/syscall_mix.cpp +++ b/clients/drcachesim/tools/syscall_mix.cpp @@ -111,14 +111,21 @@ bool syscall_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref) { shard_data_t *shard = reinterpret_cast(shard_data); - if (memref.marker.type != TRACE_TYPE_MARKER || - memref.marker.marker_type != TRACE_MARKER_TYPE_SYSCALL) - return true; - int syscall_num = static_cast(memref.marker.marker_value); + if (memref.marker.type == TRACE_TYPE_MARKER && + memref.marker.marker_type == TRACE_MARKER_TYPE_SYSCALL) { + int syscall_num = static_cast(memref.marker.marker_value); #ifdef X64 - assert(static_cast(syscall_num) == memref.marker.marker_value); + assert(static_cast(syscall_num) == memref.marker.marker_value); #endif - ++shard->syscall_counts[syscall_num]; + ++shard->syscall_counts[syscall_num]; + } else if (memref.marker.type == TRACE_TYPE_MARKER && + memref.marker.marker_type == TRACE_MARKER_TYPE_SYSCALL_TRACE_START) { + int syscall_num = static_cast(memref.marker.marker_value); +#ifdef X64 + assert(static_cast(syscall_num) == memref.marker.marker_value); +#endif + ++shard->syscall_trace_counts[syscall_num]; + } return true; } @@ -142,6 +149,8 @@ syscall_mix_t::process_memref(const memref_t &memref) static bool cmp_second_val(const std::pair &l, const std::pair &r) { + if (l.second == r.second) + return l.first > r.first; return l.second > r.second; } @@ -156,10 +165,13 @@ syscall_mix_t::print_results() for (const auto &keyvals : shard.second->syscall_counts) { total.syscall_counts[keyvals.first] += keyvals.second; } + for (const auto &keyvals : shard.second->syscall_trace_counts) { + total.syscall_trace_counts[keyvals.first] += keyvals.second; + } } } std::cerr << TOOL_NAME << " 
results:\n"; - std::cerr << std::setw(15) << "count" + std::cerr << std::setw(15) << "syscall count" << " : " << std::setw(9) << "syscall_num\n"; std::vector> sorted(total.syscall_counts.begin(), total.syscall_counts.end()); @@ -170,6 +182,19 @@ syscall_mix_t::print_results() std::cerr << std::setw(15) << keyvals.second << " : " << std::setw(9) << keyvals.first << "\n"; } + if (!total.syscall_trace_counts.empty()) { + std::cerr << std::setw(20) << "syscall trace count" + << " : " << std::setw(9) << "syscall_num\n"; + std::vector> sorted_trace( + total.syscall_trace_counts.begin(), total.syscall_trace_counts.end()); + std::sort(sorted_trace.begin(), sorted_trace.end(), cmp_second_val); + for (const auto &keyvals : sorted_trace) { + // XXX: It would be nicer to print the system call name string instead + // of its number. + std::cerr << std::setw(20) << keyvals.second << " : " << std::setw(9) + << keyvals.first << "\n"; + } + } return true; } diff --git a/clients/drcachesim/tools/syscall_mix.h b/clients/drcachesim/tools/syscall_mix.h index 7dc42a1a3c4..04cfb449f81 100644 --- a/clients/drcachesim/tools/syscall_mix.h +++ b/clients/drcachesim/tools/syscall_mix.h @@ -71,6 +71,7 @@ class syscall_mix_t : public analysis_tool_t { protected: struct shard_data_t { std::unordered_map syscall_counts; + std::unordered_map syscall_trace_counts; std::string error; }; diff --git a/clients/drcachesim/tools/view.cpp b/clients/drcachesim/tools/view.cpp index 2c77e04d682..3a07e944671 100644 --- a/clients/drcachesim/tools/view.cpp +++ b/clients/drcachesim/tools/view.cpp @@ -418,10 +418,12 @@ view_t::parallel_shard_memref(void *shard_data, const memref_t &memref) // Handled above. 
break; case TRACE_MARKER_TYPE_SYSCALL_TRACE_START: - std::cerr << "\n"; + std::cerr << "\n"; break; case TRACE_MARKER_TYPE_SYSCALL_TRACE_END: - std::cerr << "\n"; + std::cerr << "\n"; break; case TRACE_MARKER_TYPE_BRANCH_TARGET: // These are not expected to be visible (since the reader adds them diff --git a/clients/drcachesim/tracer/raw2trace.cpp b/clients/drcachesim/tracer/raw2trace.cpp index c6dd6439d93..f38c9e079b2 100644 --- a/clients/drcachesim/tracer/raw2trace.cpp +++ b/clients/drcachesim/tracer/raw2trace.cpp @@ -1008,9 +1008,12 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall } /* Convert the PT Data to DR IR. */ - drir_t drir(GLOBAL_DCONTEXT); - pt2ir_convert_status_t pt2ir_convert_status = - tdata->pt2ir.convert(pt_data->data.get(), pt_data_size, drir); + if (tdata->pt_decode_state_ == nullptr) { + tdata->pt_decode_state_ = std::unique_ptr(new drir_t(GLOBAL_DCONTEXT)); + } + tdata->pt_decode_state_->clear_ilist(); + pt2ir_convert_status_t pt2ir_convert_status = tdata->pt2ir.convert( + pt_data->data.get(), pt_data_size, tdata->pt_decode_state_.get()); if (pt2ir_convert_status != PT2IR_CONV_SUCCESS) { tdata->error = "Failed to convert PT raw trace to DR IR [error status: " + std::to_string(pt2ir_convert_status) + "]"; @@ -1018,13 +1021,15 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall } /* Convert the DR IR to trace entries. 
*/ + addr_t sysnum = + pt_data->header[dynamorio::drmemtrace::PDB_HEADER_SYSNUM_IDX].sysnum.sysnum; std::vector entries; trace_entry_t start_entry = { .type = TRACE_TYPE_MARKER, .size = TRACE_MARKER_TYPE_SYSCALL_TRACE_START, - .addr = 0 }; + .addr = sysnum }; entries.push_back(start_entry); ir2trace_convert_status_t ir2trace_convert_status = - ir2trace_t::convert(drir, entries); + ir2trace_t::convert(tdata->pt_decode_state_.get(), entries); if (ir2trace_convert_status != IR2TRACE_CONV_SUCCESS) { tdata->error = "Failed to convert DR IR to trace entries [error status: " + std::to_string(ir2trace_convert_status) + "]"; @@ -1032,7 +1037,7 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall } trace_entry_t end_entry = { .type = TRACE_TYPE_MARKER, .size = TRACE_MARKER_TYPE_SYSCALL_TRACE_END, - .addr = 0 }; + .addr = sysnum }; entries.push_back(end_entry); if (entries.size() == 2) { tdata->error = "No trace entries generated from PT data"; @@ -1040,17 +1045,40 @@ raw2trace_t::process_syscall_pt(raw2trace_thread_data_t *tdata, uint64_t syscall } accumulate_to_statistic(tdata, RAW2TRACE_STAT_SYSCALL_TRACES_DECODED, 1); + app_pc saved_decode_pc; + trace_entry_t entries_with_encodings[WRITE_BUFFER_SIZE]; + trace_entry_t *buf = entries_with_encodings; for (const auto &entry : entries) { - if (type_is_instr(static_cast(entry.type))) + if (type_is_instr(static_cast(entry.type))) { + if (buf != entries_with_encodings) { + if (!write(tdata, entries_with_encodings, buf, &saved_decode_pc, 1)) { + return false; + } + buf = entries_with_encodings; + } accumulate_to_statistic(tdata, RAW2TRACE_STAT_KERNEL_INSTR_COUNT, 1); + // The per-thread drir_t object (pt_decode_state_) keeps instr encoding + // state across system calls. So different dynamic instances of the same + // instruction in system calls will have the same decode_pc. 
+ saved_decode_pc = tdata->pt_decode_state_->get_decode_pc( + reinterpret_cast(entry.addr)); + if (saved_decode_pc == nullptr) { + tdata->error = + "Unknown pc after ir2trace: did ir2trace insert new instr?"; + return false; + } + if (!append_encoding(tdata, saved_decode_pc, entry.size, buf, + entries_with_encodings)) + return false; + } + *buf = entry; + ++buf; } - - if (!tdata->out_file->write(reinterpret_cast(entries.data()), - sizeof(trace_entry_t) * entries.size())) { - tdata->error = "Failed to write to output file"; - return false; + if (buf != entries_with_encodings) { + if (!write(tdata, entries_with_encodings, buf, &saved_decode_pc, 1)) { + return false; + } } - return true; } #endif diff --git a/clients/drcachesim/tracer/raw2trace.h b/clients/drcachesim/tracer/raw2trace.h index dfdbd333cad..6c451242284 100644 --- a/clients/drcachesim/tracer/raw2trace.h +++ b/clients/drcachesim/tracer/raw2trace.h @@ -1080,6 +1080,7 @@ class raw2trace_t { std::vector rseq_decode_pcs_; #ifdef BUILD_PT_POST_PROCESSOR + std::unique_ptr pt_decode_state_ = nullptr; std::istream *kthread_file; bool pt_metadata_processed = false; pt2ir_t pt2ir; diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index 1d60cb6d480..1ecdc74f66d 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4554,7 +4554,7 @@ if (BUILD_CLIENTS) if (proc_supports_pt) if (BUILD_PT_TRACER AND BUILD_PT_POST_PROCESSOR) get_target_path_for_execution(drpt2trace_path drpt2trace "${location_suffix}") - macro (torunonly_drcacheoff_kernel testname exetgt extra_ops app_args) + macro (torunonly_drcacheoff_kernel testname exetgt extra_ops app_args sim_atops) set(testname_full "tool.drcacheoff.kernel.${testname}_SUDO") torunonly_ci(${testname_full} ${exetgt} drcachesim "offline-kernel-${testname}.c" # for templatex basename @@ -4571,13 +4571,18 @@ if (BUILD_CLIENTS) set(${testname_full}_precmd "foreach@${cmd_pfx}${CMAKE_COMMAND}@-E@remove_directory@${testname_full}.*.dir") 
set(${testname_full}_postcmd - "firstglob@${cmd_pfx}${drcachesim_path}@-simulator_type@basic_counts@-indir@${testname_full}.*.dir${sim_atops}") + "firstglob@${cmd_pfx}${drcachesim_path}@-indir@${testname_full}.*.dir${sim_atops}") endmacro () # We use '-raw_compress none' because when snappy or lz4 is used for raw traces, # the check that complains about malloc use in the client is disabled by invoking # dr_allow_unsafe_static_behavior. We want to perform this check on the kernel # tracing flow. - torunonly_drcacheoff_kernel(simple ${ci_shared_app} "-raw_compress none" "") + torunonly_drcacheoff_kernel(simple ${ci_shared_app} "-raw_compress none" "" + "@-simulator_type@basic_counts") + torunonly_drcacheoff_kernel(opcode-mix ${ci_shared_app} "-raw_compress none" "" + "@-simulator_type@opcode_mix") + torunonly_drcacheoff_kernel(syscall-mix ${ci_shared_app} "-raw_compress none" "" + "@-simulator_type@syscall_mix") endif (BUILD_PT_TRACER AND BUILD_PT_POST_PROCESSOR) endif (proc_supports_pt) @@ -4704,7 +4709,7 @@ if (BUILD_CLIENTS) "-sb_sysroot" "${PROJECT_SOURCE_DIR}/clients/drcachesim/drpt2trace/test_simple.raw") torunonly_api(tool.drpt2trace.sideband drpt2trace - "../../clients/drcachesim/drpt2trace/test_simple.expect" + "../../clients/drcachesim/drpt2trace/test_simple.templatex" "" "${drpt2trace_sideband_args}" ON OFF) set(drpt2trace_elf_args ${drpt2trace_commong} "-mode" "ELF" @@ -4712,7 +4717,7 @@ if (BUILD_CLIENTS) "-elf" "${PROJECT_SOURCE_DIR}/clients/drcachesim/drpt2trace/test_simple.raw/hello") torunonly_api(tool.drpt2trace.elf drpt2trace - "../../clients/drcachesim/drpt2trace/test_simple.expect" + "../../clients/drcachesim/drpt2trace/test_simple.templatex" "" "${drpt2trace_elf_args}" ON OFF) endif (BUILD_PT_TRACER AND BUILD_PT_POST_PROCESSOR) endif (BUILD_CLIENTS)