From 73aae264aa7a41bf930b110572ea8f91add93d1c Mon Sep 17 00:00:00 2001 From: Abhinav Anil Sharma Date: Sun, 15 Dec 2024 02:20:30 -0500 Subject: [PATCH] i#7113 decode cache: move module read into raw2trace_shared (#7124) Moves the module read functionality into raw2trace_shared. Refactors opcode_mix and view to use the new function instead of creating a raw2trace_directory_t. This is in preparation for further changes that include the module read logic in a new library (drmemtrace_decode_cache) that provides an instr decode cache (decode_cache_t). To avoid having to pull in the whole drmemtrace_raw2trace implementation into drmemtrace_decode_cache, which will end up including it in the invariant checker and everything that depends on, we separate out the module read logic into raw2trace_shared which is intended for such cases. While the raw2trace and raw2trace_shared implementation usually is made to deal with only file streams, we make an exception here for read_module_file() which uses DR file APIs instead. This is likely fine because this function is not intended for use with code that wants file streams, and it is better than other alternatives: moving raw2trace-using functionality out of raw2trace_directory_t so we can use raw2trace_directory_t in decode_cache_t (this would still add zlib/lz4 etc unnecessary file libs to drmemtrace_decode_cache), create yet another raw2trace_shared_non_stream.cpp for shared code that uses DR file APIs, or duplicating the read_module_file impl in decode_cache_t. This also removes raw2trace_directory_t::initialize_module_file() as that functionality is now provided by read_module_file in raw2trace_shared. Issue: #7113 --- api/docs/release.dox | 4 +++ clients/drcachesim/CMakeLists.txt | 8 +++--- clients/drcachesim/tools/opcode_mix.cpp | 20 +++++++++----- clients/drcachesim/tools/opcode_mix.h | 7 +++-- clients/drcachesim/tools/view.cpp | 18 ++++++++++--- clients/drcachesim/tools/view.h | 8 +++--- .../drcachesim/tracer/raw2trace_directory.cpp | 27 +++---------------- .../drcachesim/tracer/raw2trace_directory.h | 9 +------ .../drcachesim/tracer/raw2trace_shared.cpp | 26 +++++++++++++++++- clients/drcachesim/tracer/raw2trace_shared.h | 20 ++++++++++++++ make/DynamoRIOConfig.cmake.in | 0 11 files changed, 93 insertions(+), 54 deletions(-) mode change 100755 => 100644 make/DynamoRIOConfig.cmake.in diff --git a/api/docs/release.dox b/api/docs/release.dox index a0333e528e6..5b8faf839ed 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -134,6 +134,10 @@ changes: is the encoding of the removed instruction up to a pointer's length. Added #OFFLINE_FILE_VERSION_RETIRED_INSTRUCTIONS_ONLY to increase the trace version for drmemtraces with uncompleted instructions removed. + - Moved module file read logic into read_module_file() in raw2trace_shared, and + removed raw2trace_directory_t::initialize_module_file() since the + read_module_file() can be directly used without having to pull in the whole + raw2trace_directory_t. Further non-compatibility-affecting changes include: - Added X64 Linux support to dr_create_memory_dump(). This API has the same diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index a67e5b64504..942e49847b8 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -161,9 +161,11 @@ add_exported_library(drmemtrace_reuse_distance STATIC tools/reuse_distance.cpp) add_exported_library(drmemtrace_histogram STATIC tools/histogram.cpp) add_exported_library(drmemtrace_reuse_time STATIC tools/reuse_time.cpp) add_exported_library(drmemtrace_basic_counts STATIC tools/basic_counts.cpp) -add_exported_library(drmemtrace_opcode_mix STATIC tools/opcode_mix.cpp) +add_exported_library(drmemtrace_opcode_mix STATIC + tools/opcode_mix.cpp tracer/raw2trace_shared.cpp) add_exported_library(drmemtrace_syscall_mix STATIC tools/syscall_mix.cpp) -add_exported_library(drmemtrace_view STATIC tools/view.cpp) +add_exported_library(drmemtrace_view STATIC + tools/view.cpp tracer/raw2trace_shared.cpp) add_exported_library(drmemtrace_func_view STATIC tools/func_view.cpp) add_exported_library(drmemtrace_invariant_checker STATIC tools/invariant_checker.cpp) add_exported_library(drmemtrace_schedule_stats STATIC tools/schedule_stats.cpp) @@ -1202,7 +1204,7 @@ if (BUILD_TESTS) add_executable(tool.drcacheoff.skip_unit_tests tests/skip_unit_tests.cpp) configure_DynamoRIO_standalone(tool.drcacheoff.skip_unit_tests) target_link_libraries(tool.drcacheoff.skip_unit_tests drmemtrace_analyzer - drmemtrace_view drmemtrace_raw2trace test_helpers) + drmemtrace_view test_helpers ${zlib_libs}) add_win32_flags(tool.drcacheoff.skip_unit_tests) use_DynamoRIO_extension(tool.drcacheoff.skip_unit_tests drreg_static) use_DynamoRIO_extension(tool.drcacheoff.skip_unit_tests drcovlib_static) diff --git a/clients/drcachesim/tools/opcode_mix.cpp b/clients/drcachesim/tools/opcode_mix.cpp index 07c712e09d0..6ff25819d31 100644 --- a/clients/drcachesim/tools/opcode_mix.cpp +++ b/clients/drcachesim/tools/opcode_mix.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2017-2023 Google, Inc. All rights reserved. + * Copyright (c) 2017-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -56,7 +56,7 @@ #include "dr_api.h" #include "memref.h" #include "raw2trace.h" -#include "raw2trace_directory.h" +#include "raw2trace_shared.h" #include "reader.h" #include "trace_entry.h" #include "utils.h" @@ -92,13 +92,16 @@ opcode_mix_t::initialize() return ""; // Legacy trace support where binaries are needed. // We do not support non-module code for such traces. - std::string error = directory_.initialize_module_file(module_file_path_); - if (!error.empty()) - return "Failed to initialize directory: " + error; + file_t modfile; + std::string error = read_module_file(module_file_path_, modfile, modfile_bytes_); + if (!error.empty()) { + return "Failed to read module file: " + error; + } module_mapper_ = - module_mapper_t::create(directory_.modfile_bytes_, nullptr, nullptr, nullptr, - nullptr, knob_verbose_, knob_alt_module_dir_); + module_mapper_t::create(modfile_bytes_, nullptr, nullptr, nullptr, nullptr, + knob_verbose_, knob_alt_module_dir_); module_mapper_->get_loaded_modules(); + dr_close_file(modfile); error = module_mapper_->get_last_error(); if (!error.empty()) return "Failed to load binaries: " + error; @@ -110,6 +113,9 @@ opcode_mix_t::~opcode_mix_t() for (auto &iter : shard_map_) { delete iter.second; } + if (modfile_bytes_ != nullptr) { + delete[] modfile_bytes_; + } } bool diff --git a/clients/drcachesim/tools/opcode_mix.h b/clients/drcachesim/tools/opcode_mix.h index d67fd985855..e07e7997b81 100644 --- a/clients/drcachesim/tools/opcode_mix.h +++ b/clients/drcachesim/tools/opcode_mix.h @@ -46,7 +46,6 @@ #include "analysis_tool.h" #include "memref.h" #include "raw2trace.h" -#include "raw2trace_directory.h" #include "trace_entry.h" namespace dynamorio { @@ -185,9 +184,9 @@ class opcode_mix_t : public analysis_tool_t { std::string module_file_path_; std::unique_ptr module_mapper_; std::mutex mapper_mutex_; - // We reference directory.modfile_bytes throughout operation, so its lifetime - // must match ours. - raw2trace_directory_t directory_; + // XXX: Perhaps module_mapper_t should be made to own the cleanup of + // modfile_bytes_. + char *modfile_bytes_ = nullptr; std::unordered_map shard_map_; // This mutex is only needed in parallel_shard_init. In all other accesses to diff --git a/clients/drcachesim/tools/view.cpp b/clients/drcachesim/tools/view.cpp index 4728c5629d9..264b2797f78 100644 --- a/clients/drcachesim/tools/view.cpp +++ b/clients/drcachesim/tools/view.cpp @@ -53,7 +53,7 @@ #include "memref.h" #include "memtrace_stream.h" #include "raw2trace.h" -#include "raw2trace_directory.h" +#include "raw2trace_shared.h" #include "trace_entry.h" #include "utils.h" @@ -124,9 +124,12 @@ view_t::initialize_stream(memtrace_stream_t *serial_stream) if (module_file_path_.empty()) { has_modules_ = false; } else { - std::string error = directory_.initialize_module_file(module_file_path_); + file_t modfile; + std::string error = read_module_file(module_file_path_, modfile, modfile_bytes_); if (!error.empty()) has_modules_ = false; + else + dr_close_file(modfile); } if (!has_modules_) { // Continue but omit disassembly to support cases where binaries are @@ -136,8 +139,8 @@ view_t::initialize_stream(memtrace_stream_t *serial_stream) // Legacy trace support where binaries are needed. // We do not support non-module code for such traces. module_mapper_ = - module_mapper_t::create(directory_.modfile_bytes_, nullptr, nullptr, nullptr, - nullptr, knob_verbose_, knob_alt_module_dir_); + module_mapper_t::create(modfile_bytes_, nullptr, nullptr, nullptr, nullptr, + knob_verbose_, knob_alt_module_dir_); module_mapper_->get_loaded_modules(); std::string error = module_mapper_->get_last_error(); if (!error.empty()) @@ -662,5 +665,12 @@ view_t::print_results() return true; } +view_t::~view_t() +{ + if (modfile_bytes_ != nullptr) { + delete[] modfile_bytes_; + } +} + } // namespace drmemtrace } // namespace dynamorio diff --git a/clients/drcachesim/tools/view.h b/clients/drcachesim/tools/view.h index 30d5f033e14..4ba6d4102ed 100644 --- a/clients/drcachesim/tools/view.h +++ b/clients/drcachesim/tools/view.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2018-2023 Google, Inc. All rights reserved. + * Copyright (c) 2018-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -47,7 +47,6 @@ #include "memref.h" #include "memtrace_stream.h" #include "raw2trace.h" -#include "raw2trace_directory.h" namespace dynamorio { namespace drmemtrace { @@ -61,6 +60,7 @@ class view_t : public analysis_tool_t { view_t(const std::string &module_file_path, uint64_t skip_refs, uint64_t sim_refs, const std::string &syntax, unsigned int verbose, const std::string &alt_module_dir = ""); + virtual ~view_t(); std::string initialize_stream(memtrace_stream_t *serial_stream) override; bool @@ -136,7 +136,9 @@ class view_t : public analysis_tool_t { // std::optional here. std::string module_file_path_; std::unique_ptr module_mapper_; - raw2trace_directory_t directory_; + // XXX: Perhaps module_mapper_t should be made to own the cleanup of + // modfile_bytes_. + char *modfile_bytes_ = nullptr; unsigned int knob_verbose_; int trace_version_; diff --git a/clients/drcachesim/tracer/raw2trace_directory.cpp b/clients/drcachesim/tracer/raw2trace_directory.cpp index cfd1880183e..fb9ec8b6b7b 100644 --- a/clients/drcachesim/tracer/raw2trace_directory.cpp +++ b/clients/drcachesim/tracer/raw2trace_directory.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2017-2023 Google, Inc. All rights reserved. + * Copyright (c) 2017-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -56,6 +56,7 @@ #include "directory_iterator.h" #include "dr_api.h" // Must be after windows.h. #include "raw2trace_directory.h" // Includes dr_api.h which must be after windows.h. +#include "raw2trace_shared.h" #include "reader.h" #include "raw2trace.h" #include "record_file_reader.h" @@ -433,22 +434,6 @@ raw2trace_directory_t::open_cpu_schedule_file() #endif } -std::string -raw2trace_directory_t::read_module_file(const std::string &modfilename) -{ - modfile_ = dr_open_file(modfilename.c_str(), DR_FILE_READ); - if (modfile_ == INVALID_FILE) - return "Failed to open module file " + modfilename; - uint64 modfile_size; - if (!dr_file_size(modfile_, &modfile_size)) - return "Failed to get module file size: " + modfilename; - size_t modfile_size_ = (size_t)modfile_size; - modfile_bytes_ = new char[modfile_size_]; - if (dr_read_file(modfile_, modfile_bytes_, modfile_size_) < (ssize_t)modfile_size_) - return "Didn't read whole module file " + modfilename; - return ""; -} - bool raw2trace_directory_t::is_window_subdir(const std::string &dir) { @@ -553,7 +538,7 @@ raw2trace_directory_t::initialize(const std::string &indir, const std::string &o } std::string modfilename = modfile_dir + std::string(DIRSEP) + DRMEMTRACE_MODULE_LIST_FILENAME; - std::string err = read_module_file(modfilename); + std::string err = read_module_file(modfilename, modfile_, modfile_bytes_); if (!err.empty()) return err; @@ -604,12 +589,6 @@ raw2trace_directory_t::initialize(const std::string &indir, const std::string &o return open_thread_files(); } -std::string -raw2trace_directory_t::initialize_module_file(const std::string &module_file_path) -{ - return read_module_file(module_file_path); -} - std::string raw2trace_directory_t::initialize_funclist_file( const std::string &funclist_file_path, diff --git a/clients/drcachesim/tracer/raw2trace_directory.h b/clients/drcachesim/tracer/raw2trace_directory.h index 1d490c6c4c7..c8de4f30928 100644 --- a/clients/drcachesim/tracer/raw2trace_directory.h +++ b/clients/drcachesim/tracer/raw2trace_directory.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2017-2023 Google, Inc. All rights reserved. + * Copyright (c) 2017-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -78,11 +78,6 @@ class raw2trace_directory_t { initialize(const std::string &indir, const std::string &outdir, const std::string &compress = DEFAULT_TRACE_COMPRESSION_TYPE, const std::string &syscall_template_file = ""); - // Use this instead of initialize() to only fill in modfile_bytes, for - // constructing a module_mapper_t. Returns "" on success or an error message on - // failure. - std::string - initialize_module_file(const std::string &module_file_path); // Use this instead of initialize() to only read the funcion map file. // Returns "" on success or an error message on failure. // On success, pushes the parsed entries from the file into "entries". @@ -115,8 +110,6 @@ class raw2trace_directory_t { std::string trace_suffix(); std::string - read_module_file(const std::string &modfilename); - std::string open_thread_files(); std::string open_thread_log_file(const char *basename); diff --git a/clients/drcachesim/tracer/raw2trace_shared.cpp b/clients/drcachesim/tracer/raw2trace_shared.cpp index 5421a400ed6..ca2b17e82b2 100644 --- a/clients/drcachesim/tracer/raw2trace_shared.cpp +++ b/clients/drcachesim/tracer/raw2trace_shared.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2016-2023 Google, Inc. All rights reserved. + * Copyright (c) 2016-2024 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -166,6 +166,30 @@ drmemtrace_get_timestamp_from_offline_trace(const void *trace, size_t trace_size return DRMEMTRACE_SUCCESS; } +std::string +read_module_file(const std::string &modfilename, file_t &modfile, char *&modfile_bytes) +{ + modfile = dr_open_file(modfilename.c_str(), DR_FILE_READ); + if (modfile == INVALID_FILE) + return "Failed to open module file " + modfilename; + uint64 modfile_size; + if (!dr_file_size(modfile, &modfile_size)) { + dr_close_file(modfile); + modfile = INVALID_FILE; + return "Failed to get module file size: " + modfilename; + } + size_t modfile_size_ = (size_t)modfile_size; + modfile_bytes = new char[modfile_size_]; + if (dr_read_file(modfile, modfile_bytes, modfile_size_) < (ssize_t)modfile_size_) { + dr_close_file(modfile); + delete[] modfile_bytes; + modfile = INVALID_FILE; + modfile_bytes = nullptr; + return "Didn't read whole module file " + modfilename; + } + return ""; +} + // The output range is really a segment and not the whole module. app_pc module_mapper_t::find_mapped_trace_bounds(app_pc trace_address, diff --git a/clients/drcachesim/tracer/raw2trace_shared.h b/clients/drcachesim/tracer/raw2trace_shared.h index 18b08f6dc43..47952f9e6f5 100644 --- a/clients/drcachesim/tracer/raw2trace_shared.h +++ b/clients/drcachesim/tracer/raw2trace_shared.h @@ -159,6 +159,26 @@ class memref_counter_t : public reader_t { std::list list_; }; +/** + * Reads the module file at the given \p modfilename path. Returns an + * empty string if successful, or the error string if not. + * + * This uses the DR file APIs to read the module file. Generally we + * attempt to isolate implementation that uses DR file APIs to + * raw2trace_directory_t, and have raw2trace and raw2trace_shared + * deal only with file streams, but we make an exception here. This + * is acceptable because no current users of read_module_file + * need to read it from a stream. Also, this is a simple convenience + * routine that only reads the file without any module file specific + * logic. + * + * If successful, the returned \p modfile must be closed using + * \p dr_close_file, and the returned \p modefilebytes must be freed + * using a delete[]. + */ +std::string +read_module_file(const std::string &modfilename, file_t &modfile, char *&modfile_bytes); + struct module_t { module_t(const char *path, app_pc orig, byte *map, size_t offs, size_t size, size_t total_size, bool external = false) diff --git a/make/DynamoRIOConfig.cmake.in b/make/DynamoRIOConfig.cmake.in old mode 100755 new mode 100644