Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i#7113 decode cache: Add analyzer library for decode_cache_t #7114

Open
wants to merge 47 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
fca712e
i#7113: Add library to cache information about decoded instructions
abhinav92003 Dec 9, 2024
abebffc
Docx improvement, and handle regdeps branch_target case.
abhinav92003 Dec 9, 2024
18f7028
Use instr_noalloc_t where possible.
abhinav92003 Dec 10, 2024
4487168
Remove redundant test.
abhinav92003 Dec 10, 2024
41595eb
move impl to cpp
abhinav92003 Dec 10, 2024
d2e94c7
Move impl to cpp
abhinav92003 Dec 10, 2024
f0f8a74
Cleanup and aarch64 mov fix.
abhinav92003 Dec 10, 2024
a1b1d63
Fix windows bug
abhinav92003 Dec 10, 2024
db8a3ad
Reviewer suggested changes
abhinav92003 Dec 10, 2024
1e810b5
Cleanup
abhinav92003 Dec 10, 2024
1fc4c04
Merge branch 'master' into i7113-decode-cache-lib
abhinav92003 Dec 14, 2024
bf76f70
Merge branch 'master' into i7113-decode-cache-lib
abhinav92003 Dec 15, 2024
45e062f
Add instr_decode_cache_t support to opcode_mix; add module_mapper_t s…
abhinav92003 Dec 15, 2024
5e28112
Drop instr_ from instr_decode_cache
abhinav92003 Dec 15, 2024
0a33a51
Handle missing use_module_mapper case
abhinav92003 Dec 15, 2024
29d10a3
Fix clang-format
abhinav92003 Dec 15, 2024
0e2df67
Make add_decode_info simpler and fix build error
abhinav92003 Dec 15, 2024
716a0ea
Cleanup
abhinav92003 Dec 15, 2024
fefe38b
Proactive destruction of module mapper
abhinav92003 Dec 16, 2024
2f0a708
Remove stale file
abhinav92003 Dec 16, 2024
84a2039
Move impl to cpp
abhinav92003 Dec 16, 2024
141e3c5
Fix when we use module mapper in opcode mix
abhinav92003 Dec 16, 2024
1092a21
Revert view deps
abhinav92003 Dec 16, 2024
b2ba91c
Use filetype instead of encoding_is_new
abhinav92003 Dec 16, 2024
d70e227
Cleanup
abhinav92003 Dec 16, 2024
d51d823
Add tmate to windows test
abhinav92003 Dec 16, 2024
0000c5b
Remove test filter
abhinav92003 Dec 16, 2024
3737ec5
Add missing standalone_init
abhinav92003 Dec 16, 2024
652bab0
Add tmate again
abhinav92003 Dec 16, 2024
1177304
Remove drmemtrace_static from test deps
abhinav92003 Dec 16, 2024
96efb50
Keep obj count tracking for tests
abhinav92003 Dec 16, 2024
31e1eab
Keep only one bool for use_module_mapper
abhinav92003 Dec 16, 2024
3702f29
Convert to doc comment
abhinav92003 Dec 16, 2024
d7a4d10
Add tmate... again
abhinav92003 Dec 16, 2024
57f34ad
Disable module mapper tests on Windows due to i#5960
abhinav92003 Dec 17, 2024
3f9cc4e
Remove tmate
abhinav92003 Dec 17, 2024
44971f6
Add TODO for some future items
abhinav92003 Dec 17, 2024
3042d6b
More apt function visibility
abhinav92003 Dec 17, 2024
2b301b5
Merge branch 'master' into i7113-decode-cache-lib
abhinav92003 Dec 19, 2024
4157f23
Reviewer suggested changes
abhinav92003 Dec 19, 2024
0f50370
Add clear_cache API for parallel_shard_exit
abhinav92003 Dec 19, 2024
8dc950c
Add optimization to avoid repeated module map lookups
abhinav92003 Dec 19, 2024
160e052
Remove common-case opt. Need add_decode_info for new encodings
abhinav92003 Dec 19, 2024
74da310
Optimize lookups into the cache
abhinav92003 Dec 19, 2024
6bcc33b
Skip re-decoding on invalid cached decode info. It's redundant.
abhinav92003 Dec 19, 2024
b24d79a
Cleanup
abhinav92003 Dec 22, 2024
e175cd9
Avoid DecodeInfo object construction when not needed.
abhinav92003 Dec 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 34 additions & 3 deletions clients/drcachesim/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -161,19 +161,27 @@ add_exported_library(drmemtrace_reuse_distance STATIC tools/reuse_distance.cpp)
add_exported_library(drmemtrace_histogram STATIC tools/histogram.cpp)
add_exported_library(drmemtrace_reuse_time STATIC tools/reuse_time.cpp)
add_exported_library(drmemtrace_basic_counts STATIC tools/basic_counts.cpp)
add_exported_library(drmemtrace_opcode_mix STATIC
tools/opcode_mix.cpp tracer/raw2trace_shared.cpp)
add_exported_library(drmemtrace_opcode_mix STATIC tools/opcode_mix.cpp)
add_exported_library(drmemtrace_syscall_mix STATIC tools/syscall_mix.cpp)
add_exported_library(drmemtrace_view STATIC
tools/view.cpp tracer/raw2trace_shared.cpp)
add_exported_library(drmemtrace_func_view STATIC tools/func_view.cpp)
add_exported_library(drmemtrace_invariant_checker STATIC tools/invariant_checker.cpp)
add_exported_library(drmemtrace_schedule_stats STATIC tools/schedule_stats.cpp)
add_exported_library(drmemtrace_decode_cache STATIC
tools/common/decode_cache.cpp
# XXX: Possibly create a library for raw2trace_shared, to avoid
# multiple build overhead.
tracer/raw2trace_shared.cpp)
abhinav92003 marked this conversation as resolved.
Show resolved Hide resolved
add_exported_library(drmemtrace_schedule_file STATIC common/schedule_file.cpp)
add_exported_library(drmemtrace_mutex_dbg_owned STATIC common/mutex_dbg_owned.cpp)

target_link_libraries(drmemtrace_invariant_checker drdecode drmemtrace_schedule_file)
target_link_libraries(drmemtrace_invariant_checker drdecode drmemtrace_schedule_file
drmemtrace_decode_cache)
target_link_libraries(drmemtrace_decode_cache drcovlib_static)
target_link_libraries(drmemtrace_opcode_mix drmemtrace_decode_cache)

configure_DynamoRIO_standalone(drmemtrace_decode_cache)
configure_DynamoRIO_standalone(drmemtrace_opcode_mix)
configure_DynamoRIO_standalone(drmemtrace_view)
configure_DynamoRIO_standalone(drmemtrace_invariant_checker)
Expand Down Expand Up @@ -320,6 +328,7 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/reader)
# so that we can more cleanly separate tracer and raw2trace code.
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/tracer)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/scheduler)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/tools/common)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

if (BUILD_PT_POST_PROCESSOR)
Expand Down Expand Up @@ -611,6 +620,7 @@ restore_nonclient_flags(drmemtrace_analyzer)
restore_nonclient_flags(drmemtrace_invariant_checker)
restore_nonclient_flags(drmemtrace_schedule_stats)
restore_nonclient_flags(drmemtrace_schedule_file)
restore_nonclient_flags(drmemtrace_decode_cache)

# We need to pass /EHsc and we pull in libcmtd into drcachesim from a dep lib.
# Thus we need to override the /MT with /MTd.
Expand Down Expand Up @@ -684,6 +694,7 @@ add_win32_flags(drmemtrace_analyzer)
add_win32_flags(drmemtrace_invariant_checker)
add_win32_flags(drmemtrace_schedule_stats)
add_win32_flags(drmemtrace_schedule_file)
add_win32_flags(drmemtrace_decode_cache)
add_win32_flags(directory_iterator)
add_win32_flags(test_helpers)
add_win32_flags(drmemtrace_mutex_dbg_owned)
Expand Down Expand Up @@ -942,6 +953,26 @@ if (BUILD_TESTS)
add_win32_flags(tool.drcacheoff.burst_aarch64_sys)
endif ()

# Standalone unit test for the decode_cache_t library, built from
# tests/decode_cache_test.cpp. It links against drmemtrace_decode_cache and
# test_helpers and is run with no command-line arguments.
add_executable(tool.drcachesim.decode_cache_test tests/decode_cache_test.cpp)
configure_DynamoRIO_standalone(tool.drcachesim.decode_cache_test)
add_win32_flags(tool.drcachesim.decode_cache_test)
target_link_libraries(tool.drcachesim.decode_cache_test
drmemtrace_decode_cache test_helpers)
add_test(NAME tool.drcachesim.decode_cache_test
COMMAND tool.drcachesim.decode_cache_test)
set_tests_properties(tool.drcachesim.decode_cache_test PROPERTIES
TIMEOUT ${test_seconds})

# Standalone unit test built from tests/opcode_mix_test.cpp. It links against
# drmemtrace_opcode_mix, drmemtrace_decode_cache and test_helpers, and is run
# with no command-line arguments.
add_executable(tool.drcacheoff.opcode_mix_test tests/opcode_mix_test.cpp)
configure_DynamoRIO_standalone(tool.drcacheoff.opcode_mix_test)
add_win32_flags(tool.drcacheoff.opcode_mix_test)
target_link_libraries(tool.drcacheoff.opcode_mix_test
drmemtrace_opcode_mix drmemtrace_decode_cache test_helpers)
add_test(NAME tool.drcacheoff.opcode_mix_test
COMMAND tool.drcacheoff.opcode_mix_test)
set_tests_properties(tool.drcacheoff.opcode_mix_test PROPERTIES
TIMEOUT ${test_seconds})

# XXX i#1997: dynamorio_static is not supported on Mac yet
# FIXME i#2949: gcc 7.3 fails to link certain configs
# TODO i#3544: Port tests to RISC-V 64
Expand Down
303 changes: 303 additions & 0 deletions clients/drcachesim/tests/decode_cache_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,303 @@
/* **********************************************************
* Copyright (c) 2024 Google, LLC All rights reserved.
* **********************************************************/

/*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of Google, Inc. nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE, LLC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/

/* Tests for the decode_cache_t library. */

#include <iostream>
#include <vector>

#include "decode_cache.h"
#include "../common/memref.h"
#include "memref_gen.h"

namespace dynamorio {
namespace drmemtrace {

static constexpr addr_t TID_A = 1;
static constexpr offline_file_type_t ENCODING_FILE_TYPE =
static_cast<offline_file_type_t>(OFFLINE_FILE_TYPE_ENCODINGS);

// Test-only decode info type. It records, in public fields, the answers to a
// few instruction queries made when the decode info is set, so that the tests
// can tell which instruction a given cached entry was built from.
class test_decode_info_t : public decode_info_base_t {
public:
    // Results of instr_is_nop(), instr_is_return() and instr_is_interrupt() on
    // the instruction handed to set_decode_info_derived().
    bool is_nop_ { false };
    bool is_ret_ { false };
    bool is_ipt_ { false };
    // Becomes true once set_decode_info_derived() has run on this object.
    bool decode_info_set_ { false };

private:
    // Records the properties of the decoded instruction. The dcontext and
    // memref_instr arguments are not used by this test implementation.
    void
    set_decode_info_derived(void *dcontext,
                            const dynamorio::drmemtrace::_memref_instr_t &memref_instr,
                            instr_t *instr) override
    {
        // decode_cache_t is expected to set the decode info at most once for
        // any given object, so a second call indicates a caching bug.
        assert(!decode_info_set_);
        decode_info_set_ = true;
        is_ipt_ = instr_is_interrupt(instr);
        is_ret_ = instr_is_return(instr);
        is_nop_ = instr_is_nop(instr);
    }
};

std::string
check_decode_caching(void *drcontext, bool persist_instrs, bool use_module_mapper)
{
static constexpr addr_t BASE_ADDR = 0x123450;
instr_t *nop = XINST_CREATE_nop(drcontext);
instr_t *ret = XINST_CREATE_return(drcontext);
instr_t *ipt = XINST_CREATE_interrupt(drcontext, OPND_CREATE_INT8(10));
instrlist_t *ilist = instrlist_create(drcontext);
instrlist_append(ilist, nop);
instrlist_append(ilist, ret);
instrlist_append(ilist, ipt);
std::vector<memref_with_IR_t> memref_setup = {
{ gen_instr(TID_A), nop },
{ gen_instr(TID_A), ret },
{ gen_instr(TID_A), nop },
{ gen_instr(TID_A), ipt },
};
std::vector<memref_t> memrefs;
instrlist_t *ilist_for_test_decode_cache = nullptr;
std::string module_file_for_test_decode_cache = "";
if (use_module_mapper) {
// This does not set encodings in the memref.instr.
memrefs = add_encodings_to_memrefs(ilist, memref_setup, 0,
/*set_only_instr_addr=*/true);
// We pass the instrs to construct the test_module_mapper_t in the
// test_decode_cache_t;
ilist_for_test_decode_cache = ilist;
module_file_for_test_decode_cache = "some_mod_file";
} else {
memrefs = add_encodings_to_memrefs(ilist, memref_setup, BASE_ADDR);
}

test_decode_info_t test_decode_info;
if (test_decode_info.is_valid()) {
return "Unexpected valid default-constructed decode info";
}

if (persist_instrs) {
// These are tests to verify the operation of instr_decode_info_t: that it stores
// the instr_t correctly.
// Tests for instr_decode_cache_t are done when persist_instrs = false (see
// the else part below).
test_decode_cache_t<instr_decode_info_t> decode_cache(
drcontext,
/*persist_decoded_instr=*/true, ilist_for_test_decode_cache);
std::string err =
decode_cache.init(ENCODING_FILE_TYPE, module_file_for_test_decode_cache, "");
if (err != "")
return err;
for (const memref_t &memref : memrefs) {
instr_decode_info_t *unused_cached_decode_info;
err = decode_cache.add_decode_info(memref.instr, unused_cached_decode_info);
if (err != "")
return err;
}
instr_decode_info_t *decode_info_nop =
decode_cache.get_decode_info(reinterpret_cast<app_pc>(memrefs[0].instr.addr));
if (decode_info_nop == nullptr || !decode_info_nop->is_valid() ||
!instr_is_nop(decode_info_nop->get_decoded_instr())) {
return "Unexpected instr_decode_info_t for nop instr";
}
instr_decode_info_t *decode_info_ret =
decode_cache.get_decode_info(reinterpret_cast<app_pc>(memrefs[1].instr.addr));
if (decode_info_ret == nullptr || !decode_info_ret->is_valid() ||
!instr_is_return(decode_info_ret->get_decoded_instr())) {
return "Unexpected instr_decode_info_t for ret instr";
}
} else {
// These are tests to verify the operation of instr_decode_cache_t: that it caches
// decode info correctly.
test_decode_cache_t<test_decode_info_t> decode_cache(
drcontext,
/*persist_decoded_instrs=*/false, ilist_for_test_decode_cache);
std::string err =
decode_cache.init(ENCODING_FILE_TYPE, module_file_for_test_decode_cache, "");
if (err != "")
return err;
// Test: Lookup non-existing pc.
if (decode_cache.get_decode_info(
reinterpret_cast<app_pc>(memrefs[0].instr.addr)) != nullptr) {
return "Unexpected test_decode_info_t for never-seen pc";
}

test_decode_info_t *cached_decode_info;

// Test: Lookup existing pc.
err = decode_cache.add_decode_info(memrefs[0].instr, cached_decode_info);
if (err != "")
return err;
test_decode_info_t *decode_info_nop =
decode_cache.get_decode_info(reinterpret_cast<app_pc>(memrefs[0].instr.addr));
if (decode_info_nop == nullptr || decode_info_nop != cached_decode_info ||
!decode_info_nop->is_valid() || !decode_info_nop->is_nop_) {
return "Unexpected test_decode_info_t for nop instr";
}

// Test: Lookup another existing pc.
err = decode_cache.add_decode_info(memrefs[1].instr, cached_decode_info);
if (err != "")
return err;
test_decode_info_t *decode_info_ret =
decode_cache.get_decode_info(reinterpret_cast<app_pc>(memrefs[1].instr.addr));
if (decode_info_ret == nullptr || decode_info_ret != cached_decode_info ||
!decode_info_ret->is_valid() || !decode_info_ret->is_ret_) {
return "Unexpected test_decode_info_t for ret instr";
}

// Test: Lookup existing pc but from a different memref.
// Set up the second nop memref to reuse the same encoding as the first nop.
memrefs[2].instr.encoding_is_new = false;
err = decode_cache.add_decode_info(memrefs[2].instr, cached_decode_info);
if (err != "")
return err;
test_decode_info_t *decode_info_nop_2 =
decode_cache.get_decode_info(reinterpret_cast<app_pc>(memrefs[2].instr.addr));
if (decode_info_nop_2 != decode_info_nop ||
decode_info_nop_2 != cached_decode_info) {
return "Unexpected decode info instance for second instance of nop";
}

if (!use_module_mapper) {
// Test: Overwrite existing decode info for a pc. Works only with embedded
// encodings.
// Pretend the interrupt is at the same trace pc as the ret.
// Encodings have been added to the memref already so this still remains
// an interrupt instruction even though we've modified addr.
memrefs[3].instr.addr = memrefs[1].instr.addr;
err = decode_cache.add_decode_info(memrefs[3].instr, cached_decode_info);
if (err != "")
return err;
test_decode_info_t *decode_info_ipt = decode_cache.get_decode_info(
reinterpret_cast<app_pc>(memrefs[3].instr.addr));
if (decode_info_ipt == nullptr || decode_info_ipt != cached_decode_info ||
!decode_info_ipt->is_valid() || !decode_info_ipt->is_ipt_ ||
decode_info_ipt->is_ret_) {
return "Unexpected test_decode_info_t for ipt instr";
}
decode_info_ret = decode_cache.get_decode_info(
reinterpret_cast<app_pc>(memrefs[1].instr.addr));
if (decode_info_ret != decode_info_ipt) {
return "Expected ret and ipt memref pcs to return the same decode info";
}
}

// Test: Verify all cached decode info gets cleared.
decode_cache.clear_cache();
decode_info_nop =
decode_cache.get_decode_info(reinterpret_cast<app_pc>(memrefs[0].instr.addr));
decode_info_ret =
decode_cache.get_decode_info(reinterpret_cast<app_pc>(memrefs[1].instr.addr));
if (decode_info_nop != nullptr || decode_info_ret != nullptr) {
return "Cached decode info not cleared after clear_cache()";
}
}
instrlist_clear_and_destroy(drcontext, ilist);
std::cerr << "check_decode_caching with persist_instrs: " << persist_instrs
<< ", use_module_mapper: " << use_module_mapper << " passed\n";
return "";
}

std::string
check_missing_module_mapper_and_no_encoding(void *drcontext)
{
memref_t instr = gen_instr(TID_A);
test_decode_cache_t<instr_decode_info_t> decode_cache(
drcontext,
/*persist_decoded_instr=*/true, /*ilist_for_test_module_mapper=*/nullptr);
instr_decode_info_t dummy;
// Initialize to non-nullptr for the test.
instr_decode_info_t *cached_decode_info = &dummy;

// Missing init before add_decode_info.
std::string err = decode_cache.add_decode_info(instr.instr, cached_decode_info);
if (err == "") {
return "Expected error at add_decode_info but did not get any";
}
if (cached_decode_info != nullptr) {
return "Expected returned reference cached_decode_info to be nullptr";
}

// init for a filetype without encodings, with no module file path either.
err = decode_cache.init(
static_cast<offline_file_type_t>(OFFLINE_FILE_TYPE_SYSCALL_NUMBERS), "", "");
if (err == "") {
return "Expected error at init but did not get any";
}
std::cerr << "check_missing_module_mapper passed\n";
return "";
}

// Test entry point: runs every check in sequence and terminates the process
// with exit code 1, after printing the error, as soon as one of them fails.
// The argc and argv parameters are not used.
int
test_main(int argc, const char *argv[])
{
    void *drcontext = dr_standalone_init();

    // Prints a non-empty failure description and exits; does nothing otherwise.
    const auto exit_on_failure = [](const std::string &failure) {
        if (failure.empty())
            return;
        std::cerr << failure << "\n";
        exit(1);
    };

    exit_on_failure(check_decode_caching(drcontext, /*persist_instrs=*/false,
                                         /*use_module_mapper=*/false));
    exit_on_failure(check_decode_caching(drcontext, /*persist_instrs=*/true,
                                         /*use_module_mapper=*/false));
#ifndef WINDOWS
    // TODO i#5960: Enable these tests after the test-only Windows issue is
    // fixed.
    exit_on_failure(check_decode_caching(drcontext, /*persist_instrs=*/false,
                                         /*use_module_mapper=*/true));
    exit_on_failure(check_decode_caching(drcontext, /*persist_instrs=*/true,
                                         /*use_module_mapper=*/true));
#endif
    exit_on_failure(check_missing_module_mapper_and_no_encoding(drcontext));

    return 0;
}

} // namespace drmemtrace
} // namespace dynamorio
Loading
Loading