-
Notifications
You must be signed in to change notification settings - Fork 568
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
i#6934: Initial commit for Memory trace synthesis framework from Instruction Replay, useful for kernel memtracing and beyond #6931
base: master
Are you sure you want to change the base?
Changes from all commits
96f4003
41666ef
f74c632
3590f1f
048ddc0
6014432
c3d03a4
06b01cb
6070f3e
0c97302
6c530a8
b94fb08
34b7d22
3e922fd
0f9d8b7
7e95684
de2bdb7
b7d7643
1b68d54
8fb4f2a
69358fc
a5f385d
d755ba0
dba42fe
e91e70f
0d4f797
fe51787
a08951b
76c6ece
8327781
b88f9a8
55ed02b
e123028
b792ce0
016fc28
95858a6
8570c13
0e0f3bc
659bc21
10fc010
be94e8a
ed31e38
33447cf
550f38e
93205a2
1e90f2a
4b5fe4d
b48e640
70f1930
0148792
25b98f4
148fddd
0121325
0836c4a
94bc3bc
9cf3f86
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -168,6 +168,11 @@ add_exported_library(drmemtrace_func_view STATIC tools/func_view.cpp) | |
add_exported_library(drmemtrace_invariant_checker STATIC tools/invariant_checker.cpp) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please see https://dynamorio.org/page_code_reviews.html#sec_commit_messages on conventions on PR description title and body. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see. So should I open a new issue to acquire an issue number? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, as I don't think one exists yet. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be a lot easier to review this prototype code with a good overview and description. Please edit the PR description to describe the components being added here and what each one does. |
||
add_exported_library(drmemtrace_schedule_stats STATIC tools/schedule_stats.cpp) | ||
add_exported_library(drmemtrace_schedule_file STATIC common/schedule_file.cpp) | ||
add_exported_library(drmemtrace_access_region STATIC tools/access_region.cpp) | ||
add_exported_library(drmemtrace_reuse_pattern STATIC tools/reuse_pattern.cpp) | ||
|
||
add_subdirectory(mirage) | ||
target_link_libraries(drmemtrace_reuse_pattern mirage) | ||
|
||
target_link_libraries(drmemtrace_invariant_checker drdecode drmemtrace_schedule_file) | ||
|
||
|
@@ -213,6 +218,7 @@ if (BUILD_PT_POST_PROCESSOR) | |
add_subdirectory(drpt2trace) | ||
endif (BUILD_PT_POST_PROCESSOR) | ||
|
||
|
||
set(raw2trace_srcs | ||
tracer/raw2trace.cpp | ||
tracer/raw2trace_shared.cpp | ||
|
@@ -284,7 +290,7 @@ target_link_libraries(drmemtrace_launcher drmemtrace_simulator drmemtrace_reuse_ | |
drmemtrace_histogram drmemtrace_reuse_time drmemtrace_basic_counts | ||
drmemtrace_opcode_mix drmemtrace_syscall_mix drmemtrace_view drmemtrace_func_view | ||
drmemtrace_raw2trace directory_iterator drmemtrace_invariant_checker | ||
drmemtrace_schedule_stats drmemtrace_record_filter) | ||
drmemtrace_schedule_stats drmemtrace_record_filter drmemtrace_access_region drmemtrace_reuse_pattern) | ||
if (UNIX) | ||
target_link_libraries(drmemtrace_launcher dl) | ||
endif () | ||
|
@@ -316,6 +322,7 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}) | |
if (BUILD_PT_POST_PROCESSOR) | ||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/drpt2trace) | ||
endif (BUILD_PT_POST_PROCESSOR) | ||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/mirage) | ||
|
||
add_exported_library(drmemtrace_analyzer STATIC | ||
analyzer.cpp | ||
|
@@ -366,6 +373,8 @@ install_client_nonDR_header(drmemtrace tools/view_create.h) | |
install_client_nonDR_header(drmemtrace tools/func_view_create.h) | ||
install_client_nonDR_header(drmemtrace tools/filter/record_filter_create.h) | ||
install_client_nonDR_header(drmemtrace tools/filter/record_filter.h) | ||
install_client_nonDR_header(drmemtrace tools/access_region_create.h) | ||
install_client_nonDR_header(drmemtrace tools/reuse_pattern_create.h) | ||
# TODO i#6412: Create a separate directory for non-tracer headers so that | ||
# we can more cleanly separate tracer and raw2trace code. | ||
install_client_nonDR_header(drmemtrace tracer/raw2trace.h) | ||
|
@@ -596,6 +605,8 @@ restore_nonclient_flags(drmemtrace_analyzer) | |
restore_nonclient_flags(drmemtrace_invariant_checker) | ||
restore_nonclient_flags(drmemtrace_schedule_stats) | ||
restore_nonclient_flags(drmemtrace_schedule_file) | ||
restore_nonclient_flags(drmemtrace_access_region) | ||
restore_nonclient_flags(drmemtrace_reuse_pattern) | ||
|
||
# We need to pass /EHsc and we pull in libcmtd into drcachesim from a dep lib. | ||
# Thus we need to override the /MT with /MTd. | ||
|
@@ -664,6 +675,8 @@ add_win32_flags(drmemtrace_invariant_checker) | |
add_win32_flags(drmemtrace_schedule_stats) | ||
add_win32_flags(drmemtrace_schedule_file) | ||
add_win32_flags(directory_iterator) | ||
add_win32_flags(drmemtrace_access_region) | ||
add_win32_flags(drmemtrace_reuse_pattern) | ||
add_win32_flags(test_helpers) | ||
if (WIN32 AND DEBUG) | ||
get_target_property(sim_srcs drmemtrace_launcher SOURCES) | ||
|
@@ -849,7 +862,8 @@ if (BUILD_TESTS) | |
drmemtrace_histogram drmemtrace_reuse_time drmemtrace_basic_counts | ||
drmemtrace_opcode_mix drmemtrace_syscall_mix drmemtrace_view drmemtrace_func_view | ||
drmemtrace_raw2trace directory_iterator drmemtrace_invariant_checker | ||
drmemtrace_schedule_stats drmemtrace_analyzer drmemtrace_record_filter) | ||
drmemtrace_schedule_stats drmemtrace_analyzer drmemtrace_record_filter drmemtrace_access_region | ||
drmemtrace_reuse_pattern) | ||
if (UNIX) | ||
target_link_libraries(tool.drcachesim.core_sharded dl) | ||
endif () | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
cmake_minimum_required(VERSION 3.7) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Every file should have a header with copyright and license information. |
||
|
||
include (../../../make/policies.cmake NO_POLICY_SCOPE) | ||
|
||
if (NOT LINUX OR NOT X86 OR NOT X64) | ||
message(FATAL_ERROR "This is only for Linux x86_64.") | ||
endif () | ||
|
||
add_dr_defines() | ||
|
||
set(mirage_srcs | ||
dr_mir_api.cpp | ||
# common - shared code | ||
./common/list.cpp | ||
./common/bitmap.cpp | ||
# frontend - translating DRIR to MIR | ||
./frontend/gen_ops.cpp | ||
./frontend/gen_opnd_api.cpp | ||
./frontend/translate_context.cpp | ||
# backend - interpreting MIR | ||
./backend/replayer.cpp | ||
./backend/reg_analyzer.cpp | ||
# ./be/*.cpp | ||
# interpreter - MIR specification | ||
./ir/mir_insn.cpp | ||
) | ||
|
||
add_library(mirage STATIC ${mirage_srcs}) | ||
|
||
target_include_directories(mirage PUBLIC | ||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/common> | ||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/frontend> | ||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/ir> | ||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/backend> | ||
) | ||
|
||
configure_DynamoRIO_decoder(mirage) | ||
add_dependencies(mirage api_headers) | ||
target_link_libraries(mirage) | ||
install_client_nonDR_header(drmemtrace dr_mir_api.h) | ||
DR_export_target(mirage) | ||
install_exported_target(mirage ${INSTALL_CLIENTS_LIB}) | ||
|
||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tests) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#ifndef ABSTRACT_BACKEND_H | ||
#define ABSTRACT_BACKEND_H | ||
|
||
#include "mir_insn.h" | ||
|
||
class AbstractBackend { | ||
public: | ||
virtual void replay(mir_insn_list_t *insn_list) = 0; | ||
}; | ||
|
||
#endif // ABSTRACT_BACKEND_H |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
#include "reg_analyzer.h" | ||
|
||
RegAnalyzer::RegAnalyzer() { | ||
for (int i = 0; i < DR_NUM_GPR_REGS; i++) { | ||
gp_reg_file[i] = 1 << i; | ||
} | ||
} | ||
|
||
RegAnalyzer::~RegAnalyzer() { | ||
|
||
} | ||
|
||
void RegAnalyzer::replay(mir_insn_list_t *insn_list) { | ||
struct list_elem* e; | ||
for (e = list_begin(insn_list); e != list_end(insn_list); e = list_next(e)) { | ||
mir_insn_t* insn = list_entry(e, mir_insn_t, elem); | ||
step(insn); | ||
} | ||
} | ||
|
||
void RegAnalyzer::report() { | ||
printf("--> Memory Access Report -->\n"); | ||
for (auto it = mem_access_pattern.begin(); it != mem_access_pattern.end(); it++) { | ||
if (it->first == 0) { | ||
printf("constant memory access: %lu\n", it->second); | ||
} else { | ||
printf("variable memory access for 0x%lx : %lu times\n", it->first, it->second); | ||
} | ||
} | ||
printf("--> Register State Report -->\n"); | ||
for (int i = 0; i < DR_NUM_GPR_REGS; i++) { | ||
printf("register %d: %lx\n", i, gp_reg_file[i]); | ||
} | ||
} | ||
|
||
uint64_t RegAnalyzer::read_mem(mir_insn_t *insn) { | ||
access_mem(insn); | ||
return 0x80000000; // signed bit marks the contamination of register | ||
} | ||
|
||
void RegAnalyzer::write_mem(mir_insn_t *insn) { | ||
access_mem(insn); | ||
} | ||
|
||
void RegAnalyzer::access_mem(mir_insn_t *insn) { | ||
if (is_const_addr(insn)) { | ||
inc_mem_access_count(0); | ||
} else { | ||
inc_mem_access_count(get_val_from_opnd(insn->opnd0) | get_val_from_opnd(insn->opnd1)); | ||
} | ||
} | ||
|
||
bool RegAnalyzer::is_const_addr(mir_insn_t *insn) { | ||
return (insn->opnd0.type == MIR_OPND_IMM || | ||
(insn->opnd0.type == MIR_OPND_REG && insn->opnd0.value.reg == REG_NULL)) && | ||
(insn->opnd1.type == MIR_OPND_IMM || | ||
(insn->opnd1.type == MIR_OPND_REG && insn->opnd1.value.reg == REG_NULL)); | ||
} | ||
|
||
void RegAnalyzer::step(mir_insn_t *insn) { | ||
uint64_t src0_val = get_val_from_opnd(insn->opnd0); | ||
uint64_t src1_val = get_val_from_opnd(insn->opnd1); | ||
uint64_t dst_val; | ||
switch (insn->op) { | ||
case MIR_OP_NULL: | ||
break; | ||
case MIR_OP_MOV: | ||
set_val_to_opnd(insn->dst, src0_val); | ||
// all arithmetic operations are treated the same | ||
case MIR_OP_ADD: | ||
case MIR_OP_SUB: | ||
case MIR_OP_MUL: | ||
case MIR_OP_DIV: | ||
case MIR_OP_REM: | ||
case MIR_OP_AND: | ||
case MIR_OP_OR: | ||
case MIR_OP_XOR: | ||
case MIR_OP_SHL: | ||
case MIR_OP_SHR: | ||
dst_val = src0_val | src1_val; | ||
set_val_to_opnd(insn->dst, dst_val); | ||
break; | ||
case MIR_OP_LD8: | ||
case MIR_OP_LD16: | ||
case MIR_OP_LD32: | ||
case MIR_OP_LD64: | ||
dst_val = read_mem(insn); | ||
// set_val_to_opnd(insn->dst, dst_val); | ||
break; | ||
case MIR_OP_ST8: | ||
case MIR_OP_ST16: | ||
case MIR_OP_ST32: | ||
case MIR_OP_ST64: | ||
write_mem(insn); | ||
break; | ||
default: | ||
break; | ||
} | ||
} | ||
|
||
uint64_t RegAnalyzer::get_reg_val(reg_id_t reg) { | ||
if (reg == REG_NULL) { | ||
return 0; | ||
} else if (IS_DR_REG_GPR(reg)) { | ||
return gp_reg_file[GET_DR_REG_GPR_NUM(reg)]; | ||
} else if (IS_DR_REG_64(reg)) { | ||
return gp_reg_file[GET_DR_REG_64_NUM(reg)]; | ||
} else if (IS_DR_REG_32(reg)) { | ||
return (uint32_t)gp_reg_file[GET_DR_REG_32_NUM(reg)]; | ||
} else if (IS_DR_REG_16(reg)) { | ||
return (uint16_t)gp_reg_file[GET_DR_REG_16_NUM(reg)]; | ||
} else if (IS_DR_REG_8(reg)) { | ||
return (uint8_t)gp_reg_file[GET_DR_REG_8_NUM(reg)]; | ||
} else if (IS_TMP_REG(reg)) { | ||
return tmp_reg_file[GET_TMP_REG_NUM(reg)]; | ||
} else { | ||
return 0; | ||
} | ||
} | ||
|
||
uint64_t RegAnalyzer::get_val_from_opnd(mir_opnd_t opnd) { | ||
if (opnd.type == MIR_OPND_REG) { | ||
return get_reg_val(opnd.value.reg); | ||
} else if (opnd.type == MIR_OPND_IMM) { | ||
return 0; // constants do not contribute | ||
} | ||
return 0; | ||
} | ||
|
||
void RegAnalyzer::set_val_to_opnd(mir_opnd_t opnd, uint64_t value) { | ||
if (opnd.type == MIR_OPND_REG) { | ||
//debug | ||
// if (opnd.value.reg == REG_XSP && value != 0x10) { | ||
// printf("setting reg xsp to %lx\n", value); | ||
// volatile uint64_t xsp = get_reg_val(REG_XSP); | ||
// printf("xsp: %lx\n", xsp); | ||
// } | ||
if (opnd.value.reg == REG_NULL) { | ||
return; | ||
} else if (IS_DR_REG_GPR(opnd.value.reg)) { | ||
gp_reg_file[GET_DR_REG_GPR_NUM(opnd.value.reg)] = value; | ||
} else if (IS_DR_REG_64(opnd.value.reg)) { | ||
gp_reg_file[GET_DR_REG_64_NUM(opnd.value.reg)] = value; | ||
} else if (IS_DR_REG_32(opnd.value.reg)) { | ||
gp_reg_file[GET_DR_REG_32_NUM(opnd.value.reg)] = (uint32_t)value; | ||
} else if (IS_DR_REG_16(opnd.value.reg)) { | ||
gp_reg_file[GET_DR_REG_16_NUM(opnd.value.reg)] = (uint16_t)value; | ||
} else if (IS_DR_REG_8(opnd.value.reg)) { | ||
gp_reg_file[GET_DR_REG_8_NUM(opnd.value.reg)] = (uint8_t)value; | ||
} else if (IS_TMP_REG(opnd.value.reg)) { | ||
tmp_reg_file[GET_TMP_REG_NUM(opnd.value.reg)] = value; | ||
} | ||
} else if (opnd.type == MIR_OPND_IMM) { | ||
assert(false && "attempted to set imm value"); | ||
} | ||
} | ||
|
||
void RegAnalyzer::inc_mem_access_count(uint64_t addr) { | ||
if (mem_access_pattern.find(addr) == mem_access_pattern.end()) { | ||
mem_access_pattern[addr] = 1; | ||
} else { | ||
mem_access_pattern[addr]++; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this PR is meant to get feel for all the pieces of this prototype rather than being sent for merging.
To help future PR's for actual merging: note that if this PR were sent as-is for merging our first reaction would be that it's too big: the unified patch
https://patch-diff.githubusercontent.com/raw/DynamoRIO/dynamorio/pull/6931.patch is over 6K lines while https://dynamorio.org/page_code_reviews.html#autotoc_md114 suggests "Review diffs larger than about 1500 lines should be avoided." Generally large changes should be split into smaller pieces and each of those sent separately. If possible during development PR's should be sent early; sometimes an end-to-end stage has to be reached before designs can be settled, in which case the code has to kept in pieces or split later for multiple reviews.
For merging, is there a logical, natural way to split up the code into pieces? If some pieces need others, their separate PR's won't fully run and won't be able to have end-to-end tests, but they could have unit tests or possibly have no tests in the same PR with a message about tests in later PR's that fill in other pieces. For starters, I see an access_region and a reuse_pattern tool: those should each be their own PR's.