Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i#7023: Switch drmemtrace replay of idles to use record count #7036

Merged
merged 2 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 69 additions & 47 deletions clients/drcachesim/scheduler/scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ replay_file_checker_t::check(archive_istream_t *infile)
scheduler_t::schedule_record_t record;
bool prev_was_idle = false;
while (infile->read(reinterpret_cast<char *>(&record), sizeof(record))) {
if (record.type == scheduler_t::schedule_record_t::IDLE) {
if (record.type == scheduler_t::schedule_record_t::IDLE ||
record.type == scheduler_t::schedule_record_t::IDLE_BY_COUNT) {
if (prev_was_idle)
return "Error: consecutive idle records";
prev_was_idle = true;
Expand Down Expand Up @@ -1305,10 +1306,17 @@ scheduler_tmpl_t<RecordType, ReaderType>::read_recorded_schedule()
VPRINT(this, 1, "output %d empty: returning eof up front\n", i);
set_cur_input(i, INVALID_INPUT_ORDINAL);
outputs_[i].at_eof = true;
} else if (outputs_[i].record[0].type == schedule_record_t::IDLE) {
} else if (outputs_[i].record[0].type == schedule_record_t::IDLE ||
outputs_[i].record[0].type == schedule_record_t::IDLE_BY_COUNT) {
set_cur_input(i, INVALID_INPUT_ORDINAL);
outputs_[i].waiting = true;
outputs_[i].wait_start_time = 0; // Updated on first next_record().
if (outputs_[i].record[0].type == schedule_record_t::IDLE) {
// Convert a legacy idle duration from microseconds to record counts.
outputs_[i].record[0].value.idle_duration =
static_cast<uint64_t>(options_.time_units_per_us *
outputs_[i].record[0].value.idle_duration);
}
outputs_[i].idle_start_count = -1; // Updated on first next_record().
VPRINT(this, 3, "output %d starting out idle\n", i);
} else {
assert(outputs_[i].record[0].type == schedule_record_t::DEFAULT);
Expand Down Expand Up @@ -2531,17 +2539,25 @@ scheduler_tmpl_t<RecordType, ReaderType>::record_schedule_segment(
input_ordinal_t input, uint64_t start_instruction, uint64_t stop_instruction)
{
assert(type == schedule_record_t::VERSION || type == schedule_record_t::FOOTER ||
type == schedule_record_t::IDLE || inputs_[input].lock->owned_by_cur_thread());
// ::IDLE is a legacy type we should not see in new recordings.
type == schedule_record_t::IDLE_BY_COUNT ||
inputs_[input].lock->owned_by_cur_thread());
// We always use the current wall-clock time, as the time stored in the prior
// next_record() call can be out of order across outputs and lead to deadlocks.
uint64_t timestamp = get_time_micros();
if (type == schedule_record_t::IDLE &&
outputs_[output].record.back().type == schedule_record_t::IDLE) {
if (type == schedule_record_t::IDLE_BY_COUNT &&
outputs_[output].record.back().type == schedule_record_t::IDLE_BY_COUNT) {
// Merge. We don't need intermediate timestamps when idle, and consecutive
// idle records quickly balloon the file.
return sched_type_t::STATUS_OK;
}
VPRINT(this, 4,
if (type == schedule_record_t::IDLE_BY_COUNT) {
// Start prior to this idle.
outputs_[output].idle_start_count = outputs_[output].idle_count - 1;
// That is what we'll record in the value union shared w/ start_instruction.
assert(start_instruction == outputs_[output].idle_count - 1);
}
VPRINT(this, 3,
"recording out=%d type=%d input=%d start=%" PRIu64 " stop=%" PRIu64
" time=%" PRIu64 "\n",
output, type, input, start_instruction, stop_instruction, timestamp);
Expand All @@ -2560,24 +2576,25 @@ scheduler_tmpl_t<RecordType, ReaderType>::close_schedule_segment(output_ordinal_
assert(!outputs_[output].record.empty());
assert(outputs_[output].record.back().type == schedule_record_t::VERSION ||
outputs_[output].record.back().type == schedule_record_t::FOOTER ||
outputs_[output].record.back().type == schedule_record_t::IDLE ||
// ::IDLE is for legacy recordings, not new ones.
outputs_[output].record.back().type == schedule_record_t::IDLE_BY_COUNT ||
input.lock->owned_by_cur_thread());
if (outputs_[output].record.back().type == schedule_record_t::SKIP) {
// Skips already have a final stop value.
return sched_type_t::STATUS_OK;
}
if (outputs_[output].record.back().type == schedule_record_t::IDLE) {
// Just like in record_schedule_segment() we use wall-clock time for recording
// replay timestamps.
uint64_t end = get_time_micros();
assert(end >= outputs_[output].record.back().timestamp);
if (outputs_[output].record.back().type == schedule_record_t::IDLE_BY_COUNT) {
uint64_t end_idle_count = outputs_[output].idle_count;
assert(outputs_[output].idle_start_count >= 0);
assert(end_idle_count >=
static_cast<uint64_t>(outputs_[output].idle_start_count));
outputs_[output].record.back().value.idle_duration =
end - outputs_[output].record.back().timestamp;
end_idle_count - outputs_[output].idle_start_count;
VPRINT(this, 3,
"close_schedule_segment: idle duration %" PRIu64 " = %" PRIu64
"close_schedule_segment[%d]: idle duration %" PRIu64 " = %" PRIu64
" - %" PRIu64 "\n",
outputs_[output].record.back().value.idle_duration, end,
outputs_[output].record.back().timestamp);
output, outputs_[output].record.back().value.idle_duration, end_idle_count,
outputs_[output].idle_start_count);
return sched_type_t::STATUS_OK;
}
uint64_t instr_ord = get_instr_ordinal(input);
Expand All @@ -2596,9 +2613,9 @@ scheduler_tmpl_t<RecordType, ReaderType>::close_schedule_segment(output_ordinal_
++instr_ord;
}
VPRINT(this, 3,
"close_schedule_segment: input=%d type=%d start=%" PRIu64 " stop=%" PRIu64
"close_schedule_segment[%d]: input=%d type=%d start=%" PRIu64 " stop=%" PRIu64
"\n",
input.index, outputs_[output].record.back().type,
output, input.index, outputs_[output].record.back().type,
outputs_[output].record.back().value.start_instruction, instr_ord);
// Check for empty default entries, except the starter 0,0 ones.
assert(outputs_[output].record.back().type != schedule_record_t::DEFAULT ||
Expand Down Expand Up @@ -2927,7 +2944,9 @@ scheduler_tmpl_t<RecordType, ReaderType>::set_cur_input(output_ordinal_t output,
add_to_ready_queue(output, &inputs_[prev_input]);
}
} else if (options_.schedule_record_ostream != nullptr &&
outputs_[output].record.back().type == schedule_record_t::IDLE) {
(outputs_[output].record.back().type == schedule_record_t::IDLE ||
outputs_[output].record.back().type ==
schedule_record_t::IDLE_BY_COUNT)) {
input_info_t unused;
sched_type_t::stream_status_t status = close_schedule_segment(output, unused);
if (status != sched_type_t::STATUS_OK)
Expand Down Expand Up @@ -3038,19 +3057,27 @@ scheduler_tmpl_t<RecordType, ReaderType>::pick_next_input_as_previously(
}
return eof_or_idle(output, outputs_[output].cur_input);
}
const schedule_record_t &segment = outputs_[output].record[record_index + 1];
if (segment.type == schedule_record_t::IDLE) {
schedule_record_t &segment = outputs_[output].record[record_index + 1];
if (segment.type == schedule_record_t::IDLE ||
segment.type == schedule_record_t::IDLE_BY_COUNT) {
outputs_[output].waiting = true;
outputs_[output].wait_start_time = get_output_time(output);
if (segment.type == schedule_record_t::IDLE) {
// Convert a legacy idle duration from microseconds to record counts.
segment.value.idle_duration = static_cast<uint64_t>(
options_.time_units_per_us * segment.value.idle_duration);
}
outputs_[output].idle_start_count = outputs_[output].idle_count;
outputs_[output].record_index->fetch_add(1, std::memory_order_release);
++outputs_[output].idle_count;
VPRINT(this, 5, "%s[%d]: next replay segment idle for %" PRIu64 "\n",
__FUNCTION__, output, segment.value.idle_duration);
return sched_type_t::STATUS_IDLE;
}
index = segment.key.input;
VPRINT(this, 5,
"pick_next_input_as_previously[%d]: next replay segment in=%d (@%" PRId64
") type=%d start=%" PRId64 " end=%" PRId64 "\n",
output, index, get_instr_ordinal(inputs_[index]), segment.type,
"%s[%d]: next replay segment in=%d (@%" PRId64 ") type=%d start=%" PRId64
" end=%" PRId64 "\n",
__FUNCTION__, output, index, get_instr_ordinal(inputs_[index]), segment.type,
segment.value.start_instruction, segment.stop_instruction);
{
std::lock_guard<mutex_dbg_owned> lock(*inputs_[index].lock);
Expand Down Expand Up @@ -3404,7 +3431,9 @@ scheduler_tmpl_t<RecordType, ReaderType>::pick_next_input(output_ordinal_t outpu
if (options_.schedule_record_ostream != nullptr) {
sched_type_t::stream_status_t record_status =
record_schedule_segment(
output, schedule_record_t::IDLE, 0, 0, 0);
output, schedule_record_t::IDLE_BY_COUNT, 0,
// Start prior to this idle.
outputs_[output].idle_count - 1, 0);
if (record_status != sched_type_t::STATUS_OK)
return record_status;
}
Expand Down Expand Up @@ -3734,18 +3763,9 @@ scheduler_tmpl_t<RecordType, ReaderType>::next_record(output_ordinal_t output,
// do return an error on a time smaller than an input's current start time when we
// check for quantum end.
if (cur_time == 0) {
if (options_.mapping == MAP_AS_PREVIOUSLY) {
// XXX i#7023: We should instead store the simulator's time (whether
// passed in or our instr-based formula below) in the records and do away
// with wall-clock time for idle measurement. Either way, we should make
// it clear in the docs whether the user/simulator has to pass in the
// time on replay.
cur_time = get_time_micros();
} else {
// We add 1 to avoid an invalid value of 0.
cur_time = 1 + outputs_[output].stream->get_output_instruction_ordinal() +
outputs_[output].idle_count;
}
// We add 1 to avoid an invalid value of 0.
cur_time = 1 + outputs_[output].stream->get_output_instruction_ordinal() +
outputs_[output].idle_count;
}
// Invalid values for cur_time are checked below.
outputs_[output].cur_time->store(cur_time, std::memory_order_release);
Expand All @@ -3755,22 +3775,24 @@ scheduler_tmpl_t<RecordType, ReaderType>::next_record(output_ordinal_t output,
}
if (outputs_[output].waiting) {
if (options_.mapping == MAP_AS_PREVIOUSLY &&
outputs_[output].wait_start_time > 0) {
outputs_[output].idle_start_count >= 0) {
uint64_t duration = outputs_[output]
.record[outputs_[output].record_index->load(
std::memory_order_acquire)]
.value.idle_duration;
uint64_t now = get_output_time(output);
if (now - outputs_[output].wait_start_time < duration) {
uint64_t now = outputs_[output].idle_count;
if (now - outputs_[output].idle_start_count < duration) {
VPRINT(this, 4,
"next_record[%d]: elapsed %" PRIu64 " < duration %" PRIu64 "\n",
output, now - outputs_[output].wait_start_time, duration);
// XXX i#7023: This should always be STATUS_IDLE, right?
return sched_type_t::STATUS_WAIT;
output, now - outputs_[output].idle_start_count, duration);
++outputs_[output].idle_count;
return sched_type_t::STATUS_IDLE;
} else
outputs_[output].wait_start_time = 0;
outputs_[output].idle_start_count = -1;
}
VPRINT(this, 5, "next_record[%d]: need new input (cur=waiting)\n", output);
VPRINT(this, 5,
"next_record[%d]: need new input (cur=waiting; idles=%" PRIu64 ")\n",
output, outputs_[output].idle_count);
sched_type_t::stream_status_t res = pick_next_input(output, 0);
if (res != sched_type_t::STATUS_OK && res != sched_type_t::STATUS_SKIPPED)
return res;
Expand Down
8 changes: 6 additions & 2 deletions clients/drcachesim/scheduler/scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -1459,6 +1459,9 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
// Indicates that the output is idle. The value.idle_duration field holds
// a duration in microseconds.
IDLE,
// Indicates that the output is idle. The value.idle_duration field holds
// a duration as a count of idle records.
IDLE_BY_COUNT,
};
static constexpr int VERSION_CURRENT = 0;
schedule_record_t() = default;
Expand Down Expand Up @@ -1493,6 +1496,7 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
{
}
// For record_type_t::IDLE, the duration in microseconds of the idling.
// For record_type_t::IDLE_BY_COUNT, the duration as a count of idle records.
uint64_t idle_duration;
// Input stream ordinal of starting point, for non-IDLE types.
uint64_t start_instruction = 0;
Expand Down Expand Up @@ -1692,8 +1696,8 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
int64_t as_traced_cpuid = -1;
// Used for MAP_AS_PREVIOUSLY with live_replay_output_count_.
bool at_eof = false;
// Used for replaying wait periods.
uint64_t wait_start_time = 0;
// Used for recording and replaying idle periods.
int64_t idle_start_count = -1;
// Exported statistics. Currently all integers and cast to double on export.
std::vector<int64_t> stats =
std::vector<int64_t>(memtrace_stream_t::SCHED_STAT_TYPE_COUNT);
Expand Down
Loading