Skip to content

Commit

Permalink
i#7157 syscall sched: Handle static injected syscall traces in schedu…
Browse files Browse the repository at this point in the history
…ler (#7158)

Adds handling for statically-injected kernel syscall traces in the
scheduler.

Ensures that quantum and voluntary context switches are delayed until
after the statically-injected syscall trace. This involved fixing the
bookkeeping logic, which is now done on the next user-space instruction.

Note that the injected kernel syscall traces do not include any
scheduling-related markers such as TRACE_MARKER_TYPE_SYSCALL_UNSCHEDULE
and TRACE_MARKER_TYPE_TIMESTAMP.

For now, we keep the status quo in the scheduler's behavior of showing the
post-syscall markers before the switch.

Adds a unit test for the scheduler's handling of statically-injected kernel
syscall traces, covering scenarios with syscall sequences both shorter and
longer than the quantum, system calls that do and do not cause context
switches, and syscalls occurring at different offsets into the quantum.

Issue: #7157
  • Loading branch information
abhinav92003 authored Dec 19, 2024
1 parent 5f3be87 commit 4adbf0f
Show file tree
Hide file tree
Showing 4 changed files with 306 additions and 26 deletions.
3 changes: 3 additions & 0 deletions clients/drcachesim/scheduler/scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,9 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
* The scheduling quantum duration for preemption, in instruction count,
* for #QUANTUM_INSTRUCTIONS. The time passed to next_record() is ignored
* for purposes of quantum preempts.
*
* The number of instructions executed in a quantum may exceed the specified
* value, to avoid interrupting a kernel system call sequence.
*/
// We pick 10 million to match 2 instructions per nanosecond with a 5ms quantum.
uint64_t quantum_duration_instrs = 10 * 1000 * 1000;
Expand Down
71 changes: 48 additions & 23 deletions clients/drcachesim/scheduler/scheduler_dynamic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,9 @@ scheduler_dynamic_tmpl_t<RecordType, ReaderType>::check_for_input_switch(
// boundaries so we live with those being before the switch.
// XXX: Once we insert kernel traces, we may have to try harder
// to stop before the post-syscall records.
if (this->record_type_is_instr_boundary(record, outputs_[output].last_record)) {
if (this->record_type_is_instr_boundary(record, outputs_[output].last_record) &&
// We want to delay the context switch until after the injected syscall trace.
!outputs_[output].in_syscall_code) {
if (input->switch_to_input != sched_type_t::INVALID_INPUT_ORDINAL) {
// The switch request overrides any latency threshold.
need_new_input = true;
Expand Down Expand Up @@ -506,18 +508,28 @@ scheduler_dynamic_tmpl_t<RecordType, ReaderType>::check_for_input_switch(
}
if (options_.quantum_unit == sched_type_t::QUANTUM_INSTRUCTIONS &&
this->record_type_is_instr_boundary(record, outputs_[output].last_record) &&
!outputs_[output].in_kernel_code) {
!outputs_[output].in_context_switch_code) {
++input->instrs_in_quantum;
if (input->instrs_in_quantum > options_.quantum_duration_instrs) {
// We again prefer to switch to another input even if the current
// input has the oldest timestamp, prioritizing context switches
// over timestamp ordering.
VPRINT(this, 4, "next_record[%d]: input %d hit end of instr quantum\n",
output, input->index);
preempt = true;
need_new_input = true;
input->instrs_in_quantum = 0;
++outputs_[output].stats[memtrace_stream_t::SCHED_STAT_QUANTUM_PREEMPTS];
if (outputs_[output].in_syscall_code) {
// XXX: Maybe this should be printed only once per-syscall-instance to
// reduce log spam.
VPRINT(this, 5,
"next_record[%d]: input %d delaying context switch "
"after end of instr quantum due to syscall trace\n",
output, input->index);

} else {
// We again prefer to switch to another input even if the current
// input has the oldest timestamp, prioritizing context switches
// over timestamp ordering.
VPRINT(this, 4, "next_record[%d]: input %d hit end of instr quantum\n",
output, input->index);
preempt = true;
need_new_input = true;
input->instrs_in_quantum = 0;
++outputs_[output].stats[memtrace_stream_t::SCHED_STAT_QUANTUM_PREEMPTS];
}
}
} else if (options_.quantum_unit == sched_type_t::QUANTUM_TIME) {
if (cur_time == 0 || cur_time < input->prev_time_in_quantum) {
Expand All @@ -535,14 +547,23 @@ scheduler_dynamic_tmpl_t<RecordType, ReaderType>::check_for_input_switch(
// in between (e.g., scatter/gather long sequence of reads/writes) by
// setting input->switching_pre_instruction.
this->record_type_is_instr_boundary(record, outputs_[output].last_record)) {
VPRINT(this, 4,
"next_record[%d]: input %d hit end of time quantum after %" PRIu64
"\n",
output, input->index, input->time_spent_in_quantum);
preempt = true;
need_new_input = true;
input->time_spent_in_quantum = 0;
++outputs_[output].stats[memtrace_stream_t::SCHED_STAT_QUANTUM_PREEMPTS];
if (outputs_[output].in_syscall_code) {
// XXX: Maybe this should be printed only once per-syscall-instance to
// reduce log spam.
VPRINT(this, 5,
"next_record[%d]: input %d delaying context switch after end of "
"time quantum after %" PRIu64 " due to syscall trace\n",
output, input->index, input->time_spent_in_quantum);
} else {
VPRINT(this, 4,
"next_record[%d]: input %d hit end of time quantum after %" PRIu64
"\n",
output, input->index, input->time_spent_in_quantum);
preempt = true;
need_new_input = true;
input->time_spent_in_quantum = 0;
++outputs_[output].stats[memtrace_stream_t::SCHED_STAT_QUANTUM_PREEMPTS];
}
}
}
// For sched_type_t::DEPENDENCY_TIMESTAMPS: enforcing asked-for
Expand Down Expand Up @@ -574,16 +595,20 @@ scheduler_dynamic_tmpl_t<RecordType, ReaderType>::process_marker(
break;
case TRACE_MARKER_TYPE_CONTEXT_SWITCH_START:
outputs_[output].in_context_switch_code = true;
ANNOTATE_FALLTHROUGH;
break;
case TRACE_MARKER_TYPE_SYSCALL_TRACE_START:
outputs_[output].in_kernel_code = true;
outputs_[output].in_syscall_code = true;
break;
case TRACE_MARKER_TYPE_CONTEXT_SWITCH_END:
// We have to delay until the next record.
outputs_[output].hit_switch_code_end = true;
ANNOTATE_FALLTHROUGH;
break;
case TRACE_MARKER_TYPE_SYSCALL_TRACE_END:
outputs_[output].in_kernel_code = false;
outputs_[output].in_syscall_code = false;
break;
case TRACE_MARKER_TYPE_TIMESTAMP:
// Syscall sequences are not expected to have a timestamp.
assert(!outputs_[output].in_syscall_code);
break;
case TRACE_MARKER_TYPE_DIRECT_THREAD_SWITCH: {
if (!options_.honor_direct_switches)
Expand Down
2 changes: 1 addition & 1 deletion clients/drcachesim/scheduler/scheduler_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ template <typename RecordType, typename ReaderType> class scheduler_impl_tmpl_t
// This is accessed by other outputs for stealing and rebalancing.
// Indirected so we can store it in our vector.
std::unique_ptr<std::atomic<bool>> active;
bool in_kernel_code = false;
bool in_syscall_code = false;
bool in_context_switch_code = false;
bool hit_switch_code_end = false;
// Used for time-based quanta.
Expand Down
Loading

0 comments on commit 4adbf0f

Please sign in to comment.