diff --git a/clients/drcachesim/scheduler/scheduler.cpp b/clients/drcachesim/scheduler/scheduler.cpp index 2b133f8eab6..37680dc06c0 100644 --- a/clients/drcachesim/scheduler/scheduler.cpp +++ b/clients/drcachesim/scheduler/scheduler.cpp @@ -1676,7 +1676,7 @@ scheduler_tmpl_t::process_next_initial_record( return false; } } else if (marker_type == TRACE_MARKER_TYPE_SYSCALL_UNSCHEDULE) { - if (options_.honor_direct_switches) { + if (options_.honor_direct_switches && options_.mapping != MAP_AS_PREVIOUSLY) { input.unscheduled = true; // Ignore this marker during regular processing. input.skip_next_unscheduled = true; @@ -1701,6 +1701,29 @@ scheduler_tmpl_t::get_initial_input_content( // output stream(s). for (size_t i = 0; i < inputs_.size(); ++i) { input_info_t &input = inputs_[i]; + + // If the input jumps to the middle immediately, do that now so we'll have + // the proper start timestamp. + if (!input.regions_of_interest.empty() && + // The docs say for replay we allow the user to pass ROI but ignore it. + // Maybe we should disallow it so we don't need checks like this? + options_.mapping != MAP_AS_PREVIOUSLY) { + RecordType record = create_invalid_record(); + sched_type_t::stream_status_t res = + advance_region_of_interest(/*output=*/-1, record, input); + if (res == sched_type_t::STATUS_SKIPPED) { + input.next_timestamp = + static_cast(input.reader->get_last_timestamp()); + // We can skip the rest of the loop here (the filetype will be there + // in the stream). + continue; + } + if (res != sched_type_t::STATUS_OK) { + VPRINT(this, 1, "Failed to advance initial ROI with status %d\n", res); + return sched_type_t::STATUS_ERROR_RANGE_INVALID; + } + } + bool found_filetype = false; bool found_timestamp = !gather_timestamps || input.next_timestamp > 0; if (process_next_initial_record(input, create_invalid_record(), found_filetype, @@ -2078,30 +2101,60 @@ scheduler_tmpl_t::advance_region_of_interest( return sched_type_t::STATUS_OK; VPRINT(this, 2, - "skipping from %" PRIu64 " to %" PRIu64 " instrs (%" PRIu64 + "skipping from %" PRId64 " to %" PRIu64 " instrs (%" PRIu64 " in reader) for ROI\n", cur_instr, cur_range.start_instruction, cur_range.start_instruction - cur_reader_instr - 1); if (options_.schedule_record_ostream != nullptr) { - sched_type_t::stream_status_t status = close_schedule_segment(output, input); - if (status != sched_type_t::STATUS_OK) - return status; - status = record_schedule_segment(output, schedule_record_t::SKIP, input.index, - cur_instr, cur_range.start_instruction); - if (status != sched_type_t::STATUS_OK) - return status; - status = record_schedule_segment(output, schedule_record_t::DEFAULT, input.index, - cur_range.start_instruction); - if (status != sched_type_t::STATUS_OK) - return status; + if (output >= 0) { + record_schedule_skip(output, input.index, cur_instr, + cur_range.start_instruction); + } // Else, will be done in set_cur_input once assigned to an output. } if (cur_range.start_instruction < cur_reader_instr) { // We do not support skipping without skipping over the pre-read: we would // need to extract from the queue. return sched_type_t::STATUS_INVALID; } - return skip_instructions(output, input, - cur_range.start_instruction - cur_reader_instr - 1); + return skip_instructions(input, cur_range.start_instruction - cur_reader_instr - 1); +} + +template +typename scheduler_tmpl_t::stream_status_t +scheduler_tmpl_t::record_schedule_skip(output_ordinal_t output, + input_ordinal_t input, + uint64_t start_instruction, + uint64_t stop_instruction) +{ + if (options_.schedule_record_ostream == nullptr) + return sched_type_t::STATUS_INVALID; + sched_type_t::stream_status_t status; + // Close any prior default record for this input. If we switched inputs, + // we'll already have closed the prior in set_cur_input(). + if (outputs_[output].record.back().type == schedule_record_t::DEFAULT && + outputs_[output].record.back().key.input == input) { + status = close_schedule_segment(output, inputs_[input]); + if (status != sched_type_t::STATUS_OK) + return status; + } + if (outputs_[output].record.size() == 1) { + // Replay doesn't handle starting out with a skip record: we need a + // start=0,stop=0 dummy entry to get things rolling at the start of + // an output's records, if we're the first record after the version. + assert(outputs_[output].record.back().type == schedule_record_t::VERSION); + status = record_schedule_segment(output, schedule_record_t::DEFAULT, input, 0, 0); + if (status != sched_type_t::STATUS_OK) + return status; + } + status = record_schedule_segment(output, schedule_record_t::SKIP, input, + start_instruction, stop_instruction); + if (status != sched_type_t::STATUS_OK) + return status; + status = record_schedule_segment(output, schedule_record_t::DEFAULT, input, + stop_instruction); + if (status != sched_type_t::STATUS_OK) + return status; + return sched_type_t::STATUS_OK; } template @@ -2124,8 +2177,7 @@ scheduler_tmpl_t::clear_input_queue(input_info_t &input) template typename scheduler_tmpl_t::stream_status_t -scheduler_tmpl_t::skip_instructions(output_ordinal_t output, - input_info_t &input, +scheduler_tmpl_t::skip_instructions(input_info_t &input, uint64_t skip_amount) { // reader_t::at_eof_ is true until init() is called. @@ -2161,28 +2213,11 @@ scheduler_tmpl_t::skip_instructions(output_ordinal_t out } } input.in_cur_region = true; - auto *stream = outputs_[output].stream; // We've documented that an output stream's ordinals ignore skips in its input // streams, so we do not need to remember the input's ordinals pre-skip and increase // our output's ordinals commensurately post-skip. - // If we skipped from the start we may not have seen the initial headers: - // use the input's cached copies. - // We set the version and filetype up front for outputs with - // an initial input, so we check a different field to detect a - // skip. - if (stream->cache_line_size_ == 0 || - // Check the version too as a fallback for inputs with no cache size. - stream->version_ == 0) { - stream->version_ = input.reader->get_version(); - stream->last_timestamp_ = input.reader->get_last_timestamp(); - stream->first_timestamp_ = input.reader->get_first_timestamp(); - stream->filetype_ = input.reader->get_filetype(); - stream->cache_line_size_ = input.reader->get_cache_line_size(); - stream->chunk_instr_count_ = input.reader->get_chunk_instr_count(); - stream->page_size_ = input.reader->get_page_size(); - } // We let the user know we've skipped. There's no discontinuity for the // first one so we do not insert a marker there (if we do want to insert one, // we need to update the view tool to handle a window marker as the very @@ -2229,6 +2264,10 @@ scheduler_tmpl_t::record_schedule_segment( // idle records quickly balloon the file. return sched_type_t::STATUS_OK; } + VPRINT(this, 4, + "recording out=%d type=%d input=%d start=%" PRIu64 " stop=%" PRIu64 + " time=%" PRIu64 "\n", + output, type, input, start_instruction, stop_instruction, timestamp); outputs_[output].record.emplace_back(type, input, start_instruction, stop_instruction, timestamp); // The stop is typically updated later in close_schedule_segment(). @@ -2275,9 +2314,11 @@ scheduler_tmpl_t::close_schedule_segment(output_ordinal_ input.index); ++instr_ord; } - VPRINT( - this, 3, "close_schedule_segment: input=%d start=%" PRId64 " stop=%" PRId64 "\n", - input.index, outputs_[output].record.back().value.start_instruction, instr_ord); + VPRINT(this, 3, + "close_schedule_segment: input=%d type=%d start=%" PRIu64 " stop=%" PRIu64 + "\n", + input.index, outputs_[output].record.back().type, + outputs_[output].record.back().value.start_instruction, instr_ord); // Check for empty default entries, except the starter 0,0 ones. assert(outputs_[output].record.back().type != schedule_record_t::DEFAULT || outputs_[output].record.back().value.start_instruction < instr_ord || @@ -2500,11 +2541,17 @@ scheduler_tmpl_t::set_cur_input(output_ordinal_t output, std::lock_guard lock(*inputs_[input].lock); - if (prev_input < 0 && outputs_[output].stream->filetype_ == 0) { + if (prev_input < 0 && outputs_[output].stream->version_ == 0) { // Set the version and filetype up front, to let the user query at init time - // as documented. - outputs_[output].stream->version_ = inputs_[input].reader->get_version(); - outputs_[output].stream->filetype_ = inputs_[input].reader->get_filetype(); + // as documented. Also set the other fields in case we did a skip for ROI. + auto *stream = outputs_[output].stream; + stream->version_ = inputs_[input].reader->get_version(); + stream->last_timestamp_ = inputs_[input].reader->get_last_timestamp(); + stream->first_timestamp_ = inputs_[input].reader->get_first_timestamp(); + stream->filetype_ = inputs_[input].reader->get_filetype(); + stream->cache_line_size_ = inputs_[input].reader->get_cache_line_size(); + stream->chunk_instr_count_ = inputs_[input].reader->get_chunk_instr_count(); + stream->page_size_ = inputs_[input].reader->get_page_size(); } if (inputs_[input].pid != INVALID_PID) { @@ -2541,14 +2588,26 @@ scheduler_tmpl_t::set_cur_input(output_ordinal_t output, } inputs_[input].prev_time_in_quantum = outputs_[output].cur_time; + if (options_.schedule_record_ostream != nullptr) { uint64_t instr_ord = get_instr_ordinal(inputs_[input]); VPRINT(this, 3, "set_cur_input: recording input=%d start=%" PRId64 "\n", input, instr_ord); - sched_type_t::stream_status_t status = - record_schedule_segment(output, schedule_record_t::DEFAULT, input, instr_ord); - if (status != sched_type_t::STATUS_OK) - return status; + if (!inputs_[input].regions_of_interest.empty() && + inputs_[input].cur_region == 0 && inputs_[input].in_cur_region && + (instr_ord == inputs_[input].regions_of_interest[0].start_instruction || + // The ord may be 1 less because we're still on the inserted timestamp. + instr_ord + 1 == inputs_[input].regions_of_interest[0].start_instruction)) { + // We skipped during init but didn't have an output for recording the skip: + // record it now. + record_schedule_skip(output, input, 0, + inputs_[input].regions_of_interest[0].start_instruction); + } else { + sched_type_t::stream_status_t status = record_schedule_segment( + output, schedule_record_t::DEFAULT, input, instr_ord); + if (status != sched_type_t::STATUS_OK) + return status; + } } return STATUS_OK; } @@ -2665,7 +2724,7 @@ scheduler_tmpl_t::pick_next_input_as_previously( "next_record[%d]: skipping from %" PRId64 " to %" PRId64 " in %d for schedule\n", output, cur_reader_instr, segment.stop_instruction, index); - auto status = skip_instructions(output, inputs_[index], + auto status = skip_instructions(inputs_[index], segment.stop_instruction - cur_reader_instr - 1 /*exclusive*/); // Increment the region to get window id markers with ordinals. diff --git a/clients/drcachesim/scheduler/scheduler.h b/clients/drcachesim/scheduler/scheduler.h index 5611b664180..a716fa84449 100644 --- a/clients/drcachesim/scheduler/scheduler.h +++ b/clients/drcachesim/scheduler/scheduler.h @@ -104,6 +104,7 @@ template class scheduler_tmpl_t { STATUS_ERROR_FILE_READ_FAILED, /**< Error: file read failed. */ STATUS_ERROR_NOT_IMPLEMENTED, /**< Error: not implemented. */ STATUS_ERROR_FILE_WRITE_FAILED, /**< Error: file write failed. */ + STATUS_ERROR_RANGE_INVALID, /**< Error: region of interest invalid. */ }; /** @@ -204,6 +205,12 @@ template class scheduler_tmpl_t { { } /** Convenience constructor for common usage. */ + input_thread_info_t(memref_tid_t tid, std::vector regions) + : tids(1, tid) + , regions_of_interest(regions) + { + } + /** Convenience constructor for common usage. */ input_thread_info_t(memref_tid_t tid, int priority) : tids(1, tid) , priority(priority) @@ -252,6 +259,13 @@ template class scheduler_tmpl_t { * separation, but it is not inserted prior to the first range. A * #dynamorio::drmemtrace::TRACE_TYPE_THREAD_EXIT record is inserted after the * final range. These ranges must be non-overlapping and in increasing order. + * + * Be aware that selecting a subset of code can remove inter-input + * communication steps that could be required for forward progress. + * For example, if selected subsets include #TRACE_MARKER_TYPE_SYSCALL_UNSCHEDULE + * with no timeout but do not include a corresponding + * #TRACE_MARKER_TYPE_SYSCALL_SCHEDULE for wakeup, an input could remain + * unscheduled. */ std::vector regions_of_interest; }; @@ -1518,7 +1532,13 @@ template class scheduler_tmpl_t { // Does a direct skip, unconditionally. // The caller must hold the input.lock. stream_status_t - skip_instructions(output_ordinal_t output, input_info_t &input, uint64_t skip_amount); + skip_instructions(input_info_t &input, uint64_t skip_amount); + + // Records an input skip in the output's recorded schedule. + // The caller must hold the input.lock. + stream_status_t + record_schedule_skip(output_ordinal_t output, input_ordinal_t input, + uint64_t start_instruction, uint64_t stop_instruction); scheduler_status_t read_and_instantiate_traced_schedule(); diff --git a/clients/drcachesim/tests/scheduler_unit_tests.cpp b/clients/drcachesim/tests/scheduler_unit_tests.cpp index b49103559e5..f1ce705749d 100644 --- a/clients/drcachesim/tests/scheduler_unit_tests.cpp +++ b/clients/drcachesim/tests/scheduler_unit_tests.cpp @@ -406,6 +406,7 @@ test_regions_bare_no_marker() /* clang-format off */ make_thread(1), make_pid(1), + make_marker(TRACE_MARKER_TYPE_PAGE_SIZE, 4096), // This would not happen in a real trace, only in tests. But it does // match a dynamic skip from the middle when an instruction has already // been read but not yet passed to the output stream. @@ -663,14 +664,9 @@ test_regions_too_far() std::vector sched_inputs; sched_inputs.emplace_back(std::move(readers)); sched_inputs[0].thread_modifiers.push_back(scheduler_t::input_thread_info_t(regions)); - if (scheduler.init(sched_inputs, 1, - scheduler_t::make_scheduler_serial_options(/*verbosity=*/4)) != - scheduler_t::STATUS_SUCCESS) - assert(false); - auto *stream = scheduler.get_stream(0); - memref_t memref; - scheduler_t::stream_status_t status = stream->next_record(memref); - assert(status == scheduler_t::STATUS_REGION_INVALID); + auto status = scheduler.init( + sched_inputs, 1, scheduler_t::make_scheduler_serial_options(/*verbosity=*/4)); + assert(status == scheduler_t::STATUS_ERROR_RANGE_INVALID); } static void @@ -4607,6 +4603,129 @@ test_unscheduled_initially() } } +static void +test_unscheduled_initially_roi() +{ +#ifdef HAS_ZIP + std::cerr + << "\n----------------\nTesting initially-unscheduled + time deps with ROI\n"; + static constexpr int NUM_OUTPUTS = 1; + static constexpr memref_tid_t TID_BASE = 100; + static constexpr memref_tid_t TID_A = TID_BASE + 0; + static constexpr memref_tid_t TID_B = TID_BASE + 1; + std::vector refs_A = { + make_thread(TID_A), + make_pid(1), + make_version(TRACE_ENTRY_VERSION), + make_timestamp(1001), + make_marker(TRACE_MARKER_TYPE_CPU_ID, 0), + // A starts out unscheduled but we skip that. + // (In a real trace some other thread would have to wake up A: + // we omit that here to keep the test small.) + make_marker(TRACE_MARKER_TYPE_SYSCALL_UNSCHEDULE, 0), + make_timestamp(4202), + make_marker(TRACE_MARKER_TYPE_CPU_ID, 0), + make_instr(/*pc=*/101), + // We don't actually start until here. + make_instr(/*pc=*/102), + make_instr(/*pc=*/103), + make_exit(TID_A), + }; + std::vector refs_B = { + make_thread(TID_B), + make_pid(1), + make_version(TRACE_ENTRY_VERSION), + make_timestamp(3001), + make_marker(TRACE_MARKER_TYPE_CPU_ID, 0), + make_instr(/*pc=*/201), + make_timestamp(4001), + make_marker(TRACE_MARKER_TYPE_CPU_ID, 0), + make_instr(/*pc=*/202), + // B starts here, with a lower last timestamp than A. + make_instr(/*pc=*/203), + make_instr(/*pc=*/204), + make_exit(TID_B), + }; + // Instr counts are 1-based. + std::vector regions_A; + regions_A.emplace_back(2, 0); + std::vector regions_B; + regions_B.emplace_back(3, 0); + // B should run first due to the lower timestamp at its ROI despite A's + // start-of-trace timestamp being lower. + static const char *const CORE0_SCHED_STRING = "..BB...AA."; + + std::string record_fname = "tmp_test_unsched_ROI.zip"; + { + // Record. + std::vector readers; + readers.emplace_back(std::unique_ptr(new mock_reader_t(refs_A)), + std::unique_ptr(new mock_reader_t()), TID_A); + readers.emplace_back(std::unique_ptr(new mock_reader_t(refs_B)), + std::unique_ptr(new mock_reader_t()), TID_B); + std::vector sched_inputs; + sched_inputs.emplace_back(std::move(readers)); + sched_inputs.back().thread_modifiers.push_back( + scheduler_t::input_thread_info_t(TID_A, regions_A)); + sched_inputs.back().thread_modifiers.push_back( + scheduler_t::input_thread_info_t(TID_B, regions_B)); + scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_TO_ANY_OUTPUT, + scheduler_t::DEPENDENCY_TIMESTAMPS, + scheduler_t::SCHEDULER_DEFAULTS, + /*verbosity=*/4); + zipfile_ostream_t outfile(record_fname); + sched_ops.schedule_record_ostream = &outfile; + scheduler_t scheduler; + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != + scheduler_t::STATUS_SUCCESS) + assert(false); + std::vector sched_as_string = + run_lockstep_simulation(scheduler, NUM_OUTPUTS, TID_BASE, /*send_time=*/true); + for (int i = 0; i < NUM_OUTPUTS; i++) { + std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n"; + } + assert(sched_as_string[0] == CORE0_SCHED_STRING); + if (scheduler.write_recorded_schedule() != scheduler_t::STATUS_SUCCESS) + assert(false); + } + { + replay_file_checker_t checker; + zipfile_istream_t infile(record_fname); + std::string res = checker.check(&infile); + if (!res.empty()) + std::cerr << "replay file checker failed: " << res; + assert(res.empty()); + } + { + // Test replay as it has complexities with skip records. + std::vector readers; + readers.emplace_back(std::unique_ptr(new mock_reader_t(refs_A)), + std::unique_ptr(new mock_reader_t()), TID_A); + readers.emplace_back(std::unique_ptr(new mock_reader_t(refs_B)), + std::unique_ptr(new mock_reader_t()), TID_B); + std::vector sched_inputs; + sched_inputs.emplace_back(std::move(readers)); + // The regions are ignored on replay so we do not specify them. + scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_AS_PREVIOUSLY, + scheduler_t::DEPENDENCY_TIMESTAMPS, + scheduler_t::SCHEDULER_DEFAULTS, + /*verbosity=*/4); + zipfile_istream_t infile(record_fname); + sched_ops.schedule_replay_istream = &infile; + scheduler_t scheduler; + if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) != + scheduler_t::STATUS_SUCCESS) + assert(false); + std::vector sched_as_string = + run_lockstep_simulation(scheduler, NUM_OUTPUTS, TID_BASE, /*send_time=*/true); + for (int i = 0; i < NUM_OUTPUTS; i++) { + std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n"; + } + assert(sched_as_string[0] == CORE0_SCHED_STRING); + } +#endif +} + static void test_kernel_switch_sequences() { @@ -5119,6 +5238,7 @@ test_main(int argc, const char *argv[]) test_unscheduled(); test_unscheduled_fallback(); test_unscheduled_initially(); + test_unscheduled_initially_roi(); test_kernel_switch_sequences(); test_random_schedule(); test_record_scheduler();