diff --git a/clients/drcachesim/scheduler/scheduler.cpp b/clients/drcachesim/scheduler/scheduler.cpp
index 2b133f8eab6..37680dc06c0 100644
--- a/clients/drcachesim/scheduler/scheduler.cpp
+++ b/clients/drcachesim/scheduler/scheduler.cpp
@@ -1676,7 +1676,7 @@ scheduler_tmpl_t<RecordType, ReaderType>::process_next_initial_record(
             return false;
         }
     } else if (marker_type == TRACE_MARKER_TYPE_SYSCALL_UNSCHEDULE) {
-        if (options_.honor_direct_switches) {
+        if (options_.honor_direct_switches && options_.mapping != MAP_AS_PREVIOUSLY) {
             input.unscheduled = true;
             // Ignore this marker during regular processing.
             input.skip_next_unscheduled = true;
@@ -1701,6 +1701,29 @@ scheduler_tmpl_t<RecordType, ReaderType>::get_initial_input_content(
     // output stream(s).
     for (size_t i = 0; i < inputs_.size(); ++i) {
         input_info_t &input = inputs_[i];
+
+        // If the input jumps to the middle immediately, do that now so we'll have
+        // the proper start timestamp.
+        if (!input.regions_of_interest.empty() &&
+            // The docs say for replay we allow the user to pass ROI but ignore it.
+            // Maybe we should disallow it so we don't need checks like this?
+            options_.mapping != MAP_AS_PREVIOUSLY) {
+            RecordType record = create_invalid_record();
+            sched_type_t::stream_status_t res =
+                advance_region_of_interest(/*output=*/-1, record, input);
+            if (res == sched_type_t::STATUS_SKIPPED) {
+                input.next_timestamp =
+                    static_cast<uintptr_t>(input.reader->get_last_timestamp());
+                // We can skip the rest of the loop here (the filetype will be there
+                // in the stream).
+                continue;
+            }
+            if (res != sched_type_t::STATUS_OK) {
+                VPRINT(this, 1, "Failed to advance initial ROI with status %d\n", res);
+                return sched_type_t::STATUS_ERROR_RANGE_INVALID;
+            }
+        }
+
         bool found_filetype = false;
         bool found_timestamp = !gather_timestamps || input.next_timestamp > 0;
         if (process_next_initial_record(input, create_invalid_record(), found_filetype,
@@ -2078,30 +2101,66 @@ scheduler_tmpl_t<RecordType, ReaderType>::advance_region_of_interest(
         return sched_type_t::STATUS_OK;
 
     VPRINT(this, 2,
-           "skipping from %" PRIu64 " to %" PRIu64 " instrs (%" PRIu64
+           "skipping from %" PRId64 " to %" PRIu64 " instrs (%" PRIu64
            " in reader) for ROI\n",
            cur_instr, cur_range.start_instruction,
            cur_range.start_instruction - cur_reader_instr - 1);
     if (options_.schedule_record_ostream != nullptr) {
-        sched_type_t::stream_status_t status = close_schedule_segment(output, input);
-        if (status != sched_type_t::STATUS_OK)
-            return status;
-        status = record_schedule_segment(output, schedule_record_t::SKIP, input.index,
-                                         cur_instr, cur_range.start_instruction);
-        if (status != sched_type_t::STATUS_OK)
-            return status;
-        status = record_schedule_segment(output, schedule_record_t::DEFAULT, input.index,
-                                         cur_range.start_instruction);
-        if (status != sched_type_t::STATUS_OK)
-            return status;
+        if (output >= 0) {
+            // Propagate recording failures to the caller instead of dropping them.
+            sched_type_t::stream_status_t status = record_schedule_skip(
+                output, input.index, cur_instr, cur_range.start_instruction);
+            if (status != sched_type_t::STATUS_OK)
+                return status;
+        } // Else, will be done in set_cur_input once assigned to an output.
     }
     if (cur_range.start_instruction < cur_reader_instr) {
         // We do not support skipping without skipping over the pre-read: we would
         // need to extract from the queue.
         return sched_type_t::STATUS_INVALID;
     }
-    return skip_instructions(output, input,
-                             cur_range.start_instruction - cur_reader_instr - 1);
+    return skip_instructions(input, cur_range.start_instruction - cur_reader_instr - 1);
+}
+
+// Records in the given output's schedule a SKIP record for the given input
+// spanning start_instruction to stop_instruction, followed by a new DEFAULT
+// record that starts at stop_instruction.
+// Returns STATUS_INVALID if schedule recording is not enabled, and otherwise
+// the first non-OK status from the underlying segment operations.
+template <typename RecordType, typename ReaderType>
+typename scheduler_tmpl_t<RecordType, ReaderType>::stream_status_t
+scheduler_tmpl_t<RecordType, ReaderType>::record_schedule_skip(output_ordinal_t output,
+                                                               input_ordinal_t input,
+                                                               uint64_t start_instruction,
+                                                               uint64_t stop_instruction)
+{
+    if (options_.schedule_record_ostream == nullptr)
+        return sched_type_t::STATUS_INVALID;
+    sched_type_t::stream_status_t status;
+    // Close any prior default record for this input.  If we switched inputs,
+    // we'll already have closed the prior in set_cur_input().
+    if (outputs_[output].record.back().type == schedule_record_t::DEFAULT &&
+        outputs_[output].record.back().key.input == input) {
+        status = close_schedule_segment(output, inputs_[input]);
+        if (status != sched_type_t::STATUS_OK)
+            return status;
+    }
+    if (outputs_[output].record.size() == 1) {
+        // Replay doesn't handle starting out with a skip record: we need a
+        // start=0,stop=0 dummy entry to get things rolling at the start of
+        // an output's records, if we're the first record after the version.
+        assert(outputs_[output].record.back().type == schedule_record_t::VERSION);
+        status = record_schedule_segment(output, schedule_record_t::DEFAULT, input, 0, 0);
+        if (status != sched_type_t::STATUS_OK)
+            return status;
+    }
+    status = record_schedule_segment(output, schedule_record_t::SKIP, input,
+                                     start_instruction, stop_instruction);
+    if (status != sched_type_t::STATUS_OK)
+        return status;
+    // Resume regular recording for this input at the skip target.
+    return record_schedule_segment(output, schedule_record_t::DEFAULT, input,
+                                   stop_instruction);
 }
 
 template <typename RecordType, typename ReaderType>
@@ -2124,8 +2177,7 @@ scheduler_tmpl_t<RecordType, ReaderType>::clear_input_queue(input_info_t &input)
 
 template <typename RecordType, typename ReaderType>
 typename scheduler_tmpl_t<RecordType, ReaderType>::stream_status_t
-scheduler_tmpl_t<RecordType, ReaderType>::skip_instructions(output_ordinal_t output,
-                                                            input_info_t &input,
+scheduler_tmpl_t<RecordType, ReaderType>::skip_instructions(input_info_t &input,
                                                             uint64_t skip_amount)
 {
     // reader_t::at_eof_ is true until init() is called.
@@ -2161,28 +2213,11 @@ scheduler_tmpl_t<RecordType, ReaderType>::skip_instructions(output_ordinal_t out
         }
     }
     input.in_cur_region = true;
-    auto *stream = outputs_[output].stream;
 
     // We've documented that an output stream's ordinals ignore skips in its input
     // streams, so we do not need to remember the input's ordinals pre-skip and increase
     // our output's ordinals commensurately post-skip.
 
-    // If we skipped from the start we may not have seen the initial headers:
-    // use the input's cached copies.
-    // We set the version and filetype up front for outputs with
-    // an initial input, so we check a different field to detect a
-    // skip.
-    if (stream->cache_line_size_ == 0 ||
-        // Check the version too as a fallback for inputs with no cache size.
-        stream->version_ == 0) {
-        stream->version_ = input.reader->get_version();
-        stream->last_timestamp_ = input.reader->get_last_timestamp();
-        stream->first_timestamp_ = input.reader->get_first_timestamp();
-        stream->filetype_ = input.reader->get_filetype();
-        stream->cache_line_size_ = input.reader->get_cache_line_size();
-        stream->chunk_instr_count_ = input.reader->get_chunk_instr_count();
-        stream->page_size_ = input.reader->get_page_size();
-    }
     // We let the user know we've skipped.  There's no discontinuity for the
     // first one so we do not insert a marker there (if we do want to insert one,
     // we need to update the view tool to handle a window marker as the very
@@ -2229,6 +2264,10 @@ scheduler_tmpl_t<RecordType, ReaderType>::record_schedule_segment(
         // idle records quickly balloon the file.
         return sched_type_t::STATUS_OK;
     }
+    VPRINT(this, 4,
+           "recording out=%d type=%d input=%d start=%" PRIu64 " stop=%" PRIu64
+           " time=%" PRIu64 "\n",
+           output, type, input, start_instruction, stop_instruction, timestamp);
     outputs_[output].record.emplace_back(type, input, start_instruction, stop_instruction,
                                          timestamp);
     // The stop is typically updated later in close_schedule_segment().
@@ -2275,9 +2314,11 @@ scheduler_tmpl_t<RecordType, ReaderType>::close_schedule_segment(output_ordinal_
             input.index);
         ++instr_ord;
     }
-    VPRINT(
-        this, 3, "close_schedule_segment: input=%d start=%" PRId64 " stop=%" PRId64 "\n",
-        input.index, outputs_[output].record.back().value.start_instruction, instr_ord);
+    VPRINT(this, 3,
+           "close_schedule_segment: input=%d type=%d start=%" PRIu64 " stop=%" PRIu64
+           "\n",
+           input.index, outputs_[output].record.back().type,
+           outputs_[output].record.back().value.start_instruction, instr_ord);
     // Check for empty default entries, except the starter 0,0 ones.
     assert(outputs_[output].record.back().type != schedule_record_t::DEFAULT ||
            outputs_[output].record.back().value.start_instruction < instr_ord ||
@@ -2500,11 +2541,17 @@ scheduler_tmpl_t<RecordType, ReaderType>::set_cur_input(output_ordinal_t output,
 
     std::lock_guard<std::mutex> lock(*inputs_[input].lock);
 
-    if (prev_input < 0 && outputs_[output].stream->filetype_ == 0) {
+    if (prev_input < 0 && outputs_[output].stream->version_ == 0) {
         // Set the version and filetype up front, to let the user query at init time
-        // as documented.
-        outputs_[output].stream->version_ = inputs_[input].reader->get_version();
-        outputs_[output].stream->filetype_ = inputs_[input].reader->get_filetype();
+        // as documented.  Also set the other fields in case we did a skip for ROI.
+        auto *stream = outputs_[output].stream;
+        stream->version_ = inputs_[input].reader->get_version();
+        stream->last_timestamp_ = inputs_[input].reader->get_last_timestamp();
+        stream->first_timestamp_ = inputs_[input].reader->get_first_timestamp();
+        stream->filetype_ = inputs_[input].reader->get_filetype();
+        stream->cache_line_size_ = inputs_[input].reader->get_cache_line_size();
+        stream->chunk_instr_count_ = inputs_[input].reader->get_chunk_instr_count();
+        stream->page_size_ = inputs_[input].reader->get_page_size();
     }
 
     if (inputs_[input].pid != INVALID_PID) {
@@ -2541,14 +2588,29 @@ scheduler_tmpl_t<RecordType, ReaderType>::set_cur_input(output_ordinal_t output,
     }
 
     inputs_[input].prev_time_in_quantum = outputs_[output].cur_time;
+
     if (options_.schedule_record_ostream != nullptr) {
         uint64_t instr_ord = get_instr_ordinal(inputs_[input]);
         VPRINT(this, 3, "set_cur_input: recording input=%d start=%" PRId64 "\n", input,
                instr_ord);
-        sched_type_t::stream_status_t status =
-            record_schedule_segment(output, schedule_record_t::DEFAULT, input, instr_ord);
-        if (status != sched_type_t::STATUS_OK)
-            return status;
+        // Both recording paths report failures through this status.
+        sched_type_t::stream_status_t status;
+        if (!inputs_[input].regions_of_interest.empty() &&
+            inputs_[input].cur_region == 0 && inputs_[input].in_cur_region &&
+            (instr_ord == inputs_[input].regions_of_interest[0].start_instruction ||
+             // The ord may be 1 less because we're still on the inserted timestamp.
+             instr_ord + 1 == inputs_[input].regions_of_interest[0].start_instruction)) {
+            // We skipped during init but didn't have an output for recording the skip:
+            // record it now.
+            status = record_schedule_skip(
+                output, input, 0,
+                inputs_[input].regions_of_interest[0].start_instruction);
+        } else {
+            status = record_schedule_segment(output, schedule_record_t::DEFAULT, input,
+                                             instr_ord);
+        }
+        if (status != sched_type_t::STATUS_OK)
+            return status;
     }
     return STATUS_OK;
 }
@@ -2665,7 +2724,7 @@ scheduler_tmpl_t<RecordType, ReaderType>::pick_next_input_as_previously(
                "next_record[%d]: skipping from %" PRId64 " to %" PRId64
                " in %d for schedule\n",
                output, cur_reader_instr, segment.stop_instruction, index);
-        auto status = skip_instructions(output, inputs_[index],
+        auto status = skip_instructions(inputs_[index],
                                         segment.stop_instruction - cur_reader_instr -
                                             1 /*exclusive*/);
         // Increment the region to get window id markers with ordinals.
diff --git a/clients/drcachesim/scheduler/scheduler.h b/clients/drcachesim/scheduler/scheduler.h
index 5611b664180..a716fa84449 100644
--- a/clients/drcachesim/scheduler/scheduler.h
+++ b/clients/drcachesim/scheduler/scheduler.h
@@ -104,6 +104,7 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
         STATUS_ERROR_FILE_READ_FAILED,  /**< Error: file read failed. */
         STATUS_ERROR_NOT_IMPLEMENTED,   /**< Error: not implemented. */
         STATUS_ERROR_FILE_WRITE_FAILED, /**< Error: file write failed. */
+        STATUS_ERROR_RANGE_INVALID,     /**< Error: region of interest invalid. */
     };
 
     /**
@@ -204,6 +205,12 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
         {
         }
         /** Convenience constructor for common usage. */
+        input_thread_info_t(memref_tid_t tid, std::vector<range_t> regions)
+            : tids(1, tid)
+            , regions_of_interest(std::move(regions)) // By-value sink: move.
+        {
+        }
+        /** Convenience constructor for common usage. */
         input_thread_info_t(memref_tid_t tid, int priority)
             : tids(1, tid)
             , priority(priority)
@@ -252,6 +259,13 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
          * separation, but it is not inserted prior to the first range.  A
          * #dynamorio::drmemtrace::TRACE_TYPE_THREAD_EXIT record is inserted after the
          * final range.  These ranges must be non-overlapping and in increasing order.
+         *
+         * Be aware that selecting a subset of code can remove inter-input
+         * communication steps that could be required for forward progress.
+         * For example, if selected subsets include #TRACE_MARKER_TYPE_SYSCALL_UNSCHEDULE
+         * with no timeout but do not include a corresponding
+         * #TRACE_MARKER_TYPE_SYSCALL_SCHEDULE for wakeup, an input could remain
+         * unscheduled.
          */
         std::vector<range_t> regions_of_interest;
     };
@@ -1518,7 +1532,13 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
     // Does a direct skip, unconditionally.
     // The caller must hold the input.lock.
     stream_status_t
-    skip_instructions(output_ordinal_t output, input_info_t &input, uint64_t skip_amount);
+    skip_instructions(input_info_t &input, uint64_t skip_amount);
+
+    // Records an input skip in the output's recorded schedule.
+    // The caller must hold the input.lock.
+    stream_status_t
+    record_schedule_skip(output_ordinal_t output, input_ordinal_t input,
+                         uint64_t start_instruction, uint64_t stop_instruction);
 
     scheduler_status_t
     read_and_instantiate_traced_schedule();
diff --git a/clients/drcachesim/tests/scheduler_unit_tests.cpp b/clients/drcachesim/tests/scheduler_unit_tests.cpp
index b49103559e5..f1ce705749d 100644
--- a/clients/drcachesim/tests/scheduler_unit_tests.cpp
+++ b/clients/drcachesim/tests/scheduler_unit_tests.cpp
@@ -406,6 +406,7 @@ test_regions_bare_no_marker()
         /* clang-format off */
         make_thread(1),
         make_pid(1),
+        make_marker(TRACE_MARKER_TYPE_PAGE_SIZE, 4096),
         // This would not happen in a real trace, only in tests.  But it does
         // match a dynamic skip from the middle when an instruction has already
         // been read but not yet passed to the output stream.
@@ -663,14 +664,9 @@ test_regions_too_far()
     std::vector<scheduler_t::input_workload_t> sched_inputs;
     sched_inputs.emplace_back(std::move(readers));
     sched_inputs[0].thread_modifiers.push_back(scheduler_t::input_thread_info_t(regions));
-    if (scheduler.init(sched_inputs, 1,
-                       scheduler_t::make_scheduler_serial_options(/*verbosity=*/4)) !=
-        scheduler_t::STATUS_SUCCESS)
-        assert(false);
-    auto *stream = scheduler.get_stream(0);
-    memref_t memref;
-    scheduler_t::stream_status_t status = stream->next_record(memref);
-    assert(status == scheduler_t::STATUS_REGION_INVALID);
+    auto status = scheduler.init(
+        sched_inputs, 1, scheduler_t::make_scheduler_serial_options(/*verbosity=*/4));
+    assert(status == scheduler_t::STATUS_ERROR_RANGE_INVALID);
 }
 
 static void
@@ -4607,6 +4603,130 @@ test_unscheduled_initially()
     }
 }
 
+static void
+test_unscheduled_initially_roi()
+{
+#ifdef HAS_ZIP
+    std::cerr
+        << "\n----------------\nTesting initially-unscheduled + time deps with ROI\n";
+    static constexpr int NUM_OUTPUTS = 1;
+    static constexpr memref_tid_t TID_BASE = 100;
+    static constexpr memref_tid_t TID_A = TID_BASE + 0;
+    static constexpr memref_tid_t TID_B = TID_BASE + 1;
+    std::vector<trace_entry_t> trace_A = {
+        make_thread(TID_A),
+        make_pid(1),
+        make_version(TRACE_ENTRY_VERSION),
+        make_timestamp(1001),
+        make_marker(TRACE_MARKER_TYPE_CPU_ID, 0),
+        // A's trace begins with an unschedule marker, but A's region of
+        // interest starts after it.
+        // (A real trace would need another thread to wake A up; that is
+        // left out to keep this test small.)
+        make_marker(TRACE_MARKER_TYPE_SYSCALL_UNSCHEDULE, 0),
+        make_timestamp(4202),
+        make_marker(TRACE_MARKER_TYPE_CPU_ID, 0),
+        make_instr(/*pc=*/101),
+        // A's region of interest begins at the next instruction.
+        make_instr(/*pc=*/102),
+        make_instr(/*pc=*/103),
+        make_exit(TID_A),
+    };
+    std::vector<trace_entry_t> trace_B = {
+        make_thread(TID_B),
+        make_pid(1),
+        make_version(TRACE_ENTRY_VERSION),
+        make_timestamp(3001),
+        make_marker(TRACE_MARKER_TYPE_CPU_ID, 0),
+        make_instr(/*pc=*/201),
+        make_timestamp(4001),
+        make_marker(TRACE_MARKER_TYPE_CPU_ID, 0),
+        make_instr(/*pc=*/202),
+        // B's region of interest begins here; the last timestamp before it
+        // (4001) is lower than the one before A's region (4202).
+        make_instr(/*pc=*/203),
+        make_instr(/*pc=*/204),
+        make_exit(TID_B),
+    };
+    // Instr counts are 1-based.
+    std::vector<scheduler_t::range_t> regions_A;
+    regions_A.emplace_back(2, 0);
+    std::vector<scheduler_t::range_t> regions_B;
+    regions_B.emplace_back(3, 0);
+    // B should run first due to the lower timestamp at its ROI despite A's
+    // start-of-trace timestamp being lower.
+    static const char *const CORE0_SCHED_STRING = "..BB...AA.";
+
+    // Builds a fresh pair of mock readers over the two traces above.
+    auto make_readers = [&trace_A, &trace_B]() {
+        std::vector<scheduler_t::input_reader_t> readers;
+        readers.emplace_back(std::unique_ptr<mock_reader_t>(new mock_reader_t(trace_A)),
+                             std::unique_ptr<mock_reader_t>(new mock_reader_t()), TID_A);
+        readers.emplace_back(std::unique_ptr<mock_reader_t>(new mock_reader_t(trace_B)),
+                             std::unique_ptr<mock_reader_t>(new mock_reader_t()), TID_B);
+        return readers;
+    };
+    // Runs the lockstep simulation, prints each output's schedule, and checks
+    // the single output against the expected string.
+    auto run_and_verify = [](scheduler_t &sched) {
+        std::vector<std::string> per_cpu =
+            run_lockstep_simulation(sched, NUM_OUTPUTS, TID_BASE, /*send_time=*/true);
+        for (int cpu = 0; cpu < NUM_OUTPUTS; cpu++) {
+            std::cerr << "cpu #" << cpu << " schedule: " << per_cpu[cpu] << "\n";
+        }
+        assert(per_cpu[0] == CORE0_SCHED_STRING);
+    };
+
+    std::string schedule_path = "tmp_test_unsched_ROI.zip";
+    {
+        // Record.
+        std::vector<scheduler_t::input_workload_t> workloads;
+        workloads.emplace_back(make_readers());
+        auto &modifiers = workloads.back().thread_modifiers;
+        modifiers.push_back(scheduler_t::input_thread_info_t(TID_A, regions_A));
+        modifiers.push_back(scheduler_t::input_thread_info_t(TID_B, regions_B));
+        scheduler_t::scheduler_options_t options(scheduler_t::MAP_TO_ANY_OUTPUT,
+                                                 scheduler_t::DEPENDENCY_TIMESTAMPS,
+                                                 scheduler_t::SCHEDULER_DEFAULTS,
+                                                 /*verbosity=*/4);
+        zipfile_ostream_t outfile(schedule_path);
+        options.schedule_record_ostream = &outfile;
+        scheduler_t scheduler;
+        if (scheduler.init(workloads, NUM_OUTPUTS, std::move(options)) !=
+            scheduler_t::STATUS_SUCCESS)
+            assert(false);
+        run_and_verify(scheduler);
+        if (scheduler.write_recorded_schedule() != scheduler_t::STATUS_SUCCESS)
+            assert(false);
+    }
+    {
+        // Validate the recorded file before replaying it.
+        replay_file_checker_t checker;
+        zipfile_istream_t infile(schedule_path);
+        std::string checker_error = checker.check(&infile);
+        if (!checker_error.empty())
+            std::cerr << "replay file checker failed: " << checker_error;
+        assert(checker_error.empty());
+    }
+    {
+        // Test replay as it has complexities with skip records.
+        std::vector<scheduler_t::input_workload_t> workloads;
+        // The regions are ignored on replay so we do not specify them.
+        workloads.emplace_back(make_readers());
+        scheduler_t::scheduler_options_t options(scheduler_t::MAP_AS_PREVIOUSLY,
+                                                 scheduler_t::DEPENDENCY_TIMESTAMPS,
+                                                 scheduler_t::SCHEDULER_DEFAULTS,
+                                                 /*verbosity=*/4);
+        zipfile_istream_t infile(schedule_path);
+        options.schedule_replay_istream = &infile;
+        scheduler_t scheduler;
+        if (scheduler.init(workloads, NUM_OUTPUTS, std::move(options)) !=
+            scheduler_t::STATUS_SUCCESS)
+            assert(false);
+        run_and_verify(scheduler);
+    }
+#endif
+}
+
 static void
 test_kernel_switch_sequences()
 {
@@ -5119,6 +5238,7 @@ test_main(int argc, const char *argv[])
     test_unscheduled();
     test_unscheduled_fallback();
     test_unscheduled_initially();
+    test_unscheduled_initially_roi();
     test_kernel_switch_sequences();
     test_random_schedule();
     test_record_scheduler();