
scx: Dump debug info after an abort
When an scx scheduler gets aborted, it's difficult to tell what the system
was doing after the fact, as normal operation is restored by reverting to the
default scheduler. Let's capture the runqueue and runnable task states in a
debug dump buffer to aid debugging.

Signed-off-by: Tejun Heo <[email protected]>
htejun committed Jan 22, 2024
1 parent 12d3dd0 commit 630f5ae
Showing 3 changed files with 107 additions and 1 deletion.
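
The core trick in the new dump path, visible in scx_dump_state() below, is to
size the seq_buf at SCX_EXIT_DUMP_LEN minus the truncation marker, so the
marker can always be appended safely if the dump overflows. A minimal
userspace sketch of the same pattern (not part of the commit; DUMP_LEN, the
marker text, and the fill loop are illustrative only):

/*
 * Write into a fixed buffer whose working size excludes the marker,
 * then append the marker only when the writable region overflows.
 */
#include <stdio.h>
#include <string.h>

#define DUMP_LEN 64

static const char trunc_marker[] = "\n~~~~ TRUNCATED ~~~~\n";

int main(void)
{
	char dump[DUMP_LEN];
	size_t limit = DUMP_LEN - sizeof(trunc_marker);	/* reserve marker space */
	size_t used = 0;

	for (int i = 0; i < 16; i++) {
		/* snprintf() returns the length it *wanted* to write */
		int n = snprintf(dump + used, limit - used, "line %d\n", i);

		if (n < 0 || (size_t)n >= limit - used) {
			/*
			 * Overflowed the writable region; the reserved
			 * tail guarantees the marker always fits.
			 */
			memcpy(dump + strlen(dump), trunc_marker,
			       sizeof(trunc_marker));
			break;
		}
		used += n;
	}
	fputs(dump, stdout);
	return 0;
}

Reserving sizeof(trunc_marker) up front is what lets the overflow branch in
scx_dump_state() write the marker unconditionally without overrunning ei->dump.
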
3 changes: 3 additions & 0 deletions include/linux/sched/ext.h
@@ -81,6 +81,9 @@ struct scx_exit_info {

/* informational message */
char *msg;

/* debug dump */
char *dump;
};

/* sched_ext_ops.flags */
1 change: 1 addition & 0 deletions kernel/sched/build_policy.c
@@ -25,6 +25,7 @@
#include <linux/livepatch.h>
#include <linux/pm.h>
#include <linux/psi.h>
#include <linux/seq_buf.h>
#include <linux/seqlock_api.h>
#include <linux/slab.h>
#include <linux/suspend.h>
104 changes: 103 additions & 1 deletion kernel/sched/ext.c
@@ -21,6 +21,7 @@ enum scx_internal_consts {

SCX_EXIT_BT_LEN = 64,
SCX_EXIT_MSG_LEN = 1024,
SCX_EXIT_DUMP_LEN = 32768,
};

enum scx_ops_enable_state {
@@ -3220,6 +3221,7 @@ static void scx_ops_bypass(bool bypass)

static void free_exit_info(struct scx_exit_info *ei)
{
kfree(ei->dump);
kfree(ei->msg);
kfree(ei->bt);
kfree(ei);
@@ -3235,8 +3237,9 @@ static struct scx_exit_info *alloc_exit_info(void)

ei->bt = kcalloc(sizeof(ei->bt[0]), SCX_EXIT_BT_LEN, GFP_KERNEL);
ei->msg = kzalloc(SCX_EXIT_MSG_LEN, GFP_KERNEL);
ei->dump = kzalloc(SCX_EXIT_DUMP_LEN, GFP_KERNEL);

if (!ei->bt || !ei->msg) {
if (!ei->bt || !ei->msg || !ei->dump) {
free_exit_info(ei);
return NULL;
}
@@ -3437,8 +3440,100 @@ static void scx_ops_disable(enum scx_exit_kind kind)
schedule_scx_ops_disable_work();
}

static void scx_dump_task(struct seq_buf *s, struct task_struct *p, char marker,
unsigned long now)
{
static unsigned long bt[SCX_EXIT_BT_LEN];
char dsq_id_buf[19] = "(n/a)";
unsigned int bt_len;
size_t avail, used;
char *buf;

if (p->scx.dsq)
scnprintf(dsq_id_buf, sizeof(dsq_id_buf), "0x%llx",
(unsigned long long)p->scx.dsq->id);

seq_buf_printf(s, "\n %c%c %-16s[%6d]: flags=0x%x dsq_flags=0x%x ops_state=0x%lx\n",
marker, task_state_to_char(p), p->comm, p->pid,
p->scx.flags, p->scx.dsq_flags,
atomic_long_read(&p->scx.ops_state));
seq_buf_printf(s, "%*srun_at=%+ldms dsq_id=%s\n", 30, "",
jiffies_delta_msecs(p->scx.runnable_at, now), dsq_id_buf);
seq_buf_printf(s, "%*ssticky/holding_cpu=%d/%d\n", 30, "",
p->scx.sticky_cpu, p->scx.holding_cpu);

bt_len = stack_trace_save_tsk(p, bt, SCX_EXIT_BT_LEN, 1);

avail = seq_buf_get_buf(s, &buf);
used = stack_trace_snprint(buf, avail, bt, bt_len, 3);
seq_buf_commit(s, used < avail ? used : -1);
}

static void scx_dump_state(struct scx_exit_info *ei)
{
const char trunc_marker[] = "\n\n~~~~ TRUNCATED ~~~~\n";
unsigned long now = jiffies;
struct seq_buf s;
size_t avail, used;
char *buf;
int cpu;

seq_buf_init(&s, ei->dump, SCX_EXIT_DUMP_LEN - sizeof(trunc_marker));

seq_buf_printf(&s, "%s[%d] triggered exit kind %d:\n %s (%s)\n\n",
current->comm, current->pid, ei->kind, ei->reason, ei->msg);
seq_buf_printf(&s, "Backtrace:\n");
avail = seq_buf_get_buf(&s, &buf);
used = stack_trace_snprint(buf, avail, ei->bt, ei->bt_len, 1);
seq_buf_commit(&s, used < avail ? used : -1);

seq_buf_printf(&s, "\nRunqueue states\n");
seq_buf_printf(&s, "---------------\n");

for_each_possible_cpu(cpu) {
struct rq *rq = cpu_rq(cpu);
struct rq_flags rf;
struct task_struct *p;

rq_lock(rq, &rf);

if (list_empty(&rq->scx.runnable_list) &&
rq->curr->sched_class == &idle_sched_class)
goto next;

seq_buf_printf(&s, "\nCPU %d: nr_run=%u flags=0x%x cpu_rel=%d pnt_seq=%lu curr=%s[%d](%ps)\n",
cpu, rq->scx.nr_running, rq->scx.flags,
rq->scx.cpu_released, rq->scx.pnt_seq,
rq->curr->comm, rq->curr->pid,
rq->curr->sched_class);
if (!cpumask_empty(rq->scx.cpus_to_kick))
seq_buf_printf(&s, " cpus_to_kick : %*pb\n",
cpumask_pr_args(rq->scx.cpus_to_kick));
if (!cpumask_empty(rq->scx.cpus_to_preempt))
seq_buf_printf(&s, " cpus_to_preempt: %*pb\n",
cpumask_pr_args(rq->scx.cpus_to_preempt));
if (!cpumask_empty(rq->scx.cpus_to_wait))
seq_buf_printf(&s, " cpus_to_wait : %*pb\n",
cpumask_pr_args(rq->scx.cpus_to_wait));

if (rq->curr->sched_class == &ext_sched_class)
scx_dump_task(&s, rq->curr, '*', now);

list_for_each_entry(p, &rq->scx.runnable_list, scx.runnable_node)
scx_dump_task(&s, p, ' ', now);
next:
rq_unlock(rq, &rf);
}

if (seq_buf_has_overflowed(&s)) {
used = strlen(seq_buf_str(&s));
memcpy(ei->dump + used, trunc_marker, sizeof(trunc_marker));
}
}

static void scx_ops_error_irq_workfn(struct irq_work *irq_work)
{
scx_dump_state(scx_exit_info);
schedule_scx_ops_disable_work();
}

@@ -3460,6 +3555,13 @@ __printf(2, 3) void scx_ops_error_kind(enum scx_exit_kind kind,
vscnprintf(ei->msg, SCX_EXIT_MSG_LEN, fmt, args);
va_end(args);

/*
* Set ei->kind and ->reason for scx_dump_state(). They'll be set again
* in scx_ops_disable_workfn().
*/
ei->kind = kind;
ei->reason = scx_exit_reason(ei->kind);

irq_work_queue(&scx_ops_error_irq_work);
}
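
A note on the seq_buf_get_buf()/seq_buf_commit() pairing used by
scx_dump_task() and scx_dump_state() above: it hands stack_trace_snprint()
direct access to the seq_buf's unused tail, and the caller then commits
either the bytes consumed or -1 to mark the buffer overflowed. A simplified
userspace stand-in (the struct seq_buf here is a toy reimplementation and
write_trace() is a hypothetical placeholder for stack_trace_snprint()):

#include <stddef.h>
#include <stdio.h>

/* Toy stand-in for the kernel's struct seq_buf. */
struct seq_buf {
	char *buffer;
	size_t size;	/* total capacity */
	size_t len;	/* bytes used; len > size means overflowed */
};

/* Hand out the unused tail of the buffer for a direct write. */
static size_t seq_buf_get_buf(struct seq_buf *s, char **bufp)
{
	if (s->len < s->size) {
		*bufp = s->buffer + s->len;
		return s->size - s->len;
	}
	*bufp = NULL;
	return 0;
}

/* Account for a direct write; a negative num marks overflow. */
static void seq_buf_commit(struct seq_buf *s, int num)
{
	if (num < 0)
		s->len = s->size + 1;
	else
		s->len += (size_t)num;
}

/*
 * Hypothetical placeholder for stack_trace_snprint(): returns the
 * length it wanted to write, like snprintf().
 */
static int write_trace(char *buf, size_t avail)
{
	return snprintf(buf, avail, "  frame0\n  frame1\n  frame2\n");
}

int main(void)
{
	char backing[128];
	struct seq_buf s = { .buffer = backing, .size = sizeof(backing) };
	char *buf;
	size_t avail = seq_buf_get_buf(&s, &buf);
	int used = write_trace(buf, avail);

	seq_buf_commit(&s, (used >= 0 && (size_t)used < avail) ? used : -1);
	fwrite(s.buffer, 1, s.len <= s.size ? s.len : s.size, stdout);
	return 0;
}

Committing -1 pushes len past size, which is what seq_buf_has_overflowed()
checks, and that in turn is exactly the condition scx_dump_state() tests
before appending the truncation marker.
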

