aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched/sched.h
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2024-10-23 11:36:41 +0200
committerPeter Zijlstra <peterz@infradead.org>2024-10-23 20:52:26 +0200
commitb55945c500c5723992504aa03b362fab416863a6 (patch)
treea69e8e10526175d716041f0ce09e5e14030f6c57 /kernel/sched/sched.h
parentLinux 6.12-rc4 (diff)
downloadlinux-b55945c500c5723992504aa03b362fab416863a6.tar.gz
linux-b55945c500c5723992504aa03b362fab416863a6.zip
sched: Fix pick_next_task_fair() vs try_to_wake_up() race
Syzkaller robot reported KCSAN tripping over the ASSERT_EXCLUSIVE_WRITER(p->on_rq) in __block_task(). The report noted that both pick_next_task_fair() and try_to_wake_up() were concurrently trying to write to the same p->on_rq, violating the assertion -- even though both paths hold rq->__lock. The logical consequence is that both code paths end up holding a different rq->__lock. And looking through ttwu(), this is possible when the __block_task() 'p->on_rq = 0' store is visible to the ttwu() 'p->on_rq' load, which then assumes the task is not queued and continues to migrate it. Rearrange things such that __block_task() releases @p with the store and no code thereafter will use @p again. Fixes: 152e11f6df29 ("sched/fair: Implement delayed dequeue") Reported-by: syzbot+0ec1e96c2cdf5c0e512a@syzkaller.appspotmail.com Reported-by: Kent Overstreet <kent.overstreet@linux.dev> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Tested-by: Marco Elver <elver@google.com> Link: https://lkml.kernel.org/r/20241023093641.GE16066@noisy.programming.kicks-ass.net
Diffstat (limited to 'kernel/sched/sched.h')
-rw-r--r--kernel/sched/sched.h34
1 files changed, 32 insertions, 2 deletions
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 081519ffab46..9f9d1cc390b1 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2769,8 +2769,6 @@ static inline void sub_nr_running(struct rq *rq, unsigned count)
static inline void __block_task(struct rq *rq, struct task_struct *p)
{
- WRITE_ONCE(p->on_rq, 0);
- ASSERT_EXCLUSIVE_WRITER(p->on_rq);
if (p->sched_contributes_to_load)
rq->nr_uninterruptible++;
@@ -2778,6 +2776,38 @@ static inline void __block_task(struct rq *rq, struct task_struct *p)
atomic_inc(&rq->nr_iowait);
delayacct_blkio_start();
}
+
+ ASSERT_EXCLUSIVE_WRITER(p->on_rq);
+
+ /*
+ * The moment this write goes through, ttwu() can swoop in and migrate
+ * this task, rendering our rq->__lock ineffective.
+ *
+ * __schedule() try_to_wake_up()
+ * LOCK rq->__lock LOCK p->pi_lock
+ * pick_next_task()
+ * pick_next_task_fair()
+ * pick_next_entity()
+ * dequeue_entities()
+ * __block_task()
+ * RELEASE p->on_rq = 0 if (p->on_rq && ...)
+ * break;
+ *
+ * ACQUIRE (after ctrl-dep)
+ *
+ * cpu = select_task_rq();
+ * set_task_cpu(p, cpu);
+ * ttwu_queue()
+ * ttwu_do_activate()
+ * LOCK rq->__lock
+ * activate_task()
+ * STORE p->on_rq = 1
+ * UNLOCK rq->__lock
+ *
+ * Callers must ensure to not reference @p after this -- we no longer
+ * own it.
+ */
+ smp_store_release(&p->on_rq, 0);
}
extern void activate_task(struct rq *rq, struct task_struct *p, int flags);