Commit 60f21a264930 for kernel

commit 60f21a2649308bbd84919ba6656d5ccd660953cf
Author: Tejun Heo <tj@kernel.org>
Date:   Mon Apr 27 14:16:34 2026 -1000

    cgroup, sched_ext: Include exiting tasks in cgroup iter

    a72f73c4dd9b ("cgroup: Don't expose dead tasks in cgroup") made
    css_task_iter_advance() skip exiting tasks so cgroup.procs stays consistent
    with waitpid() visibility. Unfortunately, this broke scx_task_iter.

    scx_task_iter walks either scx_tasks (global) or a cgroup subtree via
    css_task_iter, and the two modes are expected to cover the same set of
    tasks. After the above change, the cgroup-scoped mode silently skips
    tasks past exit_signals() that are still on scx_tasks.

    scx_sub_enable_workfn()'s abort path is one of the symptoms: an exiting
    SCX_TASK_SUB_INIT task can race past the cgroup iter, leaking
    __scx_init_task() state. Other iterations share the same gap.

    Add CSS_TASK_ITER_WITH_DEAD to opt out of the skip and use it for the
    cgroup-scoped walk in scx_task_iter_start() and scx_task_iter_next().

    Fixes: b0e4c2f8a0f0 ("sched_ext: Implement cgroup subtree iteration for scx_task_iter")
    Reported-by: Cheng-Yang Chou <yphbchou0911@gmail.com>
    Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e52160e85af4..f6d037a30fd8 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -53,6 +53,7 @@ struct kernel_clone_args;
 enum css_task_iter_flags {
 	CSS_TASK_ITER_PROCS    = (1U << 0),  /* walk only threadgroup leaders */
 	CSS_TASK_ITER_THREADED = (1U << 1),  /* walk all threaded css_sets in the domain */
+	CSS_TASK_ITER_WITH_DEAD = (1U << 2),  /* include exiting tasks */
 	CSS_TASK_ITER_SKIPPED  = (1U << 16), /* internal flags */
 };

diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 1f084ee71443..e51ce4cd3739 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -5059,10 +5059,12 @@ static void css_task_iter_advance(struct css_task_iter *it)

 	task = list_entry(it->task_pos, struct task_struct, cg_list);
 	/*
-	 * Hide tasks that are exiting but not yet removed. Keep zombie
-	 * leaders with live threads visible.
+	 * Hide tasks that are exiting but not yet removed by default. Keep
+	 * zombie leaders with live threads visible. Usages that need to walk
+	 * every existing task can opt out via CSS_TASK_ITER_WITH_DEAD.
 	 */
-	if ((task->flags & PF_EXITING) && !atomic_read(&task->signal->live))
+	if (!(it->flags & CSS_TASK_ITER_WITH_DEAD) &&
+	    (task->flags & PF_EXITING) && !atomic_read(&task->signal->live))
 		goto repeat;

 	if (it->flags & CSS_TASK_ITER_PROCS) {
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 9483be03a4ca..dc5d4787296b 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -766,7 +766,8 @@ static void scx_task_iter_start(struct scx_task_iter *iter, struct cgroup *cgrp)
 		lockdep_assert_held(&cgroup_mutex);
 		iter->cgrp = cgrp;
 		iter->css_pos = css_next_descendant_pre(NULL, &iter->cgrp->self);
-		css_task_iter_start(iter->css_pos, 0, &iter->css_iter);
+		css_task_iter_start(iter->css_pos, CSS_TASK_ITER_WITH_DEAD,
+				    &iter->css_iter);
 		return;
 	}
 #endif
@@ -866,7 +867,8 @@ static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter)
 			iter->css_pos = css_next_descendant_pre(iter->css_pos,
 								&iter->cgrp->self);
 			if (iter->css_pos)
-				css_task_iter_start(iter->css_pos, 0, &iter->css_iter);
+				css_task_iter_start(iter->css_pos, CSS_TASK_ITER_WITH_DEAD,
+						    &iter->css_iter);
 		}
 		return NULL;
 	}