Commit 129cdce9da9e for kernel

commit 129cdce9da9e44c52d38889e0411be9817bca114
Author: Jakub Kicinski <kuba@kernel.org>
Date:   Wed Jun 24 11:20:16 2026 -0700

    net: add the driver-facing netdev_work scheduling API

    With an extra event mask we can easily extend the netdev work
    to also service driver-defined events. For advanced drivers
    this is probably not a perfect match, but it makes running
    deferred work easier in simple cases.

    Expose the netdev_work facility to drivers. Add helpers
    to schedule work and a dedicated ndo to perform the
    driver-scheduled actions.

    Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
    Acked-by: Stanislav Fomichev <sdf@fomichev.me>
    Link: https://patch.msgid.link/20260624182018.2445732-3-kuba@kernel.org
    Signed-off-by: Jakub Kicinski <kuba@kernel.org>

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 732506787db3..9981d637f8b5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1131,6 +1131,9 @@ struct netdev_net_notifier {
  *	netdev_hw_addr_list_for_each(ha, uc). Return 0 on success or a
  *	negative errno to request a retry via the core backoff.
  *
+ * void (*ndo_work)(struct net_device *dev, unsigned long events);
+ *	Run deferred work scheduled with netdev_work_sched(@events).
+ *
  * int (*ndo_set_mac_address)(struct net_device *dev, void *addr);
  *	This function  is called when the Media Access Control address
  *	needs to be changed. If this interface is not defined, the
@@ -1460,6 +1463,8 @@ struct net_device_ops {
 					struct net_device *dev,
 					struct netdev_hw_addr_list *uc,
 					struct netdev_hw_addr_list *mc);
+	void			(*ndo_work)(struct net_device *dev,
+					    unsigned long events);
 	int			(*ndo_set_mac_address)(struct net_device *dev,
 						       void *addr);
 	int			(*ndo_validate_addr)(struct net_device *dev);
@@ -1932,6 +1937,8 @@ enum netdev_reg_state {
  *				does not implement ndo_set_rx_mode()
  *	@work_node:		List entry for async netdev_work processing
  *	@work_tracker:		Refcount tracker for async netdev_work
+ *	@work_pending:		Driver-defined pending netdev_work, passed to
+ *				ndo_work() (see netdev_work_sched())
  *	@work_core_pending:	Core-defined pending netdev_work (NETDEV_WORK_*)
  *	@rx_mode_addr_cache:	Recycled snapshot entries for rx_mode work
  *	@rx_mode_retry_timer:	Timer that re-queues rx_mode work after failure
@@ -2329,6 +2336,7 @@ struct net_device {
 	bool			uc_promisc;
 	struct list_head	work_node;
 	netdevice_tracker	work_tracker;
+	unsigned long		work_pending;
 	unsigned long		work_core_pending;
 	struct netdev_hw_addr_list	rx_mode_addr_cache;
 	struct timer_list	rx_mode_retry_timer;
@@ -5178,6 +5186,9 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
 			   const struct pcpu_sw_netstats __percpu *netstats);
 void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s);

+void netdev_work_sched(struct net_device *dev, unsigned long events);
+unsigned long netdev_work_cancel(struct net_device *dev, unsigned long mask);
+
 enum {
 	NESTED_SYNC_IMM_BIT,
 	NESTED_SYNC_TODO_BIT,
diff --git a/net/core/netdev_work.c b/net/core/netdev_work.c
index c121c24dc493..3109fae132ad 100644
--- a/net/core/netdev_work.c
+++ b/net/core/netdev_work.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-or-later

+#include <linux/export.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
@@ -16,32 +17,63 @@ static void netdev_work_proc(struct work_struct *work);
  *  - within the list entries (struct net_device fields):
  *	- work_node
  *	- work_tracker
+ *	- work_pending
  *	- work_core_pending
  */
 static LIST_HEAD(netdev_work_list);
 static DEFINE_SPINLOCK(netdev_work_lock);
 static DECLARE_WORK(netdev_work, netdev_work_proc);

-void __netdev_work_core_sched(struct net_device *dev, unsigned long event)
+static void netdev_work_enqueue(struct net_device *dev, unsigned long events,
+				unsigned long core)
 {
+	if (!events && !core)
+		return;
+
 	spin_lock_bh(&netdev_work_lock);
 	if (list_empty(&dev->work_node)) {
 		list_add_tail(&dev->work_node, &netdev_work_list);
 		netdev_hold(dev, &dev->work_tracker, GFP_ATOMIC);
 	}
-	dev->work_core_pending |= event;
+	dev->work_pending |= events;
+	dev->work_core_pending |= core;
 	spin_unlock_bh(&netdev_work_lock);

 	schedule_work(&netdev_work);
 }

+static unsigned long
+netdev_work_dequeue(struct net_device *dev, unsigned long *pending,
+		    unsigned long mask)
+{
+	unsigned long events;
+
+	spin_lock_bh(&netdev_work_lock);
+	events = *pending & mask;
+	*pending &= ~events;
+	if (!list_empty(&dev->work_node) &&
+	    !dev->work_pending && !dev->work_core_pending) {
+		list_del_init(&dev->work_node);
+		netdev_put(dev, &dev->work_tracker);
+	}
+	spin_unlock_bh(&netdev_work_lock);
+
+	return events;
+}
+
+void netdev_work_sched(struct net_device *dev, unsigned long events)
+{
+	netdev_work_enqueue(dev, events, 0);
+}
+EXPORT_SYMBOL(netdev_work_sched);
+
 /**
- * __netdev_work_core_cancel() - cancel selected core work for a netdev
+ * netdev_work_cancel() - cancel selected work for a netdev
  * @dev: net_device
  * @mask: events to cancel
  *
  * Clear @mask from the device's work pending mask. If no work is left pending
- * the device is dequeued.
+ * the device is dequeued and its ndo_work won't be called.
  *
  * No expectations on locking, but also no guarantees provided. If the caller
  * wants to touch @dev afterwards (e.g. call the work that got canceled)
@@ -50,21 +82,33 @@ void __netdev_work_core_sched(struct net_device *dev, unsigned long event)
  * Returns: the subset of @mask that was actually pending, so the caller can run
  * those events inline.
  */
+unsigned long netdev_work_cancel(struct net_device *dev, unsigned long mask)
+{
+	return netdev_work_dequeue(dev, &dev->work_pending, mask);
+}
+EXPORT_SYMBOL(netdev_work_cancel);
+
+void __netdev_work_core_sched(struct net_device *dev, unsigned long events)
+{
+	netdev_work_enqueue(dev, 0, events);
+}
+
 unsigned long
 __netdev_work_core_cancel(struct net_device *dev, unsigned long mask)
 {
-	unsigned long event;
+	return netdev_work_dequeue(dev, &dev->work_core_pending, mask);
+}

-	spin_lock_bh(&netdev_work_lock);
-	event = dev->work_core_pending & mask;
-	dev->work_core_pending &= ~mask;
-	if (!list_empty(&dev->work_node) && !dev->work_core_pending) {
-		list_del_init(&dev->work_node);
-		netdev_put(dev, &dev->work_tracker);
-	}
-	spin_unlock_bh(&netdev_work_lock);
+static void netdev_work_run(struct net_device *dev, unsigned long events,
+			    unsigned long core)
+{
+	if (!netif_device_present(dev))
+		return;

-	return event;
+	if (core & NETDEV_WORK_RX_MODE)
+		netif_rx_mode_run(dev);
+	if (events && dev->netdev_ops->ndo_work)
+		dev->netdev_ops->ndo_work(dev, events);
 }

 static void netdev_work_proc(struct work_struct *work)
@@ -72,9 +116,9 @@ static void netdev_work_proc(struct work_struct *work)
 	rtnl_lock();

 	while (true) {
+		unsigned long events = 0, core = 0;
 		netdevice_tracker tracker;
 		struct net_device *dev;
-		unsigned long core = 0;

 		spin_lock_bh(&netdev_work_lock);
 		if (list_empty(&netdev_work_list)) {
@@ -98,16 +142,17 @@ static void netdev_work_proc(struct work_struct *work)
 			list_del_init(&dev->work_node);
 			core = dev->work_core_pending;
 			dev->work_core_pending = 0;
+			events = dev->work_pending;
+			dev->work_pending = 0;
 			/* We took another ref above */
 			netdev_put(dev, &dev->work_tracker);

 			if (!dev_isalive(dev))
-				core = 0;
+				core = events = 0;
 		}
 		spin_unlock_bh(&netdev_work_lock);

-		if (core & NETDEV_WORK_RX_MODE)
-			netif_rx_mode_run(dev);
+		netdev_work_run(dev, events, core);
 		netdev_unlock_ops(dev);

 		netdev_put(dev, &tracker);