Commit 129cdce9da9e for kernel
commit 129cdce9da9e44c52d38889e0411be9817bca114
Author: Jakub Kicinski <kuba@kernel.org>
Date: Wed Jun 24 11:20:16 2026 -0700
net: add the driver-facing netdev_work scheduling API
With an extra event mask we can easily extend the netdev work
to also service driver-defined events. For advanced drivers
this is probably not a perfect match, but it makes running
deferred work easier in simple cases.
Expose the netdev_work facility to drivers. Add helpers
to schedule work and a dedicated ndo to perform the driver-
-scheduled actions.
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20260624182018.2445732-3-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 732506787db3..9981d637f8b5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1131,6 +1131,9 @@ struct netdev_net_notifier {
* netdev_hw_addr_list_for_each(ha, uc). Return 0 on success or a
* negative errno to request a retry via the core backoff.
*
+ * void (*ndo_work)(struct net_device *dev, unsigned long events);
+ * Run deferred work scheduled with netdev_work_sched(@events).
+ *
* int (*ndo_set_mac_address)(struct net_device *dev, void *addr);
* This function is called when the Media Access Control address
* needs to be changed. If this interface is not defined, the
@@ -1460,6 +1463,8 @@ struct net_device_ops {
struct net_device *dev,
struct netdev_hw_addr_list *uc,
struct netdev_hw_addr_list *mc);
+ void (*ndo_work)(struct net_device *dev,
+ unsigned long events);
int (*ndo_set_mac_address)(struct net_device *dev,
void *addr);
int (*ndo_validate_addr)(struct net_device *dev);
@@ -1932,6 +1937,8 @@ enum netdev_reg_state {
* does not implement ndo_set_rx_mode()
* @work_node: List entry for async netdev_work processing
* @work_tracker: Refcount tracker for async netdev_work
+ * @work_pending: Driver-defined pending netdev_work, passed to
+ * ndo_work() (see netdev_work_sched())
* @work_core_pending: Core-defined pending netdev_work (NETDEV_WORK_*)
* @rx_mode_addr_cache: Recycled snapshot entries for rx_mode work
* @rx_mode_retry_timer: Timer that re-queues rx_mode work after failure
@@ -2329,6 +2336,7 @@ struct net_device {
bool uc_promisc;
struct list_head work_node;
netdevice_tracker work_tracker;
+ unsigned long work_pending;
unsigned long work_core_pending;
struct netdev_hw_addr_list rx_mode_addr_cache;
struct timer_list rx_mode_retry_timer;
@@ -5178,6 +5186,9 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
const struct pcpu_sw_netstats __percpu *netstats);
void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s);
+void netdev_work_sched(struct net_device *dev, unsigned long events);
+unsigned long netdev_work_cancel(struct net_device *dev, unsigned long mask);
+
enum {
NESTED_SYNC_IMM_BIT,
NESTED_SYNC_TODO_BIT,
diff --git a/net/core/netdev_work.c b/net/core/netdev_work.c
index c121c24dc493..3109fae132ad 100644
--- a/net/core/netdev_work.c
+++ b/net/core/netdev_work.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/export.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
@@ -16,32 +17,63 @@ static void netdev_work_proc(struct work_struct *work);
* - within the list entries (struct net_device fields):
* - work_node
* - work_tracker
+ * - work_pending
* - work_core_pending
*/
static LIST_HEAD(netdev_work_list);
static DEFINE_SPINLOCK(netdev_work_lock);
static DECLARE_WORK(netdev_work, netdev_work_proc);
-void __netdev_work_core_sched(struct net_device *dev, unsigned long event)
+static void netdev_work_enqueue(struct net_device *dev, unsigned long events,
+ unsigned long core)
{
+ if (!events && !core)
+ return;
+
spin_lock_bh(&netdev_work_lock);
if (list_empty(&dev->work_node)) {
list_add_tail(&dev->work_node, &netdev_work_list);
netdev_hold(dev, &dev->work_tracker, GFP_ATOMIC);
}
- dev->work_core_pending |= event;
+ dev->work_pending |= events;
+ dev->work_core_pending |= core;
spin_unlock_bh(&netdev_work_lock);
schedule_work(&netdev_work);
}
+static unsigned long
+netdev_work_dequeue(struct net_device *dev, unsigned long *pending,
+ unsigned long mask)
+{
+ unsigned long events;
+
+ spin_lock_bh(&netdev_work_lock);
+ events = *pending & mask;
+ *pending &= ~events;
+ if (!list_empty(&dev->work_node) &&
+ !dev->work_pending && !dev->work_core_pending) {
+ list_del_init(&dev->work_node);
+ netdev_put(dev, &dev->work_tracker);
+ }
+ spin_unlock_bh(&netdev_work_lock);
+
+ return events;
+}
+
+void netdev_work_sched(struct net_device *dev, unsigned long events)
+{
+ netdev_work_enqueue(dev, events, 0);
+}
+EXPORT_SYMBOL(netdev_work_sched);
+
/**
- * __netdev_work_core_cancel() - cancel selected core work for a netdev
+ * netdev_work_cancel() - cancel selected work for a netdev
* @dev: net_device
* @mask: events to cancel
*
* Clear @mask from the device's work pending mask. If no work is left pending
- * the device is dequeued.
+ * the device is dequeued and its ndo_work won't be called.
*
* No expectations on locking, but also no guarantees provided. If the caller
* wants to touch @dev afterwards (e.g. call the work that got canceled)
@@ -50,21 +82,33 @@ void __netdev_work_core_sched(struct net_device *dev, unsigned long event)
* Returns: the subset of @mask that was actually pending, so the caller can run
* those events inline.
*/
+unsigned long netdev_work_cancel(struct net_device *dev, unsigned long mask)
+{
+ return netdev_work_dequeue(dev, &dev->work_pending, mask);
+}
+EXPORT_SYMBOL(netdev_work_cancel);
+
+void __netdev_work_core_sched(struct net_device *dev, unsigned long events)
+{
+ netdev_work_enqueue(dev, 0, events);
+}
+
unsigned long
__netdev_work_core_cancel(struct net_device *dev, unsigned long mask)
{
- unsigned long event;
+ return netdev_work_dequeue(dev, &dev->work_core_pending, mask);
+}
- spin_lock_bh(&netdev_work_lock);
- event = dev->work_core_pending & mask;
- dev->work_core_pending &= ~mask;
- if (!list_empty(&dev->work_node) && !dev->work_core_pending) {
- list_del_init(&dev->work_node);
- netdev_put(dev, &dev->work_tracker);
- }
- spin_unlock_bh(&netdev_work_lock);
+static void netdev_work_run(struct net_device *dev, unsigned long events,
+ unsigned long core)
+{
+ if (!netif_device_present(dev))
+ return;
- return event;
+ if (core & NETDEV_WORK_RX_MODE)
+ netif_rx_mode_run(dev);
+ if (events && dev->netdev_ops->ndo_work)
+ dev->netdev_ops->ndo_work(dev, events);
}
static void netdev_work_proc(struct work_struct *work)
@@ -72,9 +116,9 @@ static void netdev_work_proc(struct work_struct *work)
rtnl_lock();
while (true) {
+ unsigned long events = 0, core = 0;
netdevice_tracker tracker;
struct net_device *dev;
- unsigned long core = 0;
spin_lock_bh(&netdev_work_lock);
if (list_empty(&netdev_work_list)) {
@@ -98,16 +142,17 @@ static void netdev_work_proc(struct work_struct *work)
list_del_init(&dev->work_node);
core = dev->work_core_pending;
dev->work_core_pending = 0;
+ events = dev->work_pending;
+ dev->work_pending = 0;
/* We took another ref above */
netdev_put(dev, &dev->work_tracker);
if (!dev_isalive(dev))
- core = 0;
+ core = events = 0;
}
spin_unlock_bh(&netdev_work_lock);
- if (core & NETDEV_WORK_RX_MODE)
- netif_rx_mode_run(dev);
+ netdev_work_run(dev, events, core);
netdev_unlock_ops(dev);
netdev_put(dev, &tracker);