Commit 2700b7e603af in the Linux kernel

commit 2700b7e603af39ca55fe9fc876ca123efd44680f
Author: Shay Drory <shayd@nvidia.com>
Date:   Tue Feb 24 13:46:48 2026 +0200

    net/mlx5: DR, Fix circular locking dependency in dump

    Fix a circular locking dependency between dbg_mutex and the domain
    rx/tx mutexes that could lead to a deadlock.

    The dump path in dr_dump_domain_all() acquires locks in the order:
      dbg_mutex -> rx.mutex -> tx.mutex

    while the table/matcher creation paths acquire locks in the order:
      rx.mutex -> tx.mutex -> dbg_mutex

    This inverted lock ordering creates a circular dependency. Fix this by
    changing dr_dump_domain_all() to acquire the domain lock before
    dbg_mutex, matching the order used in mlx5dr_table_create() and
    mlx5dr_matcher_create().
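
    A condensed, illustrative view of the two orderings (only the lock and
    unlock calls named in this commit are shown; everything else is elided,
    and the exact pairing inside the creation paths is inferred from the
    ordering above rather than copied from the driver):

      /* Creation paths (mlx5dr_table_create() / mlx5dr_matcher_create()):
       * domain rx/tx mutexes first, dbg_mutex last.
       */
      mlx5dr_domain_lock(dmn);                  /* rx.mutex, then tx.mutex */
      mutex_lock(&dmn->dump_info.dbg_mutex);
      /* ... create the table / matcher ... */
      mutex_unlock(&dmn->dump_info.dbg_mutex);
      mlx5dr_domain_unlock(dmn);

      /* Old dump path (dr_dump_domain_all()): dbg_mutex first, domain lock
       * second; the inverse of the above, hence the circular dependency.
       */
      mutex_lock(&dmn->dump_info.dbg_mutex);
      mlx5dr_domain_lock(dmn);                  /* rx.mutex, then tx.mutex */
      /* ... dump the domain ... */
      mlx5dr_domain_unlock(dmn);
      mutex_unlock(&dmn->dump_info.dbg_mutex);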

    Lockdep splat:
     ======================================================
     WARNING: possible circular locking dependency detected
     6.19.0-rc6net_next_e817c4e #1 Not tainted
     ------------------------------------------------------
     sos/30721 is trying to acquire lock:
     ffff888102df5900 (&dmn->info.rx.mutex){+.+.}-{4:4}, at:
    dr_dump_start+0x131/0x450 [mlx5_core]

     but task is already holding lock:
     ffff888102df5bc0 (&dmn->dump_info.dbg_mutex){+.+.}-{4:4}, at:
    dr_dump_start+0x10b/0x450 [mlx5_core]

     which lock already depends on the new lock.

     the existing dependency chain (in reverse order) is:

     -> #2 (&dmn->dump_info.dbg_mutex){+.+.}-{4:4}:
            __mutex_lock+0x91/0x1060
            mlx5dr_matcher_create+0x377/0x5e0 [mlx5_core]
            mlx5_cmd_dr_create_flow_group+0x62/0xd0 [mlx5_core]
            mlx5_create_flow_group+0x113/0x1c0 [mlx5_core]
            mlx5_chains_create_prio+0x453/0x2290 [mlx5_core]
            mlx5_chains_get_table+0x2e2/0x980 [mlx5_core]
            esw_chains_create+0x1e6/0x3b0 [mlx5_core]
            esw_create_offloads_fdb_tables.cold+0x62/0x63f [mlx5_core]
            esw_offloads_enable+0x76f/0xd20 [mlx5_core]
            mlx5_eswitch_enable_locked+0x35a/0x500 [mlx5_core]
            mlx5_devlink_eswitch_mode_set+0x561/0x950 [mlx5_core]
            devlink_nl_eswitch_set_doit+0x67/0xe0
            genl_family_rcv_msg_doit+0xe0/0x130
            genl_rcv_msg+0x188/0x290
            netlink_rcv_skb+0x4b/0xf0
            genl_rcv+0x24/0x40
            netlink_unicast+0x1ed/0x2c0
            netlink_sendmsg+0x210/0x450
            __sock_sendmsg+0x38/0x60
            __sys_sendto+0x119/0x180
            __x64_sys_sendto+0x20/0x30
            do_syscall_64+0x70/0xd00
            entry_SYSCALL_64_after_hwframe+0x4b/0x53

     -> #1 (&dmn->info.tx.mutex){+.+.}-{4:4}:
            __mutex_lock+0x91/0x1060
            mlx5dr_table_create+0x11d/0x530 [mlx5_core]
            mlx5_cmd_dr_create_flow_table+0x62/0x140 [mlx5_core]
            __mlx5_create_flow_table+0x46f/0x960 [mlx5_core]
            mlx5_create_flow_table+0x16/0x20 [mlx5_core]
            esw_create_offloads_fdb_tables+0x136/0x240 [mlx5_core]
            esw_offloads_enable+0x76f/0xd20 [mlx5_core]
            mlx5_eswitch_enable_locked+0x35a/0x500 [mlx5_core]
            mlx5_devlink_eswitch_mode_set+0x561/0x950 [mlx5_core]
            devlink_nl_eswitch_set_doit+0x67/0xe0
            genl_family_rcv_msg_doit+0xe0/0x130
            genl_rcv_msg+0x188/0x290
            netlink_rcv_skb+0x4b/0xf0
            genl_rcv+0x24/0x40
            netlink_unicast+0x1ed/0x2c0
            netlink_sendmsg+0x210/0x450
            __sock_sendmsg+0x38/0x60
            __sys_sendto+0x119/0x180
            __x64_sys_sendto+0x20/0x30
            do_syscall_64+0x70/0xd00
            entry_SYSCALL_64_after_hwframe+0x4b/0x53

     -> #0 (&dmn->info.rx.mutex){+.+.}-{4:4}:
            __lock_acquire+0x18b6/0x2eb0
            lock_acquire+0xd3/0x2c0
            __mutex_lock+0x91/0x1060
            dr_dump_start+0x131/0x450 [mlx5_core]
            seq_read_iter+0xe3/0x410
            seq_read+0xfb/0x130
            full_proxy_read+0x53/0x80
            vfs_read+0xba/0x330
            ksys_read+0x65/0xe0
            do_syscall_64+0x70/0xd00
            entry_SYSCALL_64_after_hwframe+0x4b/0x53

      Possible unsafe locking scenario:

            CPU0                    CPU1
            ----                    ----
       lock(&dmn->dump_info.dbg_mutex);
                                    lock(&dmn->info.tx.mutex);
                                    lock(&dmn->dump_info.dbg_mutex);
       lock(&dmn->info.rx.mutex);

                       *** DEADLOCK ***

    Fixes: 9222f0b27da2 ("net/mlx5: DR, Add support for dumping steering info")
    Signed-off-by: Shay Drory <shayd@nvidia.com>
    Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
    Reviewed-by: Alex Vesker <valex@nvidia.com>
    Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
    Reviewed-by: Simon Horman <horms@kernel.org>
    Link: https://patch.msgid.link/20260224114652.1787431-2-tariqt@nvidia.com
    Signed-off-by: Jakub Kicinski <kuba@kernel.org>

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c
index 8803fa071c50..18362e9c3314 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_dbg.c
@@ -1051,8 +1051,8 @@ static int dr_dump_domain_all(struct seq_file *file, struct mlx5dr_domain *dmn)
 	struct mlx5dr_table *tbl;
 	int ret;

-	mutex_lock(&dmn->dump_info.dbg_mutex);
 	mlx5dr_domain_lock(dmn);
+	mutex_lock(&dmn->dump_info.dbg_mutex);

 	ret = dr_dump_domain(file, dmn);
 	if (ret < 0)
@@ -1065,8 +1065,8 @@ static int dr_dump_domain_all(struct seq_file *file, struct mlx5dr_domain *dmn)
 	}

 unlock_mutex:
-	mlx5dr_domain_unlock(dmn);
 	mutex_unlock(&dmn->dump_info.dbg_mutex);
+	mlx5dr_domain_unlock(dmn);
 	return ret;
 }
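
With this change, the dump path and the table/matcher creation paths take the
locks in the same global order (domain rx/tx mutexes first, dbg_mutex last),
so the inverted ordering reported in the splat can no longer occur, and the
unlocks are simply performed in reverse order. Below is a rough sketch of what
the domain lock helpers are assumed to do, inferred from the mutex names in
the splat (&dmn->info.rx.mutex, &dmn->info.tx.mutex) rather than taken from
the driver sources, so the real helpers may differ in detail:

	/* Assumed expansion of the domain lock/unlock helpers used above. */
	static inline void mlx5dr_domain_lock(struct mlx5dr_domain *dmn)
	{
		mutex_lock(&dmn->info.rx.mutex);	/* RX side first */
		mutex_lock(&dmn->info.tx.mutex);	/* then the TX side */
	}

	static inline void mlx5dr_domain_unlock(struct mlx5dr_domain *dmn)
	{
		mutex_unlock(&dmn->info.tx.mutex);	/* release in reverse order */
		mutex_unlock(&dmn->info.rx.mutex);
	}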