patches for DPDK stable branches
 help / color / mirror / Atom feed
From: Rongwei Liu <rongweil@nvidia.com>
To: <dev@dpdk.org>, <matan@nvidia.com>, <viacheslavo@nvidia.com>,
	<orika@nvidia.com>, <suanmingm@nvidia.com>, <thomas@monjalon.net>
Cc: <rongweil@nvidia.com>, <stable@dpdk.org>,
	Dariusz Sosnowski <dsosnowski@nvidia.com>,
	Bing Zhao <bingz@nvidia.com>
Subject: [PATCH v1] net/mlx5: fix probe optimization race condition
Date: Thu, 28 Aug 2025 06:21:34 +0300	[thread overview]
Message-ID: <20250828032134.167999-1-rongweil@nvidia.com> (raw)

With a dedicated RDMA link monitor, there are two threads
which can update the IB device port information.

Add a new flag to avoid the race condition. Once the RDMA link
monitor is ready, updates must go through it.

Fixes: 51fb5c40c826 ("net/mlx5: optimize device probing")
Cc: rongweil@nvidia.com
Cc: stable@dpdk.org
Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/common/mlx5/linux/mlx5_nl.c     |  7 ++-
 drivers/common/mlx5/mlx5_common.h       |  1 +
 drivers/net/mlx5/linux/mlx5_ethdev_os.c | 69 ++++---------------------
 drivers/net/mlx5/linux/mlx5_os.c        |  9 +++-
 4 files changed, 25 insertions(+), 61 deletions(-)

diff --git a/drivers/common/mlx5/linux/mlx5_nl.c b/drivers/common/mlx5/linux/mlx5_nl.c
index dd69e229e3..84c12efdc7 100644
--- a/drivers/common/mlx5/linux/mlx5_nl.c
+++ b/drivers/common/mlx5/linux/mlx5_nl.c
@@ -1171,8 +1171,12 @@ mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex, struct mlx5_dev_info
 			data.ibindex = dev_info->ibindex;
 	}
 
+	/* Update should be done via monitor thread to avoid race condition */
+	if (dev_info->async_mon_ready) {
+		rte_errno = ENODEV;
+		return 0;
+	}
 	ret = mlx5_nl_port_info(nl, pindex, &data);
-
 	if (dev_info->probe_opt && !strcmp(dev_info->ibname, name)) {
 		if ((!ret || ret == -ENODEV) && dev_info->port_info &&
 		    pindex <= dev_info->port_num) {
@@ -1182,7 +1186,6 @@ mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex, struct mlx5_dev_info
 			dev_info->port_info[pindex].valid = 1;
 		}
 	}
-
 	return ret ? 0 : data.ifindex;
 }
 
diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h
index bea1382911..b49f0c850e 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -185,6 +185,7 @@ struct mlx5_dev_info {
 	uint32_t ibindex;
 	char ibname[MLX5_FS_NAME_MAX];
 	uint8_t probe_opt;
+	uint8_t async_mon_ready;
 	struct mlx5_port_nl_info *port_info;
 };
 
diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
index a371c2c747..180fd60f3a 100644
--- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c
+++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
@@ -704,59 +704,6 @@ mlx5_link_update_bond(struct rte_eth_dev *dev)
 		((ifr.ifr_flags & IFF_UP) && (ifr.ifr_flags & IFF_RUNNING));
 }
 
-static void
-mlx5_handle_port_info_update(struct mlx5_dev_info *dev_info, uint32_t if_index,
-			     uint16_t msg_type)
-{
-	struct mlx5_switch_info info = {
-		.master = 0,
-		.representor = 0,
-		.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
-		.port_name = 0,
-		.switch_id = 0,
-	};
-	uint32_t i;
-	int nl_route;
-
-	if (dev_info->port_num <= 1 || dev_info->port_info == NULL)
-		return;
-
-	DRV_LOG(DEBUG, "IB device %s ifindex %u received netlink event %u",
-			dev_info->ibname, if_index, msg_type);
-	for (i = 1; i <= dev_info->port_num; i++) {
-		if (!dev_info->port_info[i].valid)
-			continue;
-		if (dev_info->port_info[i].ifindex == if_index)
-			break;
-	}
-	if (msg_type == RTM_NEWLINK && i > dev_info->port_num) {
-		nl_route = mlx5_nl_init(NETLINK_ROUTE, 0);
-		if  (nl_route < 0)
-			goto flush_all;
-
-		if (mlx5_nl_switch_info(nl_route, if_index, &info)) {
-			if (mlx5_sysfs_switch_info(if_index, &info))
-				goto flush_all;
-		}
-
-		if (info.name_type == MLX5_PHYS_PORT_NAME_TYPE_PFSF ||
-		    info.name_type == MLX5_PHYS_PORT_NAME_TYPE_PFVF)
-			goto flush_all;
-		close(nl_route);
-	} else if (msg_type == RTM_DELLINK && i <= dev_info->port_num) {
-		memset(dev_info->port_info + i, 0, sizeof(struct mlx5_port_nl_info));
-	}
-
-	return;
-flush_all:
-	if (nl_route >= 0)
-		close(nl_route);
-	for (i = 1; i <= dev_info->port_num; i++) {
-		if (!dev_info->port_info[i].ifindex)
-			dev_info->port_info[i].valid = 0;
-	}
-}
-
 static void
 mlx5_dev_interrupt_nl_cb(struct nlmsghdr *hdr, void *cb_arg)
 {
@@ -766,8 +713,6 @@ mlx5_dev_interrupt_nl_cb(struct nlmsghdr *hdr, void *cb_arg)
 
 	if (mlx5_nl_parse_link_status_update(hdr, &if_index) < 0)
 		return;
-	if (sh->cdev->config.probe_opt && sh->cdev->dev_info.port_num > 1 && !sh->rdma_monitor_supp)
-		mlx5_handle_port_info_update(&sh->cdev->dev_info, if_index, hdr->nlmsg_type);
 
 	for (i = 0; i < sh->max_port; i++) {
 		struct mlx5_dev_shared_port *port = &sh->port[i];
@@ -970,10 +915,18 @@ mlx5_dev_interrupt_handler_ib(void *arg)
 		return;
 
 	if (data.event_type == MLX5_NL_RDMA_NETDEV_ATTACH_EVENT &&
-	    !(data.flags & MLX5_NL_CMD_GET_NET_INDEX))
+	    !(data.flags & MLX5_NL_CMD_GET_NET_INDEX)) {
+		DRV_LOG(WARNING, "Incomplete RDMA ATTACH event for ibdev[%d]",
+			dev_info->ibindex);
+		if (data.flags & MLX5_NL_CMD_GET_PORT_INDEX)
+			memset(dev_info->port_info + data.portnum, 0,
+			       sizeof(struct mlx5_port_nl_info));
+		else
+			goto flush_all;
 		return;
+	}
 
-	DRV_LOG(DEBUG, "Event info: type %d, ibindex %d, ifindex %d, portnum %d,",
+	DRV_LOG(INFO, "Event info: type %d, ibindex %d, ifindex %d, portnum %d,",
 		data.event_type, data.ibindex, data.ifindex, data.portnum);
 
 	/* Changes found in number of SF/VF ports. All information is likely unreliable. */
@@ -992,7 +945,7 @@ mlx5_dev_interrupt_handler_ib(void *arg)
 				goto flush_all;
 		}
 	} else if (data.event_type == MLX5_NL_RDMA_NETDEV_DETACH_EVENT) {
-		memset(dev_info->port_info + data.portnum, 0, sizeof(struct mlx5_port_nl_info));
+		dev_info->port_info[data.portnum].ifindex = 0;
 	}
 	return;
 
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 85b3fabaf5..edfe61ea55 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -3051,7 +3051,7 @@ mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
 		DRV_LOG(ERR, "Failed to allocate intr_handle.");
 		return;
 	}
-	if (sh->cdev->config.probe_opt &&
+	if (sh->cdev->dev_info.probe_opt &&
 	    sh->cdev->dev_info.port_num > 1 &&
 	    !sh->rdma_monitor_supp) {
 		nlsk_fd = mlx5_nl_rdma_monitor_init();
@@ -3076,8 +3076,15 @@ mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
 				close(nlsk_fd);
 				return;
 			}
+			sh->cdev->dev_info.async_mon_ready = 1;
 		} else {
 			close(nlsk_fd);
+			if (sh->cdev->dev_info.probe_opt) {
+				DRV_LOG(INFO, "Failed to create rdma link monitor, disable probe optimization");
+				sh->cdev->dev_info.probe_opt = 0;
+				mlx5_free(sh->cdev->dev_info.port_info);
+				sh->cdev->dev_info.port_info = NULL;
+			}
 		}
 	}
 	nlsk_fd = mlx5_nl_init(NETLINK_ROUTE, RTMGRP_LINK);
-- 
2.27.0


             reply	other threads:[~2025-08-28  3:23 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-28  3:21 Rongwei Liu [this message]
2025-08-28  4:40 ` Stephen Hemminger
2025-08-28  4:49   ` rongwei liu
2025-08-28 13:37     ` Thomas Monjalon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250828032134.167999-1-rongweil@nvidia.com \
    --to=rongweil@nvidia.com \
    --cc=bingz@nvidia.com \
    --cc=dev@dpdk.org \
    --cc=dsosnowski@nvidia.com \
    --cc=matan@nvidia.com \
    --cc=orika@nvidia.com \
    --cc=stable@dpdk.org \
    --cc=suanmingm@nvidia.com \
    --cc=thomas@monjalon.net \
    --cc=viacheslavo@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).