DPDK patches and discussions
 help / color / mirror / Atom feed
From: Rongwei Liu <rongweil@nvidia.com>
To: <dev@dpdk.org>, <matan@nvidia.com>, <viacheslavo@nvidia.com>,
	<orika@nvidia.com>, <suanmingm@nvidia.com>, <thomas@monjalon.net>
Cc: <rongweil@nvidia.com>, <stable@dpdk.org>,
	Dariusz Sosnowski <dsosnowski@nvidia.com>,
	Bing Zhao <bingz@nvidia.com>
Subject: [PATCH v2] net/mlx5: fix probe optimization race condition
Date: Fri, 29 Aug 2025 08:35:32 +0300	[thread overview]
Message-ID: <20250829053532.445865-1-rongweil@nvidia.com> (raw)
In-Reply-To: <23111985.hxa6pUQ8Du@thomas>

With the dedicated RDMA link monitor, there are two threads
that can update the IB device port information.

Add a new flag to avoid the race condition. Updates should
go through the RDMA link monitor once it is ready.

The current logic is:
1. The probing thread updates all port information.
2. The probing thread starts the dedicated RDMA monitor thread.
   Once it is ready, port information updates are handled by that thread.
3. Subsequent probing does not trigger a PMD port information update.

No lock is required.

Fixes: 51fb5c40c826 ("common/mlx5: optimize device probing")
Cc: rongweil@nvidia.com
Cc: stable@dpdk.org
Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/common/mlx5/linux/mlx5_nl.c     |  7 ++-
 drivers/common/mlx5/mlx5_common.h       |  1 +
 drivers/net/mlx5/linux/mlx5_ethdev_os.c | 69 ++++---------------------
 drivers/net/mlx5/linux/mlx5_os.c        |  9 +++-
 4 files changed, 25 insertions(+), 61 deletions(-)

diff --git a/drivers/common/mlx5/linux/mlx5_nl.c b/drivers/common/mlx5/linux/mlx5_nl.c
index dd69e229e3..84c12efdc7 100644
--- a/drivers/common/mlx5/linux/mlx5_nl.c
+++ b/drivers/common/mlx5/linux/mlx5_nl.c
@@ -1171,8 +1171,12 @@ mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex, struct mlx5_dev_info
 			data.ibindex = dev_info->ibindex;
 	}
 
+	/* Update should be done via monitor thread to avoid race condition */
+	if (dev_info->async_mon_ready) {
+		rte_errno = ENODEV;
+		return 0;
+	}
 	ret = mlx5_nl_port_info(nl, pindex, &data);
-
 	if (dev_info->probe_opt && !strcmp(dev_info->ibname, name)) {
 		if ((!ret || ret == -ENODEV) && dev_info->port_info &&
 		    pindex <= dev_info->port_num) {
@@ -1182,7 +1186,6 @@ mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex, struct mlx5_dev_info
 			dev_info->port_info[pindex].valid = 1;
 		}
 	}
-
 	return ret ? 0 : data.ifindex;
 }
 
diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h
index bea1382911..b49f0c850e 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -185,6 +185,7 @@ struct mlx5_dev_info {
 	uint32_t ibindex;
 	char ibname[MLX5_FS_NAME_MAX];
 	uint8_t probe_opt;
+	uint8_t async_mon_ready;
 	struct mlx5_port_nl_info *port_info;
 };
 
diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
index a371c2c747..180fd60f3a 100644
--- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c
+++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
@@ -704,59 +704,6 @@ mlx5_link_update_bond(struct rte_eth_dev *dev)
 		((ifr.ifr_flags & IFF_UP) && (ifr.ifr_flags & IFF_RUNNING));
 }
 
-static void
-mlx5_handle_port_info_update(struct mlx5_dev_info *dev_info, uint32_t if_index,
-			     uint16_t msg_type)
-{
-	struct mlx5_switch_info info = {
-		.master = 0,
-		.representor = 0,
-		.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
-		.port_name = 0,
-		.switch_id = 0,
-	};
-	uint32_t i;
-	int nl_route;
-
-	if (dev_info->port_num <= 1 || dev_info->port_info == NULL)
-		return;
-
-	DRV_LOG(DEBUG, "IB device %s ifindex %u received netlink event %u",
-			dev_info->ibname, if_index, msg_type);
-	for (i = 1; i <= dev_info->port_num; i++) {
-		if (!dev_info->port_info[i].valid)
-			continue;
-		if (dev_info->port_info[i].ifindex == if_index)
-			break;
-	}
-	if (msg_type == RTM_NEWLINK && i > dev_info->port_num) {
-		nl_route = mlx5_nl_init(NETLINK_ROUTE, 0);
-		if  (nl_route < 0)
-			goto flush_all;
-
-		if (mlx5_nl_switch_info(nl_route, if_index, &info)) {
-			if (mlx5_sysfs_switch_info(if_index, &info))
-				goto flush_all;
-		}
-
-		if (info.name_type == MLX5_PHYS_PORT_NAME_TYPE_PFSF ||
-		    info.name_type == MLX5_PHYS_PORT_NAME_TYPE_PFVF)
-			goto flush_all;
-		close(nl_route);
-	} else if (msg_type == RTM_DELLINK && i <= dev_info->port_num) {
-		memset(dev_info->port_info + i, 0, sizeof(struct mlx5_port_nl_info));
-	}
-
-	return;
-flush_all:
-	if (nl_route >= 0)
-		close(nl_route);
-	for (i = 1; i <= dev_info->port_num; i++) {
-		if (!dev_info->port_info[i].ifindex)
-			dev_info->port_info[i].valid = 0;
-	}
-}
-
 static void
 mlx5_dev_interrupt_nl_cb(struct nlmsghdr *hdr, void *cb_arg)
 {
@@ -766,8 +713,6 @@ mlx5_dev_interrupt_nl_cb(struct nlmsghdr *hdr, void *cb_arg)
 
 	if (mlx5_nl_parse_link_status_update(hdr, &if_index) < 0)
 		return;
-	if (sh->cdev->config.probe_opt && sh->cdev->dev_info.port_num > 1 && !sh->rdma_monitor_supp)
-		mlx5_handle_port_info_update(&sh->cdev->dev_info, if_index, hdr->nlmsg_type);
 
 	for (i = 0; i < sh->max_port; i++) {
 		struct mlx5_dev_shared_port *port = &sh->port[i];
@@ -970,10 +915,18 @@ mlx5_dev_interrupt_handler_ib(void *arg)
 		return;
 
 	if (data.event_type == MLX5_NL_RDMA_NETDEV_ATTACH_EVENT &&
-	    !(data.flags & MLX5_NL_CMD_GET_NET_INDEX))
+	    !(data.flags & MLX5_NL_CMD_GET_NET_INDEX)) {
+		DRV_LOG(WARNING, "Incomplete RDMA ATTACH event for ibdev[%d]",
+			dev_info->ibindex);
+		if (data.flags & MLX5_NL_CMD_GET_PORT_INDEX)
+			memset(dev_info->port_info + data.portnum, 0,
+			       sizeof(struct mlx5_port_nl_info));
+		else
+			goto flush_all;
 		return;
+	}
 
-	DRV_LOG(DEBUG, "Event info: type %d, ibindex %d, ifindex %d, portnum %d,",
+	DRV_LOG(INFO, "Event info: type %d, ibindex %d, ifindex %d, portnum %d,",
 		data.event_type, data.ibindex, data.ifindex, data.portnum);
 
 	/* Changes found in number of SF/VF ports. All information is likely unreliable. */
@@ -992,7 +945,7 @@ mlx5_dev_interrupt_handler_ib(void *arg)
 				goto flush_all;
 		}
 	} else if (data.event_type == MLX5_NL_RDMA_NETDEV_DETACH_EVENT) {
-		memset(dev_info->port_info + data.portnum, 0, sizeof(struct mlx5_port_nl_info));
+		dev_info->port_info[data.portnum].ifindex = 0;
 	}
 	return;
 
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 85b3fabaf5..edfe61ea55 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -3051,7 +3051,7 @@ mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
 		DRV_LOG(ERR, "Failed to allocate intr_handle.");
 		return;
 	}
-	if (sh->cdev->config.probe_opt &&
+	if (sh->cdev->dev_info.probe_opt &&
 	    sh->cdev->dev_info.port_num > 1 &&
 	    !sh->rdma_monitor_supp) {
 		nlsk_fd = mlx5_nl_rdma_monitor_init();
@@ -3076,8 +3076,15 @@ mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
 				close(nlsk_fd);
 				return;
 			}
+			sh->cdev->dev_info.async_mon_ready = 1;
 		} else {
 			close(nlsk_fd);
+			if (sh->cdev->dev_info.probe_opt) {
+				DRV_LOG(INFO, "Failed to create rdma link monitor, disable probe optimization");
+				sh->cdev->dev_info.probe_opt = 0;
+				mlx5_free(sh->cdev->dev_info.port_info);
+				sh->cdev->dev_info.port_info = NULL;
+			}
 		}
 	}
 	nlsk_fd = mlx5_nl_init(NETLINK_ROUTE, RTMGRP_LINK);
-- 
2.27.0


      reply	other threads:[~2025-08-29  5:37 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-28  3:21 [PATCH v1] " Rongwei Liu
2025-08-28  4:40 ` Stephen Hemminger
2025-08-28  4:49   ` rongwei liu
2025-08-28 13:37     ` Thomas Monjalon
2025-08-29  5:35       ` Rongwei Liu [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250829053532.445865-1-rongweil@nvidia.com \
    --to=rongweil@nvidia.com \
    --cc=bingz@nvidia.com \
    --cc=dev@dpdk.org \
    --cc=dsosnowski@nvidia.com \
    --cc=matan@nvidia.com \
    --cc=orika@nvidia.com \
    --cc=stable@dpdk.org \
    --cc=suanmingm@nvidia.com \
    --cc=thomas@monjalon.net \
    --cc=viacheslavo@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).