patches for DPDK stable branches
 help / color / mirror / Atom feed
* [PATCH v1] net/mlx5: fix probe optimization race condition
@ 2025-08-28  3:21 Rongwei Liu
  2025-08-28  4:40 ` Stephen Hemminger
  0 siblings, 1 reply; 4+ messages in thread
From: Rongwei Liu @ 2025-08-28  3:21 UTC (permalink / raw)
  To: dev, matan, viacheslavo, orika, suanmingm, thomas
  Cc: rongweil, stable, Dariusz Sosnowski, Bing Zhao

With dedicated RDMA link monitor, there are two threads
which can update the IB device port information.

Add a new flag to avoid the race condition. Update should
go through RDMA link monitor once ready.

Fixes: 51fb5c40c826 ("net/mlx5: optimize device probing")
Cc: rongweil@nvidia.com
Cc: stable@dpdk.org
Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/common/mlx5/linux/mlx5_nl.c     |  7 ++-
 drivers/common/mlx5/mlx5_common.h       |  1 +
 drivers/net/mlx5/linux/mlx5_ethdev_os.c | 69 ++++---------------------
 drivers/net/mlx5/linux/mlx5_os.c        |  9 +++-
 4 files changed, 25 insertions(+), 61 deletions(-)

diff --git a/drivers/common/mlx5/linux/mlx5_nl.c b/drivers/common/mlx5/linux/mlx5_nl.c
index dd69e229e3..84c12efdc7 100644
--- a/drivers/common/mlx5/linux/mlx5_nl.c
+++ b/drivers/common/mlx5/linux/mlx5_nl.c
@@ -1171,8 +1171,12 @@ mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex, struct mlx5_dev_info
 			data.ibindex = dev_info->ibindex;
 	}
 
+	/* Update should be done via monitor thread to avoid race condition */
+	if (dev_info->async_mon_ready) {
+		rte_errno = ENODEV;
+		return 0;
+	}
 	ret = mlx5_nl_port_info(nl, pindex, &data);
-
 	if (dev_info->probe_opt && !strcmp(dev_info->ibname, name)) {
 		if ((!ret || ret == -ENODEV) && dev_info->port_info &&
 		    pindex <= dev_info->port_num) {
@@ -1182,7 +1186,6 @@ mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex, struct mlx5_dev_info
 			dev_info->port_info[pindex].valid = 1;
 		}
 	}
-
 	return ret ? 0 : data.ifindex;
 }
 
diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h
index bea1382911..b49f0c850e 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -185,6 +185,7 @@ struct mlx5_dev_info {
 	uint32_t ibindex;
 	char ibname[MLX5_FS_NAME_MAX];
 	uint8_t probe_opt;
+	uint8_t async_mon_ready;
 	struct mlx5_port_nl_info *port_info;
 };
 
diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
index a371c2c747..180fd60f3a 100644
--- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c
+++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
@@ -704,59 +704,6 @@ mlx5_link_update_bond(struct rte_eth_dev *dev)
 		((ifr.ifr_flags & IFF_UP) && (ifr.ifr_flags & IFF_RUNNING));
 }
 
-static void
-mlx5_handle_port_info_update(struct mlx5_dev_info *dev_info, uint32_t if_index,
-			     uint16_t msg_type)
-{
-	struct mlx5_switch_info info = {
-		.master = 0,
-		.representor = 0,
-		.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
-		.port_name = 0,
-		.switch_id = 0,
-	};
-	uint32_t i;
-	int nl_route;
-
-	if (dev_info->port_num <= 1 || dev_info->port_info == NULL)
-		return;
-
-	DRV_LOG(DEBUG, "IB device %s ifindex %u received netlink event %u",
-			dev_info->ibname, if_index, msg_type);
-	for (i = 1; i <= dev_info->port_num; i++) {
-		if (!dev_info->port_info[i].valid)
-			continue;
-		if (dev_info->port_info[i].ifindex == if_index)
-			break;
-	}
-	if (msg_type == RTM_NEWLINK && i > dev_info->port_num) {
-		nl_route = mlx5_nl_init(NETLINK_ROUTE, 0);
-		if  (nl_route < 0)
-			goto flush_all;
-
-		if (mlx5_nl_switch_info(nl_route, if_index, &info)) {
-			if (mlx5_sysfs_switch_info(if_index, &info))
-				goto flush_all;
-		}
-
-		if (info.name_type == MLX5_PHYS_PORT_NAME_TYPE_PFSF ||
-		    info.name_type == MLX5_PHYS_PORT_NAME_TYPE_PFVF)
-			goto flush_all;
-		close(nl_route);
-	} else if (msg_type == RTM_DELLINK && i <= dev_info->port_num) {
-		memset(dev_info->port_info + i, 0, sizeof(struct mlx5_port_nl_info));
-	}
-
-	return;
-flush_all:
-	if (nl_route >= 0)
-		close(nl_route);
-	for (i = 1; i <= dev_info->port_num; i++) {
-		if (!dev_info->port_info[i].ifindex)
-			dev_info->port_info[i].valid = 0;
-	}
-}
-
 static void
 mlx5_dev_interrupt_nl_cb(struct nlmsghdr *hdr, void *cb_arg)
 {
@@ -766,8 +713,6 @@ mlx5_dev_interrupt_nl_cb(struct nlmsghdr *hdr, void *cb_arg)
 
 	if (mlx5_nl_parse_link_status_update(hdr, &if_index) < 0)
 		return;
-	if (sh->cdev->config.probe_opt && sh->cdev->dev_info.port_num > 1 && !sh->rdma_monitor_supp)
-		mlx5_handle_port_info_update(&sh->cdev->dev_info, if_index, hdr->nlmsg_type);
 
 	for (i = 0; i < sh->max_port; i++) {
 		struct mlx5_dev_shared_port *port = &sh->port[i];
@@ -970,10 +915,18 @@ mlx5_dev_interrupt_handler_ib(void *arg)
 		return;
 
 	if (data.event_type == MLX5_NL_RDMA_NETDEV_ATTACH_EVENT &&
-	    !(data.flags & MLX5_NL_CMD_GET_NET_INDEX))
+	    !(data.flags & MLX5_NL_CMD_GET_NET_INDEX)) {
+		DRV_LOG(WARNING, "Incomplete RDMA ATTACH event for ibdev[%d]",
+			dev_info->ibindex);
+		if (data.flags & MLX5_NL_CMD_GET_PORT_INDEX)
+			memset(dev_info->port_info + data.portnum, 0,
+			       sizeof(struct mlx5_port_nl_info));
+		else
+			goto flush_all;
 		return;
+	}
 
-	DRV_LOG(DEBUG, "Event info: type %d, ibindex %d, ifindex %d, portnum %d,",
+	DRV_LOG(INFO, "Event info: type %d, ibindex %d, ifindex %d, portnum %d,",
 		data.event_type, data.ibindex, data.ifindex, data.portnum);
 
 	/* Changes found in number of SF/VF ports. All information is likely unreliable. */
@@ -992,7 +945,7 @@ mlx5_dev_interrupt_handler_ib(void *arg)
 				goto flush_all;
 		}
 	} else if (data.event_type == MLX5_NL_RDMA_NETDEV_DETACH_EVENT) {
-		memset(dev_info->port_info + data.portnum, 0, sizeof(struct mlx5_port_nl_info));
+		dev_info->port_info[data.portnum].ifindex = 0;
 	}
 	return;
 
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 85b3fabaf5..edfe61ea55 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -3051,7 +3051,7 @@ mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
 		DRV_LOG(ERR, "Failed to allocate intr_handle.");
 		return;
 	}
-	if (sh->cdev->config.probe_opt &&
+	if (sh->cdev->dev_info.probe_opt &&
 	    sh->cdev->dev_info.port_num > 1 &&
 	    !sh->rdma_monitor_supp) {
 		nlsk_fd = mlx5_nl_rdma_monitor_init();
@@ -3076,8 +3076,15 @@ mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
 				close(nlsk_fd);
 				return;
 			}
+			sh->cdev->dev_info.async_mon_ready = 1;
 		} else {
 			close(nlsk_fd);
+			if (sh->cdev->dev_info.probe_opt) {
+				DRV_LOG(INFO, "Failed to create rdma link monitor, disable probe optimization");
+				sh->cdev->dev_info.probe_opt = 0;
+				mlx5_free(sh->cdev->dev_info.port_info);
+				sh->cdev->dev_info.port_info = NULL;
+			}
 		}
 	}
 	nlsk_fd = mlx5_nl_init(NETLINK_ROUTE, RTMGRP_LINK);
-- 
2.27.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v1] net/mlx5: fix probe optimization race condition
  2025-08-28  3:21 [PATCH v1] net/mlx5: fix probe optimization race condition Rongwei Liu
@ 2025-08-28  4:40 ` Stephen Hemminger
  2025-08-28  4:49   ` rongwei liu
  0 siblings, 1 reply; 4+ messages in thread
From: Stephen Hemminger @ 2025-08-28  4:40 UTC (permalink / raw)
  To: Rongwei Liu
  Cc: dev, matan, viacheslavo, orika, suanmingm, thomas, stable,
	Dariusz Sosnowski, Bing Zhao

On Thu, 28 Aug 2025 06:21:34 +0300
Rongwei Liu <rongweil@nvidia.com> wrote:

> With dedicated RDMA link monitor, there are two threads
> which can update the IB device port information.
> 
> Add a new flag to avoid the race condition. Update should
> go through RDMA link monitor once ready.
> 
> Fixes: 51fb5c40c826 ("net/mlx5: optimize device probing")
> Cc: rongweil@nvidia.com
> Cc: stable@dpdk.org
> Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
> Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
> ---

If variable is modified (with out locking) on two threads it
needs to atomic or volatile.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v1] net/mlx5: fix probe optimization race condition
  2025-08-28  4:40 ` Stephen Hemminger
@ 2025-08-28  4:49   ` rongwei liu
  2025-08-28 13:37     ` Thomas Monjalon
  0 siblings, 1 reply; 4+ messages in thread
From: rongwei liu @ 2025-08-28  4:49 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, matan, viacheslavo, orika, suanmingm, thomas, stable,
	Dariusz Sosnowski, Bing Zhao



On 2025/8/28 12:40, Stephen Hemminger wrote:
> On Thu, 28 Aug 2025 06:21:34 +0300
> Rongwei Liu <rongweil@nvidia.com> wrote:
> 
>> With dedicated RDMA link monitor, there are two threads
>> which can update the IB device port information.
>>
>> Add a new flag to avoid the race condition. Update should
>> go through RDMA link monitor once ready.
>>
>> Fixes: 51fb5c40c826 ("net/mlx5: optimize device probing")
>> Cc: rongweil@nvidia.com
>> Cc: stable@dpdk.org
>> Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
>> Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
>> ---
> 
> If variable is modified (with out locking) on two threads it
> needs to atomic or volatile.
Exactly. Before this patch, it' user responsebility to seperate probe probing and sf manipualtion.
Obviously, customer didn't follow this very well.
Now logic change to:
1. Update all port information in probing thread.
2. Probe thread initiate the dedicated rdma monitor thread. Once ready, all port update will go to this thread.
3. Next port probing won't trigger PMD port information update.

No lock is required then.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v1] net/mlx5: fix probe optimization race condition
  2025-08-28  4:49   ` rongwei liu
@ 2025-08-28 13:37     ` Thomas Monjalon
  0 siblings, 0 replies; 4+ messages in thread
From: Thomas Monjalon @ 2025-08-28 13:37 UTC (permalink / raw)
  To: rongwei liu
  Cc: Stephen Hemminger, dev, matan, viacheslavo, orika, suanmingm,
	stable, Dariusz Sosnowski, Bing Zhao

28/08/2025 06:49, rongwei liu:
> 
> On 2025/8/28 12:40, Stephen Hemminger wrote:
> > On Thu, 28 Aug 2025 06:21:34 +0300
> > Rongwei Liu <rongweil@nvidia.com> wrote:
> > 
> >> With dedicated RDMA link monitor, there are two threads
> >> which can update the IB device port information.
> >>
> >> Add a new flag to avoid the race condition. Update should
> >> go through RDMA link monitor once ready.
> >>
> >> Fixes: 51fb5c40c826 ("net/mlx5: optimize device probing")
> >> Cc: rongweil@nvidia.com
> >> Cc: stable@dpdk.org
> >> Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
> >> Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
> >> ---
> > 
> > If variable is modified (with out locking) on two threads it
> > needs to atomic or volatile.
> Exactly. Before this patch, it' user responsebility to seperate probe probing and sf manipualtion.
> Obviously, customer didn't follow this very well.
> Now logic change to:
> 1. Update all port information in probing thread.
> 2. Probe thread initiate the dedicated rdma monitor thread. Once ready, all port update will go to this thread.
> 3. Next port probing won't trigger PMD port information update.
> 
> No lock is required then.

This is the kind of info we need in a commit log.
Please make a v2, thanks.



^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2025-08-28 13:38 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-08-28  3:21 [PATCH v1] net/mlx5: fix probe optimization race condition Rongwei Liu
2025-08-28  4:40 ` Stephen Hemminger
2025-08-28  4:49   ` rongwei liu
2025-08-28 13:37     ` Thomas Monjalon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).