From: Rongwei Liu <rongweil@nvidia.com>
To: <dev@dpdk.org>, <matan@nvidia.com>, <viacheslavo@nvidia.com>,
<orika@nvidia.com>, <suanmingm@nvidia.com>, <thomas@monjalon.net>
Cc: <rongweil@nvidia.com>, <stable@dpdk.org>,
Dariusz Sosnowski <dsosnowski@nvidia.com>,
Bing Zhao <bingz@nvidia.com>
Subject: [PATCH v1] net/mlx5: fix probe optimization race condition
Date: Thu, 28 Aug 2025 06:21:34 +0300 [thread overview]
Message-ID: <20250828032134.167999-1-rongweil@nvidia.com> (raw)
With the dedicated RDMA link monitor, there are two threads
which can update the IB device port information.
Add a new flag to avoid the race condition: once the RDMA link
monitor is ready, updates must go through the monitor thread only.
Fixes: 51fb5c40c826 ("net/mlx5: optimize device probing")
Cc: rongweil@nvidia.com
Cc: stable@dpdk.org
Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
drivers/common/mlx5/linux/mlx5_nl.c | 7 ++-
drivers/common/mlx5/mlx5_common.h | 1 +
drivers/net/mlx5/linux/mlx5_ethdev_os.c | 69 ++++---------------------
drivers/net/mlx5/linux/mlx5_os.c | 9 +++-
4 files changed, 25 insertions(+), 61 deletions(-)
diff --git a/drivers/common/mlx5/linux/mlx5_nl.c b/drivers/common/mlx5/linux/mlx5_nl.c
index dd69e229e3..84c12efdc7 100644
--- a/drivers/common/mlx5/linux/mlx5_nl.c
+++ b/drivers/common/mlx5/linux/mlx5_nl.c
@@ -1171,8 +1171,12 @@ mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex, struct mlx5_dev_info
data.ibindex = dev_info->ibindex;
}
+ /* Update should be done via monitor thread to avoid race condition */
+ if (dev_info->async_mon_ready) {
+ rte_errno = ENODEV;
+ return 0;
+ }
ret = mlx5_nl_port_info(nl, pindex, &data);
-
if (dev_info->probe_opt && !strcmp(dev_info->ibname, name)) {
if ((!ret || ret == -ENODEV) && dev_info->port_info &&
pindex <= dev_info->port_num) {
@@ -1182,7 +1186,6 @@ mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex, struct mlx5_dev_info
dev_info->port_info[pindex].valid = 1;
}
}
-
return ret ? 0 : data.ifindex;
}
diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h
index bea1382911..b49f0c850e 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -185,6 +185,7 @@ struct mlx5_dev_info {
uint32_t ibindex;
char ibname[MLX5_FS_NAME_MAX];
uint8_t probe_opt;
+ uint8_t async_mon_ready;
struct mlx5_port_nl_info *port_info;
};
diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
index a371c2c747..180fd60f3a 100644
--- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c
+++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
@@ -704,59 +704,6 @@ mlx5_link_update_bond(struct rte_eth_dev *dev)
((ifr.ifr_flags & IFF_UP) && (ifr.ifr_flags & IFF_RUNNING));
}
-static void
-mlx5_handle_port_info_update(struct mlx5_dev_info *dev_info, uint32_t if_index,
- uint16_t msg_type)
-{
- struct mlx5_switch_info info = {
- .master = 0,
- .representor = 0,
- .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
- .port_name = 0,
- .switch_id = 0,
- };
- uint32_t i;
- int nl_route;
-
- if (dev_info->port_num <= 1 || dev_info->port_info == NULL)
- return;
-
- DRV_LOG(DEBUG, "IB device %s ifindex %u received netlink event %u",
- dev_info->ibname, if_index, msg_type);
- for (i = 1; i <= dev_info->port_num; i++) {
- if (!dev_info->port_info[i].valid)
- continue;
- if (dev_info->port_info[i].ifindex == if_index)
- break;
- }
- if (msg_type == RTM_NEWLINK && i > dev_info->port_num) {
- nl_route = mlx5_nl_init(NETLINK_ROUTE, 0);
- if (nl_route < 0)
- goto flush_all;
-
- if (mlx5_nl_switch_info(nl_route, if_index, &info)) {
- if (mlx5_sysfs_switch_info(if_index, &info))
- goto flush_all;
- }
-
- if (info.name_type == MLX5_PHYS_PORT_NAME_TYPE_PFSF ||
- info.name_type == MLX5_PHYS_PORT_NAME_TYPE_PFVF)
- goto flush_all;
- close(nl_route);
- } else if (msg_type == RTM_DELLINK && i <= dev_info->port_num) {
- memset(dev_info->port_info + i, 0, sizeof(struct mlx5_port_nl_info));
- }
-
- return;
-flush_all:
- if (nl_route >= 0)
- close(nl_route);
- for (i = 1; i <= dev_info->port_num; i++) {
- if (!dev_info->port_info[i].ifindex)
- dev_info->port_info[i].valid = 0;
- }
-}
-
static void
mlx5_dev_interrupt_nl_cb(struct nlmsghdr *hdr, void *cb_arg)
{
@@ -766,8 +713,6 @@ mlx5_dev_interrupt_nl_cb(struct nlmsghdr *hdr, void *cb_arg)
if (mlx5_nl_parse_link_status_update(hdr, &if_index) < 0)
return;
- if (sh->cdev->config.probe_opt && sh->cdev->dev_info.port_num > 1 && !sh->rdma_monitor_supp)
- mlx5_handle_port_info_update(&sh->cdev->dev_info, if_index, hdr->nlmsg_type);
for (i = 0; i < sh->max_port; i++) {
struct mlx5_dev_shared_port *port = &sh->port[i];
@@ -970,10 +915,18 @@ mlx5_dev_interrupt_handler_ib(void *arg)
return;
if (data.event_type == MLX5_NL_RDMA_NETDEV_ATTACH_EVENT &&
- !(data.flags & MLX5_NL_CMD_GET_NET_INDEX))
+ !(data.flags & MLX5_NL_CMD_GET_NET_INDEX)) {
+ DRV_LOG(WARNING, "Incomplete RDMA ATTACH event for ibdev[%d]",
+ dev_info->ibindex);
+ if (data.flags & MLX5_NL_CMD_GET_PORT_INDEX)
+ memset(dev_info->port_info + data.portnum, 0,
+ sizeof(struct mlx5_port_nl_info));
+ else
+ goto flush_all;
return;
+ }
- DRV_LOG(DEBUG, "Event info: type %d, ibindex %d, ifindex %d, portnum %d,",
+ DRV_LOG(INFO, "Event info: type %d, ibindex %d, ifindex %d, portnum %d,",
data.event_type, data.ibindex, data.ifindex, data.portnum);
/* Changes found in number of SF/VF ports. All information is likely unreliable. */
@@ -992,7 +945,7 @@ mlx5_dev_interrupt_handler_ib(void *arg)
goto flush_all;
}
} else if (data.event_type == MLX5_NL_RDMA_NETDEV_DETACH_EVENT) {
- memset(dev_info->port_info + data.portnum, 0, sizeof(struct mlx5_port_nl_info));
+ dev_info->port_info[data.portnum].ifindex = 0;
}
return;
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 85b3fabaf5..edfe61ea55 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -3051,7 +3051,7 @@ mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
DRV_LOG(ERR, "Failed to allocate intr_handle.");
return;
}
- if (sh->cdev->config.probe_opt &&
+ if (sh->cdev->dev_info.probe_opt &&
sh->cdev->dev_info.port_num > 1 &&
!sh->rdma_monitor_supp) {
nlsk_fd = mlx5_nl_rdma_monitor_init();
@@ -3076,8 +3076,15 @@ mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
close(nlsk_fd);
return;
}
+ sh->cdev->dev_info.async_mon_ready = 1;
} else {
close(nlsk_fd);
+ if (sh->cdev->dev_info.probe_opt) {
+ DRV_LOG(INFO, "Failed to create rdma link monitor, disable probe optimization");
+ sh->cdev->dev_info.probe_opt = 0;
+ mlx5_free(sh->cdev->dev_info.port_info);
+ sh->cdev->dev_info.port_info = NULL;
+ }
}
}
nlsk_fd = mlx5_nl_init(NETLINK_ROUTE, RTMGRP_LINK);
--
2.27.0
next reply other threads:[~2025-08-28 3:23 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-28 3:21 Rongwei Liu [this message]
2025-08-28 4:40 ` Stephen Hemminger
2025-08-28 4:49 ` rongwei liu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250828032134.167999-1-rongweil@nvidia.com \
--to=rongweil@nvidia.com \
--cc=bingz@nvidia.com \
--cc=dev@dpdk.org \
--cc=dsosnowski@nvidia.com \
--cc=matan@nvidia.com \
--cc=orika@nvidia.com \
--cc=stable@dpdk.org \
--cc=suanmingm@nvidia.com \
--cc=thomas@monjalon.net \
--cc=viacheslavo@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).