DPDK patches and discussions
 help / color / mirror / Atom feed
From: "Minggang Li(Gavin)" <gavinl@nvidia.com>
To: <matan@nvidia.com>, <viacheslavo@nvidia.com>, <orika@nvidia.com>,
	<thomas@monjalon.net>, Dariusz Sosnowski <dsosnowski@nvidia.com>,
	Bing Zhao <bingz@nvidia.com>, Suanming Mou <suanmingm@nvidia.com>
Cc: <dev@dpdk.org>, <rasland@nvidia.com>
Subject: [PATCH 7/7] mlx5: add backward compatibility for RDMA monitor
Date: Mon, 23 Dec 2024 12:11:01 +0200	[thread overview]
Message-ID: <20241223101101.677449-8-gavinl@nvidia.com> (raw)
In-Reply-To: <20241223101101.677449-1-gavinl@nvidia.com>

Fall back to the legacy way of updating port information if the kernel
driver does not support the RDMA monitor.

Signed-off-by: Minggang Li(Gavin) <gavinl@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 doc/guides/rel_notes/release_24_11.rst  | 14 +++++
 drivers/common/mlx5/linux/mlx5_nl.c     | 73 +++++++++++++++++++++++++
 drivers/common/mlx5/version.map         |  1 +
 drivers/net/mlx5/linux/mlx5_ethdev_os.c |  2 +-
 drivers/net/mlx5/linux/mlx5_os.c        | 27 +++++++--
 drivers/net/mlx5/mlx5.h                 |  1 +
 6 files changed, 111 insertions(+), 7 deletions(-)

diff --git a/doc/guides/rel_notes/release_24_11.rst b/doc/guides/rel_notes/release_24_11.rst
index 8486cd986f..567ac42663 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -288,6 +288,20 @@ New Features
   Added ability for a node to advertise and update multiple xstat counters,
   that can be retrieved using ``rte_graph_cluster_stats_get``.
 
+* **Updated NVIDIA mlx5 driver.**
+
+  Optimized port probing at large scale.
+  This feature enhances the efficiency of probing VFs/SFs at large scale
+  by significantly reducing the probing time. To activate this feature,
+  set ``probe_opt_en`` to a non-zero value during device probing. It
+  leverages a capability from the RDMA driver, expected to be released in
+  the upcoming kernel version 6.12 or its equivalent in OFED 24.10,
+  specifically the RDMA monitor. For additional details on the limitations
+  of devargs, refer to "doc/guides/nics/mlx5.rst".
+
+  If the application probes many VFs/SFs (e.g. 300), this option should be
+  enabled to save probing time.
+
 
 Removed Items
 -------------
diff --git a/drivers/common/mlx5/linux/mlx5_nl.c b/drivers/common/mlx5/linux/mlx5_nl.c
index ce1c2a8e75..12f1a620f3 100644
--- a/drivers/common/mlx5/linux/mlx5_nl.c
+++ b/drivers/common/mlx5/linux/mlx5_nl.c
@@ -2152,3 +2152,76 @@ mlx5_nl_rdma_monitor_info_get(struct nlmsghdr *hdr, struct mlx5_nl_port_info *da
 error:
 	rte_errno = EINVAL;
 }
+
+static int
+mlx5_nl_rdma_monitor_cap_get_cb(struct nlmsghdr *hdr, void *arg)
+{
+	size_t off = NLMSG_HDRLEN;
+	uint8_t *cap = arg;
+
+	if (hdr->nlmsg_type != RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_SYS_GET))
+		goto error;
+
+	*cap = 0;
+	while (off < hdr->nlmsg_len) {
+		struct nlattr *na = (void *)((uintptr_t)hdr + off);
+		void *payload = (void *)((uintptr_t)na + NLA_HDRLEN);
+
+		if (na->nla_len > hdr->nlmsg_len - off)
+			goto error;
+		switch (na->nla_type) {
+		case RDMA_NLDEV_SYS_ATTR_MONITOR_MODE:
+			*cap = *(uint8_t *)payload;
+			return 0;
+		default:
+			break;
+		}
+		off += NLA_ALIGN(na->nla_len);
+	}
+
+	return 0;
+
+error:
+	return -EINVAL;
+}
+
+/**
+ * Get RDMA monitor support in driver.
+ *
+ *
+ * @param nl
+ *   Netlink socket of the RDMA kind (NETLINK_RDMA).
+ * @param[out] cap
+ *   Set to the RDMA monitor mode reported by the kernel, 0 if not reported.
+ * @return
+ *   0 on success, negative on error and rte_errno is set.
+ */
+int
+mlx5_nl_rdma_monitor_cap_get(int nl, uint8_t *cap)
+{
+	union {
+		struct nlmsghdr nh;
+		uint8_t buf[NLMSG_HDRLEN];
+	} req = {
+		.nh = {
+			.nlmsg_len = NLMSG_LENGTH(0),
+			.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+						       RDMA_NLDEV_CMD_SYS_GET),
+			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+		},
+	};
+	uint32_t sn = MLX5_NL_SN_GENERATE;
+	int ret;
+
+	ret = mlx5_nl_send(nl, &req.nh, sn);
+	if (ret < 0) {
+		rte_errno = -ret;
+		return ret;
+	}
+	ret = mlx5_nl_recv(nl, sn, mlx5_nl_rdma_monitor_cap_get_cb, cap);
+	if (ret < 0) {
+		rte_errno = -ret;
+		return ret;
+	}
+	return 0;
+}
diff --git a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map
index 5230576006..8301485839 100644
--- a/drivers/common/mlx5/version.map
+++ b/drivers/common/mlx5/version.map
@@ -148,6 +148,7 @@ INTERNAL {
 	mlx5_nl_vlan_vmwa_delete; # WINDOWS_NO_EXPORT
 	mlx5_nl_rdma_monitor_init; # WINDOWS_NO_EXPORT
 	mlx5_nl_rdma_monitor_info_get; # WINDOWS_NO_EXPORT
+	mlx5_nl_rdma_monitor_cap_get; # WINDOWS_NO_EXPORT
 
 	mlx5_os_umem_dereg;
 	mlx5_os_umem_reg;
diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
index 5156d96b3a..6b2c25a7c2 100644
--- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c
+++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
@@ -736,7 +736,7 @@ mlx5_dev_interrupt_nl_cb(struct nlmsghdr *hdr, void *cb_arg)
 
 	if (mlx5_nl_parse_link_status_update(hdr, &if_index) < 0)
 		return;
-	if (sh->cdev->config.probe_opt && sh->cdev->dev_info.port_num > 1)
+	if (sh->cdev->config.probe_opt && sh->cdev->dev_info.port_num > 1 && !sh->rdma_monitor_supp)
 		mlx5_handle_port_info_update(&sh->cdev->dev_info, if_index, hdr->nlmsg_type);
 
 	for (i = 0; i < sh->max_port; i++) {
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 16b275c71e..d3fd77af58 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -3017,6 +3017,7 @@ mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
 {
 	struct ibv_context *ctx = sh->cdev->ctx;
 	int nlsk_fd;
+	uint8_t rdma_monitor_supp = 0;
 
 	sh->intr_handle = mlx5_os_interrupt_handler_create
 		(RTE_INTR_INSTANCE_F_SHARED, true,
@@ -3025,20 +3026,34 @@ mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)
 		DRV_LOG(ERR, "Failed to allocate intr_handle.");
 		return;
 	}
-	if (sh->cdev->config.probe_opt && sh->cdev->dev_info.port_num > 1) {
+	if (sh->cdev->config.probe_opt &&
+	    sh->cdev->dev_info.port_num > 1 &&
+	    !sh->rdma_monitor_supp) {
 		nlsk_fd = mlx5_nl_rdma_monitor_init();
 		if (nlsk_fd < 0) {
 			DRV_LOG(ERR, "Failed to create a socket for RDMA Netlink events: %s",
 				rte_strerror(rte_errno));
 			return;
 		}
-		sh->intr_handle_ib = mlx5_os_interrupt_handler_create
-			(RTE_INTR_INSTANCE_F_SHARED, true,
-			 nlsk_fd, mlx5_dev_interrupt_handler_ib, sh);
-		if (sh->intr_handle_ib == NULL) {
-			DRV_LOG(ERR, "Fail to allocate intr_handle");
+		if (mlx5_nl_rdma_monitor_cap_get(nlsk_fd, &rdma_monitor_supp)) {
+			DRV_LOG(ERR, "Failed to query RDMA monitor support: %s",
+				rte_strerror(rte_errno));
+			close(nlsk_fd);
 			return;
 		}
+		sh->rdma_monitor_supp = rdma_monitor_supp;
+		if (sh->rdma_monitor_supp) {
+			sh->intr_handle_ib = mlx5_os_interrupt_handler_create
+				(RTE_INTR_INSTANCE_F_SHARED, true,
+				 nlsk_fd, mlx5_dev_interrupt_handler_ib, sh);
+			if (sh->intr_handle_ib == NULL) {
+				DRV_LOG(ERR, "Fail to allocate intr_handle");
+				close(nlsk_fd);
+				return;
+			}
+		} else {
+			close(nlsk_fd);
+		}
 	}
 	nlsk_fd = mlx5_nl_init(NETLINK_ROUTE, RTMGRP_LINK);
 	if (nlsk_fd < 0) {
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 688a7270ca..ab604042b9 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1545,6 +1545,7 @@ struct mlx5_dev_ctx_shared {
 	uint32_t lag_rx_port_affinity_en:1;
 	/* lag_rx_port_affinity is supported. */
 	uint32_t hws_max_log_bulk_sz:5;
+	uint32_t rdma_monitor_supp:1;
 	/* Log of minimal HWS counters created hard coded. */
 	uint32_t hws_max_nb_counters; /* Maximal number for HWS counters. */
 	uint32_t max_port; /* Maximal IB device port index. */
-- 
2.34.1


      parent reply	other threads:[~2024-12-23 10:12 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-23 10:10 [PATCH V1 0/7] port probe time optimization Minggang Li(Gavin)
2024-12-23 10:10 ` [PATCH 1/7] mailmap: update user name Minggang Li(Gavin)
2024-12-23 10:10 ` [PATCH 2/7] net/mlx5: optimize device probing Minggang Li(Gavin)
2024-12-23 10:10 ` [PATCH 3/7] net/mlx5: add new devargs to control probe optimization Minggang Li(Gavin)
2024-12-23 10:10 ` [PATCH 4/7] common/mlx5: fix Netlink socket leak Minggang Li(Gavin)
2024-12-23 10:10 ` [PATCH 5/7] common/mlx5: add RDMA monitor event awareness Minggang Li(Gavin)
2024-12-23 10:11 ` [PATCH 6/7] mlx5: use RDMA Netlink to update port information Minggang Li(Gavin)
2024-12-23 10:11 ` Minggang Li(Gavin) [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241223101101.677449-8-gavinl@nvidia.com \
    --to=gavinl@nvidia.com \
    --cc=bingz@nvidia.com \
    --cc=dev@dpdk.org \
    --cc=dsosnowski@nvidia.com \
    --cc=matan@nvidia.com \
    --cc=orika@nvidia.com \
    --cc=rasland@nvidia.com \
    --cc=suanmingm@nvidia.com \
    --cc=thomas@monjalon.net \
    --cc=viacheslavo@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).