DPDK patches and discussions
 help / color / mirror / Atom feed
From: Rongwei Liu <rongweil@nvidia.com>
To: <matan@nvidia.com>, <viacheslavo@nvidia.com>, <orika@nvidia.com>,
	<thomas@monjalon.net>, Michael Baum <michaelba@nvidia.com>
Cc: <dev@dpdk.org>, <rasland@nvidia.com>, <stable@dpdk.org>
Subject: [PATCH v1] net/mlx5: fix probe failure with secondary bonding member
Date: Wed, 6 Apr 2022 10:12:24 +0300	[thread overview]
Message-ID: <20220406071224.870035-1-rongweil@nvidia.com> (raw)

Users can probe either the primary or the secondary PCIe ID when
bonding is configured:
1. -a 0a:00.0,representor=pf[0-1]vf[0-1]: the PMD probes 5 ports
in total, the bonding device plus 4 representor ports.
2. -a 0a:00.1,representor=pf[0-1]vf[0-1]: the PMD probes only the
2 representor ports.

In the second case, the bonding IB device does not have the same
PCIe ID as the one being probed, so the PMD needs to check the
bonding relationship; otherwise the probe fails.

Fixes: 6856efa54eea ("net/mlx5: fix PF leak on PCI probing failure")
Cc: stable@dpdk.org

Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/common/mlx5/linux/mlx5_common_os.c | 26 ++++++++++++++++------
 drivers/net/mlx5/linux/mlx5_os.c           | 24 ++++++++++----------
 2 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c b/drivers/common/mlx5/linux/mlx5_common_os.c
index 030ceb561f..d40cfd5cd1 100644
--- a/drivers/common/mlx5/linux/mlx5_common_os.c
+++ b/drivers/common/mlx5/linux/mlx5_common_os.c
@@ -559,21 +559,33 @@ mlx5_os_get_ibv_device(const struct rte_pci_addr *addr)
 	int n;
 	struct ibv_device **ibv_list = mlx5_glue->get_device_list(&n);
 	struct ibv_device *ibv_match = NULL;
+	uint8_t guid1[32] = {0};
+	uint8_t guid2[32] = {0};
+	int ret1, ret2 = -1;
+	struct rte_pci_addr paddr;
 
-	if (ibv_list == NULL) {
+	if (ibv_list == NULL || !n) {
 		rte_errno = ENOSYS;
+		if (ibv_list)
+			mlx5_glue->free_device_list(ibv_list);
 		return NULL;
 	}
+	ret1 = mlx5_get_device_guid(addr, guid1, sizeof(guid1));
 	while (n-- > 0) {
-		struct rte_pci_addr paddr;
-
 		DRV_LOG(DEBUG, "Checking device \"%s\"..", ibv_list[n]->name);
 		if (mlx5_get_pci_addr(ibv_list[n]->ibdev_path, &paddr) != 0)
 			continue;
-		if (rte_pci_addr_cmp(addr, &paddr) != 0)
-			continue;
-		ibv_match = ibv_list[n];
-		break;
+		if (ret1 > 0)
+			ret2 = mlx5_get_device_guid(&paddr, guid2, sizeof(guid2));
+		/* Bond device can bond secondary PCIe */
+		if ((strstr(ibv_list[n]->name, "bond") &&
+		    ((ret1 > 0 && ret2 > 0 && !memcmp(guid1, guid2, sizeof(guid1))) ||
+		    (addr->domain == paddr.domain && addr->bus == paddr.bus &&
+		     addr->devid == paddr.devid))) ||
+		     !rte_pci_addr_cmp(addr, &paddr)) {
+			ibv_match = ibv_list[n];
+			break;
+		}
 	}
 	if (ibv_match == NULL) {
 		DRV_LOG(WARNING,
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index ff65efb2a2..a821153b35 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1979,9 +1979,9 @@ mlx5_os_pci_probe_pf(struct mlx5_common_device *cdev,
 	if (!nd) {
 		/* No device matches, just complain and bail out. */
 		DRV_LOG(WARNING,
-			"No Verbs device matches PCI device " PCI_PRI_FMT ","
+			"PF %u doesn't have Verbs device matches PCI device " PCI_PRI_FMT ","
 			" are kernel drivers loaded?",
-			owner_pci.domain, owner_pci.bus,
+			owner_id, owner_pci.domain, owner_pci.bus,
 			owner_pci.devid, owner_pci.function);
 		rte_errno = ENOENT;
 		ret = -rte_errno;
@@ -2387,16 +2387,16 @@ mlx5_os_pci_probe(struct mlx5_common_device *cdev,
 		for (p = 0; p < eth_da.nb_ports; p++) {
 			ret = mlx5_os_pci_probe_pf(cdev, &eth_da,
 						   eth_da.ports[p], mkvlist);
-			if (ret)
-				break;
-		}
-		if (ret) {
-			DRV_LOG(ERR, "Probe of PCI device " PCI_PRI_FMT " "
-				"aborted due to prodding failure of PF %u",
-				pci_dev->addr.domain, pci_dev->addr.bus,
-				pci_dev->addr.devid, pci_dev->addr.function,
-				eth_da.ports[p]);
-			mlx5_net_remove(cdev);
+			if (ret) {
+				DRV_LOG(INFO, "Probe of PCI device " PCI_PRI_FMT " "
+					"aborted due to proding failure of PF %u",
+					pci_dev->addr.domain, pci_dev->addr.bus,
+					pci_dev->addr.devid, pci_dev->addr.function,
+					eth_da.ports[p]);
+				mlx5_net_remove(cdev);
+				if (p != 0)
+					break;
+			}
 		}
 	} else {
 		ret = mlx5_os_pci_probe_pf(cdev, &eth_da, 0, mkvlist);
-- 
2.27.0


             reply	other threads:[~2022-04-06  7:12 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-06  7:12 Rongwei Liu [this message]
2022-04-14  7:19 ` Raslan Darawsheh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220406071224.870035-1-rongweil@nvidia.com \
    --to=rongweil@nvidia.com \
    --cc=dev@dpdk.org \
    --cc=matan@nvidia.com \
    --cc=michaelba@nvidia.com \
    --cc=orika@nvidia.com \
    --cc=rasland@nvidia.com \
    --cc=stable@dpdk.org \
    --cc=thomas@monjalon.net \
    --cc=viacheslavo@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).