From: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
To: dev@dpdk.org
Cc: matan@mellanox.com, rasland@mellanox.com
Subject: [dpdk-dev] [PATCH 04/12] net/mlx5: add VF LAG mode bonding device recognition
Date: Wed, 25 Sep 2019 07:53:27 +0000 [thread overview]
Message-ID: <1569398015-6027-5-git-send-email-viacheslavo@mellanox.com> (raw)
In-Reply-To: <1569398015-6027-1-git-send-email-viacheslavo@mellanox.com>
The Mellanox NICs starting from ConnectX-5 support LAG over
NIC ports internally, implemented by the NIC firmware and hardware.
The multiport NIC presents multiple physical PCI functions (PF),
with SR-IOV multiple virtual PCI functions (VFs) might be presented.
With switchdev mode the VF representors are engaged and PFs and their
VFs are connected by internal E-Switch feature. Each PF and related VFs
have dedicated E-Switch and belong to dedicated switch domain.
If NIC ports are combined to support LAG, the kernel drivers introduce
a single unified Infiniband multiport device, and only one
unified E-Switch with a single switch domain combines the master PF
and all VFs. No extra DPDK bonding device is needed.
Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
---
drivers/net/mlx5/mlx5.c | 160 +++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 159 insertions(+), 1 deletion(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 701da7e..12eed13 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -169,6 +169,7 @@ struct mlx5_dev_spawn_data {
uint32_t ifindex; /**< Network interface index. */
uint32_t max_port; /**< IB device maximal port index. */
uint32_t ibv_port; /**< IB device physical port index. */
+ int pf_bond; /**< bonding device PF index. < 0 - no bonding */
struct mlx5_switch_info info; /**< Switch information. */
struct ibv_device *ibv_dev; /**< Associated IB device. */
struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */
@@ -2119,6 +2120,108 @@ struct mlx5_dev_spawn_data {
}
/**
+ * Match PCI information for possible slaves of bonding device.
+ *
+ * @param[in] ibv_dev
+ * Pointer to Infiniband device structure.
+ * @param[in] pci_dev
+ * Pointer to PCI device structure to match PCI address.
+ * @param[in] nl_rdma
+ * Netlink RDMA group socket handle.
+ *
+ * @return
+ * negative value if no bonding device found, otherwise
+ * non-negative index of slave PF in bonding.
+ */
+static int
+mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev,
+ const struct rte_pci_device *pci_dev,
+ int nl_rdma)
+{
+ char ifname[IF_NAMESIZE + 1];
+ unsigned int ifindex;
+ unsigned int np, i;
+ FILE *file = NULL;
+ int pf = -1;
+
+ /*
+ * Try to get the master device name. If anything goes
+ * wrong, assume the kernel lacks support and there are
+ * no bonding devices.
+ */
+ if (nl_rdma < 0)
+ return -1;
+ if (!strstr(ibv_dev->name, "bond"))
+ return -1;
+ np = mlx5_nl_portnum(nl_rdma, ibv_dev->name);
+ if (!np)
+ return -1;
+ /*
+ * The master device might not be on the predefined
+ * port (port index 1 is not guaranteed), so we have
+ * to scan all Infiniband device ports to find the
+ * master.
+ */
+ for (i = 1; i <= np; ++i) {
+ /* Skip Infiniband ports that have no network interface index. */
+ ifindex = mlx5_nl_ifindex(nl_rdma, ibv_dev->name, i);
+ if (!ifindex)
+ continue;
+ if (!if_indextoname(ifindex, ifname))
+ continue;
+ /* Try to open the bonding slaves list of this netdev's master in sysfs. */
+ MKSTR(slaves,
+ "/sys/class/net/%s/master/bonding/slaves", ifname);
+ file = fopen(slaves, "r");
+ if (file)
+ break;
+ }
+ if (!file)
+ return -1;
+ MKSTR(format, "%c%us", '%', (unsigned int)(sizeof(ifname) - 1));
+
+ /* The width-limited format built above caps each name at sizeof(ifname) - 1 characters. */
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+ while (fscanf(file, format, ifname) == 1) {
+#pragma GCC diagnostic error "-Wformat-nonliteral"
+ char tmp_str[IF_NAMESIZE + 32];
+ struct rte_pci_addr pci_addr;
+ struct mlx5_switch_info info;
+
+ /* For each slave interface name read, get the PCI address of its netdev. */
+ snprintf(tmp_str, sizeof(tmp_str),
+ "/sys/class/net/%s", ifname);
+ if (mlx5_dev_to_pci_addr(tmp_str, &pci_addr)) {
+ DRV_LOG(WARNING, "can not get PCI address"
+ " for netdev \"%s\"", ifname);
+ continue;
+ }
+ if (pci_dev->addr.domain != pci_addr.domain ||
+ pci_dev->addr.bus != pci_addr.bus ||
+ pci_dev->addr.devid != pci_addr.devid ||
+ pci_dev->addr.function != pci_addr.function)
+ continue;
+ /* PCI address matches: reuse file and tmp_str to read the slave's phys_port_name. */
+ fclose(file);
+ snprintf(tmp_str, sizeof(tmp_str),
+ "/sys/class/net/%s/phys_port_name", ifname);
+ file = fopen(tmp_str, "rb");
+ if (!file)
+ break;
+ info.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET;
+ if (fscanf(file, "%32s", tmp_str) == 1)
+ mlx5_translate_port_name(tmp_str, &info);
+ if (info.name_type == MLX5_PHYS_PORT_NAME_TYPE_LEGACY ||
+ info.name_type == MLX5_PHYS_PORT_NAME_TYPE_UPLINK)
+ pf = info.port_name;
+ break;
+ }
+ if (file)
+ fclose(file);
+ return pf;
+}
+
+/**
* DPDK callback to register a PCI device.
*
* This function spawns Ethernet devices out of a given PCI device.
@@ -2154,6 +2257,12 @@ struct mlx5_dev_spawn_data {
* Actually this is the number of iterations to spawn.
*/
unsigned int ns = 0;
+ /*
+ * Bonding device
+ * < 0 - no bonding device (single one)
+ * >= 0 - bonding device (value is slave PF index)
+ */
+ int bd = -1;
struct mlx5_dev_spawn_data *list = NULL;
struct mlx5_dev_config dev_config;
int ret;
@@ -2185,6 +2294,30 @@ struct mlx5_dev_spawn_data {
struct rte_pci_addr pci_addr;
DRV_LOG(DEBUG, "checking device \"%s\"", ibv_list[ret]->name);
+ bd = mlx5_device_bond_pci_match
+ (ibv_list[ret], pci_dev, nl_rdma);
+ if (bd >= 0) {
+ /*
+ * Bonding device detected. Only one match is allowed,
+ * the bonding is supported over multi-port IB device,
+ * there should be no matches on representor PCI
+ * functions or non VF LAG bonding devices with
+ * specified address.
+ */
+ if (nd) {
+ DRV_LOG(ERR,
+ "multiple PCI match on bonding device"
+ "\"%s\" found", ibv_list[ret]->name);
+ rte_errno = ENOENT;
+ ret = -rte_errno;
+ goto exit;
+ }
+ DRV_LOG(INFO, "PCI information matches for"
+ " slave %d bonding device \"%s\"",
+ bd, ibv_list[ret]->name);
+ ibv_match[nd++] = ibv_list[ret];
+ break;
+ }
if (mlx5_dev_to_pci_addr
(ibv_list[ret]->ibdev_path, &pci_addr))
continue;
@@ -2220,6 +2353,13 @@ struct mlx5_dev_spawn_data {
if (!np)
DRV_LOG(WARNING, "can not get IB device \"%s\""
" ports number", ibv_match[0]->name);
+ if (bd >= 0 && !np) {
+ DRV_LOG(ERR, "can not get ports"
+ " for bonding device");
+ rte_errno = ENOENT;
+ ret = -rte_errno;
+ goto exit;
+ }
}
/*
* Now we can determine the maximal
@@ -2235,7 +2375,7 @@ struct mlx5_dev_spawn_data {
ret = -rte_errno;
goto exit;
}
- if (np > 1) {
+ if (bd >= 0 || np > 1) {
/*
* Single IB device with multiple ports found,
* it may be E-Switch master device and representors.
@@ -2244,12 +2384,14 @@ struct mlx5_dev_spawn_data {
assert(nl_rdma >= 0);
assert(ns == 0);
assert(nd == 1);
+ assert(np);
for (i = 1; i <= np; ++i) {
list[ns].max_port = np;
list[ns].ibv_port = i;
list[ns].ibv_dev = ibv_match[0];
list[ns].eth_dev = NULL;
list[ns].pci_dev = pci_dev;
+ list[ns].pf_bond = bd;
list[ns].ifindex = mlx5_nl_ifindex
(nl_rdma, list[ns].ibv_dev->name, i);
if (!list[ns].ifindex) {
@@ -2279,6 +2421,21 @@ struct mlx5_dev_spawn_data {
(list[ns].ifindex,
&list[ns].info);
}
+ if (!ret && bd >= 0) {
+ switch (list[ns].info.name_type) {
+ case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
+ if (list[ns].info.port_name == bd)
+ ns++;
+ break;
+ case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
+ if (list[ns].info.pf_num == bd)
+ ns++;
+ break;
+ default:
+ break;
+ }
+ continue;
+ }
if (!ret && (list[ns].info.representor ^
list[ns].info.master))
ns++;
@@ -2317,6 +2474,7 @@ struct mlx5_dev_spawn_data {
list[ns].ibv_dev = ibv_match[i];
list[ns].eth_dev = NULL;
list[ns].pci_dev = pci_dev;
+ list[ns].pf_bond = -1;
list[ns].ifindex = 0;
if (nl_rdma >= 0)
list[ns].ifindex = mlx5_nl_ifindex
--
1.8.3.1
next prev parent reply other threads:[~2019-09-25 7:54 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-09-25 7:53 [dpdk-dev] [PATCH 00/12] net/mlx5: add bonding configuration support Viacheslav Ovsiienko
2019-09-25 7:53 ` [dpdk-dev] [PATCH 01/12] net/mlx5: move backing PCI device to private context Viacheslav Ovsiienko
2019-09-25 7:53 ` [dpdk-dev] [PATCH 02/12] net/mlx5: update PCI address retrieving routine Viacheslav Ovsiienko
2019-09-25 7:53 ` [dpdk-dev] [PATCH 03/12] net/mlx5: allocate device list explicitly Viacheslav Ovsiienko
2019-09-25 7:53 ` Viacheslav Ovsiienko [this message]
2019-09-30 10:34 ` [dpdk-dev] [PATCH 04/12] net/mlx5: add VF LAG mode bonding device recognition Ferruh Yigit
2019-10-01 9:02 ` Slava Ovsiienko
2019-09-25 7:53 ` [dpdk-dev] [PATCH 05/12] net/mlx5: generate bonding device name Viacheslav Ovsiienko
2019-09-25 7:53 ` [dpdk-dev] [PATCH 06/12] net/mlx5: check the kernel support for VF LAG bonding Viacheslav Ovsiienko
2019-09-25 7:53 ` [dpdk-dev] [PATCH 07/12] net/mlx5: query vport index match mode and parameters Viacheslav Ovsiienko
2019-09-25 7:53 ` [dpdk-dev] [PATCH 08/12] net/mlx5: elaborate E-Switch port parameters query Viacheslav Ovsiienko
2019-09-25 7:53 ` [dpdk-dev] [PATCH 09/12] net/mlx5: update source and destination vport translations Viacheslav Ovsiienko
2019-09-25 7:53 ` [dpdk-dev] [PATCH 10/12] net/mlx5: extend switch domain searching range Viacheslav Ovsiienko
2019-09-25 7:53 ` [dpdk-dev] [PATCH 11/12] net/mlx5: update switch port ID in bonding configuration Viacheslav Ovsiienko
2019-09-25 7:53 ` [dpdk-dev] [PATCH 12/12] net/mlx5: check sibling device configurations mismatch Viacheslav Ovsiienko
2019-09-25 10:29 ` [dpdk-dev] [PATCH 00/12] net/mlx5: add bonding configuration support Matan Azrad
2019-09-29 11:47 ` Raslan Darawsheh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1569398015-6027-5-git-send-email-viacheslavo@mellanox.com \
--to=viacheslavo@mellanox.com \
--cc=dev@dpdk.org \
--cc=matan@mellanox.com \
--cc=rasland@mellanox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).