patches for DPDK stable branches
 help / color / mirror / Atom feed
From: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
To: stable@dpdk.org
Cc: ktraynor@redhat.com
Subject: [dpdk-stable] [PATCH] [18.11] net/mlx5: fix mlx5 devices port naming
Date: Thu,  2 Jul 2020 14:37:50 +0000
Message-ID: <1593700670-25730-1-git-send-email-viacheslavo@mellanox.com> (raw)

Newer kernels changed the physical port naming for mlx5 devices.
This patch adds integrated support for recognizing the new naming.

The following port naming formats are supported:

  - missing physical port name (no sysfs/netlink key) at all,
    master is assumed

  - decimal digits (for example "12"), representor is
    assumed, the value is the index of attached VF

  - "p" followed by decimal digits, for example "p2", master
    is assumed

Switchdev mode (representors and master) is recognized and
supported only for older kernels (<= 4.19) and OFED <= 4.4,
without multiport IB device support. Adding multiport IB
device support would require adding the entire feature
along with the shared context approach.

Fixes: 26c08b979d26 ("net/mlx5: add port representor awareness")

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
---
 drivers/net/mlx5/mlx5.c        |   2 +
 drivers/net/mlx5/mlx5.h        |  14 ++++
 drivers/net/mlx5/mlx5_ethdev.c | 144 +++++++++++++++++++++++++++++++++++++++--
 drivers/net/mlx5/mlx5_nl.c     | 104 ++++++++++++++++++++++++-----
 4 files changed, 240 insertions(+), 24 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 649faad..f25f5b8 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1501,6 +1501,8 @@ struct mlx5_dev_spawn_data {
 	for (i = 0; i != n; ++i) {
 		uint32_t restore;
 
+		if (!list[i].ifindex)
+			continue;
 		list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device,
 						 list[i].ibv_dev, dev_config,
 						 &list[i].info, list[i].ifindex);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 26cbdbc..c91907d 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -54,10 +54,22 @@ enum {
 	PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF = 0xa2d3,
 };
 
+/* Recognized Infiniband device physical port name types. */
+enum mlx5_nl_phys_port_name_type {
+	MLX5_PHYS_PORT_NAME_TYPE_NOTSET = 0, /* Not set. */
+	MLX5_PHYS_PORT_NAME_TYPE_LEGACY, /* Plain number, kernel ver < 5.0 */
+	MLX5_PHYS_PORT_NAME_TYPE_UPLINK, /* p0, kernel ver >= 5.0 */
+	MLX5_PHYS_PORT_NAME_TYPE_PFVF, /* pf0vf0, kernel ver >= 5.0 */
+	MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN, /* Unrecognized. */
+};
+
+
 /** Switch information returned by mlx5_nl_switch_info(). */
 struct mlx5_switch_info {
 	uint32_t master:1; /**< Master device. */
 	uint32_t representor:1; /**< Representor device. */
+	enum mlx5_nl_phys_port_name_type name_type; /**< Port name type. */
+	int32_t pf_num; /**< PF number (valid for pfXvfY format only). */
 	int32_t port_name; /**< Representor port name. */
 	uint64_t switch_id; /**< Switch identifier. */
 };
@@ -290,6 +302,8 @@ unsigned int mlx5_dev_to_port_id(const struct rte_device *dev,
 				 unsigned int port_list_n);
 int mlx5_sysfs_switch_info(unsigned int ifindex,
 			   struct mlx5_switch_info *info);
+void mlx5_translate_port_name(const char *port_name_in,
+			      struct mlx5_switch_info *port_info_out);
 
 /* mlx5_mac.c */
 
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index d49cb59..d9ea74c 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1312,6 +1312,110 @@ struct ethtool_link_settings {
 }
 
 /**
+ * Extract port name, as a number, from sysfs or netlink information.
+ *
+ * @param[in] port_name_in
+ *   String representing the port name.
+ * @param[out] port_info_out
+ *   Port information, including port name as a number and port name
+ *   type if recognized.
+ *
+ * @return
+ *   Nothing, name_type/port_name/pf_num of port_info_out are updated.
+ */
+void
+mlx5_translate_port_name(const char *port_name_in,
+			 struct mlx5_switch_info *port_info_out)
+{
+	char pf_c1, pf_c2, vf_c1, vf_c2;
+	char *end;
+	int sc_items;
+
+	/*
+	 * Check for port-name as a string of the form pf0vf0
+	 * (support kernel ver >= 5.0 or OFED ver >= 4.6).
+	 */
+	sc_items = sscanf(port_name_in, "%c%c%d%c%c%d",
+			  &pf_c1, &pf_c2, &port_info_out->pf_num,
+			  &vf_c1, &vf_c2, &port_info_out->port_name);
+	if (sc_items == 6 &&
+	    pf_c1 == 'p' && pf_c2 == 'f' &&
+	    vf_c1 == 'v' && vf_c2 == 'f') {
+		port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFVF;
+		return;
+	}
+	/*
+	 * Check for port-name as a string of the form p0
+	 * (support kernel ver >= 5.0, or OFED ver >= 4.6).
+	 */
+	sc_items = sscanf(port_name_in, "%c%d",
+			  &pf_c1, &port_info_out->port_name);
+	if (sc_items == 2 && pf_c1 == 'p') {
+		port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK;
+		return;
+	}
+	/* Check for port-name as a number (support kernel ver < 5.0). */
+	errno = 0;
+	port_info_out->port_name = strtol(port_name_in, &end, 0);
+	if (!errno &&
+	    (size_t)(end - port_name_in) == strlen(port_name_in)) {
+		port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY;
+		return;
+	}
+	port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
+}
+
+/**
+ * Analyze gathered port parameters via sysfs to recognize master
+ * and representor devices for E-Switch configuration.
+ *
+ * @param[in] device_dir
+ *   Flag of presence of "device" directory under port device key.
+ * @param[in,out] switch_info
+ *   Port information, including port name as a number and port name
+ *   type if recognized.
+ *
+ * @return
+ *   Nothing, master and representor flags are set in switch_info
+ *   according to recognized parameters (if any).
+ */
+static void
+mlx5_sysfs_check_switch_info(bool device_dir,
+			     struct mlx5_switch_info *switch_info)
+{
+	switch (switch_info->name_type) {
+	case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
+		/*
+		 * Name is not recognized, assume the master,
+		 * check the device directory presence.
+		 */
+		switch_info->master = device_dir;
+		break;
+	case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
+		/*
+		 * Name is not set, this assumes the legacy naming
+		 * schema for master, just check if there is
+		 * a device directory.
+		 */
+		switch_info->master = device_dir;
+		break;
+	case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
+		/* New uplink naming schema recognized. */
+		switch_info->master = 1;
+		break;
+	case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
+		/* Legacy representors naming schema. */
+		switch_info->representor = !device_dir;
+		break;
+	case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
+		/* New representors naming schema. */
+		switch_info->representor = 1;
+		break;
+	}
+}
+
+
+/**
  * Get switch information associated with network interface.
  *
  * @param ifindex
@@ -1326,11 +1430,20 @@ struct ethtool_link_settings {
 mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info)
 {
 	char ifname[IF_NAMESIZE];
+	char port_name[IF_NAMESIZE];
 	FILE *file;
-	struct mlx5_switch_info data = { .master = 0, };
-	bool port_name_set = false;
+	struct mlx5_switch_info data = {
+		.master = 0,
+		.representor = 0,
+		.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
+		.port_name = 0,
+		.switch_id = 0,
+	};
+	DIR *dir;
 	bool port_switch_id_set = false;
+	bool device_dir = false;
 	char c;
+	int ret;
 
 	if (!if_indextoname(ifindex, ifname)) {
 		rte_errno = errno;
@@ -1341,13 +1454,15 @@ struct ethtool_link_settings {
 	      ifname);
 	MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
 	      ifname);
+	MKSTR(pci_device, "/sys/class/net/%s/device",
+	      ifname);
 
 	file = fopen(phys_port_name, "rb");
 	if (file != NULL) {
-		port_name_set =
-			fscanf(file, "%d%c", &data.port_name, &c) == 2 &&
-			c == '\n';
+		ret = fscanf(file, "%s", port_name);
 		fclose(file);
+		if (ret == 1)
+			mlx5_translate_port_name(port_name, &data);
 	}
 	file = fopen(phys_switch_id, "rb");
 	if (file == NULL) {
@@ -1358,8 +1473,23 @@ struct ethtool_link_settings {
 		fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
 		c == '\n';
 	fclose(file);
-	data.master = port_switch_id_set && !port_name_set;
-	data.representor = port_switch_id_set && port_name_set;
+	dir = opendir(pci_device);
+	if (dir != NULL) {
+		closedir(dir);
+		device_dir = true;
+	}
+	if (port_switch_id_set) {
+		/* We have some E-Switch configuration. */
+		mlx5_sysfs_check_switch_info(device_dir, &data);
+	}
 	*info = data;
+	assert(!(data.master && data.representor));
+	if (data.master && data.representor) {
+		DRV_LOG(ERR, "ifindex %u device is recognized as master"
+			     " and as representor", ifindex);
+		rte_errno = ENODEV;
+		return -rte_errno;
+	}
 	return 0;
 }
+
diff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c
index fe5a274..7098d31 100644
--- a/drivers/net/mlx5/mlx5_nl.c
+++ b/drivers/net/mlx5/mlx5_nl.c
@@ -84,6 +84,7 @@ struct mlx5_nl_ifindex_data {
 	const char *name; /**< IB device name (in). */
 	uint32_t ibindex; /**< IB device index (out). */
 	uint32_t ifindex; /**< Network interface index (out). */
+	uint32_t ibfound; /**< Found IB index for matching device. */
 };
 
 /**
@@ -695,7 +696,7 @@ struct mlx5_nl_ifindex_data {
 	size_t off = NLMSG_HDRLEN;
 	uint32_t ibindex = 0;
 	uint32_t ifindex = 0;
-	int found = 0;
+	uint32_t found = 0, ibfound = 0;
 
 	if (nh->nlmsg_type !=
 	    RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) &&
@@ -711,6 +712,7 @@ struct mlx5_nl_ifindex_data {
 		switch (na->nla_type) {
 		case RDMA_NLDEV_ATTR_DEV_INDEX:
 			ibindex = *(uint32_t *)payload;
+			ibfound = 1;
 			break;
 		case RDMA_NLDEV_ATTR_DEV_NAME:
 			if (!strcmp(payload, data->name))
@@ -727,6 +729,7 @@ struct mlx5_nl_ifindex_data {
 	if (found) {
 		data->ibindex = ibindex;
 		data->ifindex = ifindex;
+		data->ibfound = ibfound;
 	}
 	return 0;
 error:
@@ -759,6 +762,7 @@ struct mlx5_nl_ifindex_data {
 		.name = name,
 		.ibindex = 0, /* Determined during first pass. */
 		.ifindex = 0, /* Determined during second pass. */
+		.ibfound = 0,
 	};
 	union {
 		struct nlmsghdr nh;
@@ -782,7 +786,7 @@ struct mlx5_nl_ifindex_data {
 	ret = mlx5_nl_recv(nl, seq, mlx5_nl_ifindex_cb, &data);
 	if (ret < 0)
 		return 0;
-	if (!data.ibindex)
+	if (!data.ibfound)
 		goto error;
 	++seq;
 	req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
@@ -814,6 +818,55 @@ struct mlx5_nl_ifindex_data {
 }
 
 /**
+ * Analyze gathered port parameters via Netlink to recognize master
+ * and representor devices for E-Switch configuration.
+ *
+ * @param[in] num_vf_set
+ *   Flag of presence of number of VFs port attribute.
+ * @param[in,out] switch_info
+ *   Port information, including port name as a number and port name
+ *   type if recognized.
+ *
+ * @return
+ *   Nothing, master and representor flags are set in switch_info
+ *   according to recognized parameters (if any).
+ */
+static void
+mlx5_nl_check_switch_info(bool num_vf_set,
+			  struct mlx5_switch_info *switch_info)
+{
+	switch (switch_info->name_type) {
+	case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
+		/*
+		 * Name is not recognized, assume the master,
+		 * check the number of VFs key presence.
+		 */
+		switch_info->master = num_vf_set;
+		break;
+	case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
+		/*
+		 * Name is not set, this assumes the legacy naming
+		 * schema for master, just check if there is a
+		 * number of VFs key.
+		 */
+		switch_info->master = num_vf_set;
+		break;
+	case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
+		/* New uplink naming schema recognized. */
+		switch_info->master = 1;
+		break;
+	case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
+		/* Legacy representors naming schema. */
+		switch_info->representor = !num_vf_set;
+		break;
+	case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
+		/* New representors naming schema. */
+		switch_info->representor = 1;
+		break;
+	}
+}
+
+/**
  * Process switch information from Netlink message.
  *
  * @param nh
@@ -830,31 +883,29 @@ struct mlx5_nl_ifindex_data {
 	struct mlx5_switch_info info = {
 		.master = 0,
 		.representor = 0,
+		.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
 		.port_name = 0,
 		.switch_id = 0,
 	};
 	size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
-	bool port_name_set = false;
 	bool switch_id_set = false;
+	bool num_vf_set = false;
 
 	if (nh->nlmsg_type != RTM_NEWLINK)
 		goto error;
 	while (off < nh->nlmsg_len) {
 		struct rtattr *ra = (void *)((uintptr_t)nh + off);
 		void *payload = RTA_DATA(ra);
-		char *end;
 		unsigned int i;
 
 		if (ra->rta_len > nh->nlmsg_len - off)
 			goto error;
 		switch (ra->rta_type) {
+		case IFLA_NUM_VF:
+			num_vf_set = true;
+			break;
 		case IFLA_PHYS_PORT_NAME:
-			errno = 0;
-			info.port_name = strtol(payload, &end, 0);
-			if (errno ||
-			    (size_t)(end - (char *)payload) != strlen(payload))
-				goto error;
-			port_name_set = true;
+			mlx5_translate_port_name((char *)payload, &info);
 			break;
 		case IFLA_PHYS_SWITCH_ID:
 			info.switch_id = 0;
@@ -867,8 +918,11 @@ struct mlx5_nl_ifindex_data {
 		}
 		off += RTA_ALIGN(ra->rta_len);
 	}
-	info.master = switch_id_set && !port_name_set;
-	info.representor = switch_id_set && port_name_set;
+	if (switch_id_set) {
+		/* We have some E-Switch configuration. */
+		mlx5_nl_check_switch_info(num_vf_set, &info);
+	}
+	assert(!(info.master && info.representor));
 	memcpy(arg, &info, sizeof(info));
 	return 0;
 error:
@@ -890,15 +944,19 @@ struct mlx5_nl_ifindex_data {
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_nl_switch_info(int nl, unsigned int ifindex, struct mlx5_switch_info *info)
+mlx5_nl_switch_info(int nl, unsigned int ifindex,
+		    struct mlx5_switch_info *info)
 {
-	uint32_t seq = random();
 	struct {
 		struct nlmsghdr nh;
 		struct ifinfomsg info;
+		struct rtattr rta;
+		uint32_t extmask;
 	} req = {
 		.nh = {
-			.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)),
+			.nlmsg_len = NLMSG_LENGTH
+					(sizeof(req.info) +
+					 RTA_LENGTH(sizeof(uint32_t))),
 			.nlmsg_type = RTM_GETLINK,
 			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
 		},
@@ -906,11 +964,23 @@ struct mlx5_nl_ifindex_data {
 			.ifi_family = AF_UNSPEC,
 			.ifi_index = ifindex,
 		},
+		.rta = {
+			.rta_type = IFLA_EXT_MASK,
+			.rta_len = RTA_LENGTH(sizeof(int32_t)),
+		},
+		.extmask = RTE_LE32(1),
 	};
+	uint32_t sn = random();
 	int ret;
 
-	ret = mlx5_nl_send(nl, &req.nh, seq);
+	ret = mlx5_nl_send(nl, &req.nh, sn);
 	if (ret >= 0)
-		ret = mlx5_nl_recv(nl, seq, mlx5_nl_switch_info_cb, info);
+		ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info);
+	if (info->master && info->representor) {
+		DRV_LOG(ERR, "ifindex %u device is recognized as master"
+			     " and as representor", ifindex);
+		rte_errno = ENODEV;
+		ret = -rte_errno;
+	}
 	return ret;
 }
-- 
1.8.3.1


             reply	other threads:[~2020-07-02 14:37 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-02 14:37 Viacheslav Ovsiienko [this message]
2020-07-03 10:27 ` Kevin Traynor

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1593700670-25730-1-git-send-email-viacheslavo@mellanox.com \
    --to=viacheslavo@mellanox.com \
    --cc=ktraynor@redhat.com \
    --cc=stable@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

patches for DPDK stable branches

This inbox may be cloned and mirrored by anyone:

	git clone --mirror http://inbox.dpdk.org/stable/0 stable/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 stable stable/ http://inbox.dpdk.org/stable \
		stable@dpdk.org
	public-inbox-index stable

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.stable


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git