From: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
To: stable@dpdk.org
Cc: ktraynor@redhat.com
Subject: [dpdk-stable] [PATCH] [18.11] net/mlx5: fix mlx5 devices port naming
Date: Thu, 2 Jul 2020 14:37:50 +0000 [thread overview]
Message-ID: <1593700670-25730-1-git-send-email-viacheslavo@mellanox.com> (raw)
The newer kernel changed the physical port naming for mlx5 devices.
This patch provides the integrated support for new naming recognition.
The following port naming formats are supported:
- missing physical port name (no sysfs/netlink key) at all,
master is assumed
- decimal digits (for example "12"), representor is
assumed, the value is the index of attached VF
- "p" followed by decimal digits, for example "p2", master
is assumed
Switchdev mode (representors and master) are recognized and
supported only for older kernels (<= 4.19) and OFED <= 4.4,
without multiport IB device support. Adding the multiport
IB device support would require adding the entire feature
and shared context approach would be introduced.
Fixes: 26c08b979d26 ("net/mlx5: add port representor awareness")
Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
---
drivers/net/mlx5/mlx5.c | 2 +
drivers/net/mlx5/mlx5.h | 14 ++++
drivers/net/mlx5/mlx5_ethdev.c | 144 +++++++++++++++++++++++++++++++++++++++--
drivers/net/mlx5/mlx5_nl.c | 104 ++++++++++++++++++++++++-----
4 files changed, 240 insertions(+), 24 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 649faad..f25f5b8 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1501,6 +1501,8 @@ struct mlx5_dev_spawn_data {
for (i = 0; i != n; ++i) {
uint32_t restore;
+ if (!list[i].ifindex)
+ continue;
list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device,
list[i].ibv_dev, dev_config,
&list[i].info, list[i].ifindex);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 26cbdbc..c91907d 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -54,10 +54,22 @@ enum {
PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF = 0xa2d3,
};
+/* Recognized Infiniband device physical port name types. */
+enum mlx5_nl_phys_port_name_type {
+ MLX5_PHYS_PORT_NAME_TYPE_NOTSET = 0, /* Not set. */
+ MLX5_PHYS_PORT_NAME_TYPE_LEGACY, /* before kernel ver < 5.0 */
+ MLX5_PHYS_PORT_NAME_TYPE_UPLINK, /* p0, kernel ver >= 5.0 */
+ MLX5_PHYS_PORT_NAME_TYPE_PFVF, /* pf0vf0, kernel ver >= 5.0 */
+ MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN, /* Unrecognized. */
+};
+
+
/** Switch information returned by mlx5_nl_switch_info(). */
struct mlx5_switch_info {
uint32_t master:1; /**< Master device. */
uint32_t representor:1; /**< Representor device. */
+ enum mlx5_nl_phys_port_name_type name_type; /** < Port name type. */
+ int32_t pf_num; /**< PF number (valid for pfxvfx format only). */
int32_t port_name; /**< Representor port name. */
uint64_t switch_id; /**< Switch identifier. */
};
@@ -290,6 +302,8 @@ unsigned int mlx5_dev_to_port_id(const struct rte_device *dev,
unsigned int port_list_n);
int mlx5_sysfs_switch_info(unsigned int ifindex,
struct mlx5_switch_info *info);
+void mlx5_translate_port_name(const char *port_name_in,
+ struct mlx5_switch_info *port_info_out);
/* mlx5_mac.c */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index d49cb59..d9ea74c 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -1312,6 +1312,110 @@ struct ethtool_link_settings {
}
/**
+ * Extract port name, as a number, from sysfs or netlink information.
+ *
+ * @param[in] port_name_in
+ * String representing the port name.
+ * @param[out] port_info_out
+ * Port information, including port name as a number and port name
+ * type if recognized
+ *
+ * @return
+ * port_name field set according to recognized name format.
+ */
+void
+mlx5_translate_port_name(const char *port_name_in,
+ struct mlx5_switch_info *port_info_out)
+{
+ char pf_c1, pf_c2, vf_c1, vf_c2;
+ char *end;
+ int sc_items;
+
+ /*
+ * Check for port-name as a string of the form pf0vf0
+ * (support kernel ver >= 5.0 or OFED ver >= 4.6).
+ */
+ sc_items = sscanf(port_name_in, "%c%c%d%c%c%d",
+ &pf_c1, &pf_c2, &port_info_out->pf_num,
+ &vf_c1, &vf_c2, &port_info_out->port_name);
+ if (sc_items == 6 &&
+ pf_c1 == 'p' && pf_c2 == 'f' &&
+ vf_c1 == 'v' && vf_c2 == 'f') {
+ port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFVF;
+ return;
+ }
+ /*
+ * Check for port-name as a string of the form p0
+ * (support kernel ver >= 5.0, or OFED ver >= 4.6).
+ */
+ sc_items = sscanf(port_name_in, "%c%d",
+ &pf_c1, &port_info_out->port_name);
+ if (sc_items == 2 && pf_c1 == 'p') {
+ port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK;
+ return;
+ }
+ /* Check for port-name as a number (support kernel ver < 5.0 */
+ errno = 0;
+ port_info_out->port_name = strtol(port_name_in, &end, 0);
+ if (!errno &&
+ (size_t)(end - port_name_in) == strlen(port_name_in)) {
+ port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY;
+ return;
+ }
+ port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
+}
+
+/**
+ * Analyze gathered port parameters via sysfs to recognize master
+ * and representor devices for E-Switch configuration.
+ *
+ * @param[in] device_dir
+ * flag of presence of "device" directory under port device key.
+ * @param[inout] switch_info
+ * Port information, including port name as a number and port name
+ * type if recognized
+ *
+ * @return
+ * master and representor flags are set in switch_info according to
+ * recognized parameters (if any).
+ */
+static void
+mlx5_sysfs_check_switch_info(bool device_dir,
+ struct mlx5_switch_info *switch_info)
+{
+ switch (switch_info->name_type) {
+ case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
+ /*
+ * Name is not recognized, assume the master,
+ * check the device directory presence.
+ */
+ switch_info->master = device_dir;
+ break;
+ case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
+ /*
+ * Name is not set, this assumes the legacy naming
+ * schema for master, just check if there is
+ * a device directory.
+ */
+ switch_info->master = device_dir;
+ break;
+ case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
+ /* New uplink naming schema recognized. */
+ switch_info->master = 1;
+ break;
+ case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
+ /* Legacy representors naming schema. */
+ switch_info->representor = !device_dir;
+ break;
+ case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
+ /* New representors naming schema. */
+ switch_info->representor = 1;
+ break;
+ }
+}
+
+
+/**
* Get switch information associated with network interface.
*
* @param ifindex
@@ -1326,11 +1430,20 @@ struct ethtool_link_settings {
mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info)
{
char ifname[IF_NAMESIZE];
+ char port_name[IF_NAMESIZE];
FILE *file;
- struct mlx5_switch_info data = { .master = 0, };
- bool port_name_set = false;
+ struct mlx5_switch_info data = {
+ .master = 0,
+ .representor = 0,
+ .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
+ .port_name = 0,
+ .switch_id = 0,
+ };
+ DIR *dir;
bool port_switch_id_set = false;
+ bool device_dir = false;
char c;
+ int ret;
if (!if_indextoname(ifindex, ifname)) {
rte_errno = errno;
@@ -1341,13 +1454,15 @@ struct ethtool_link_settings {
ifname);
MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
ifname);
+ MKSTR(pci_device, "/sys/class/net/%s/device",
+ ifname);
file = fopen(phys_port_name, "rb");
if (file != NULL) {
- port_name_set =
- fscanf(file, "%d%c", &data.port_name, &c) == 2 &&
- c == '\n';
+ ret = fscanf(file, "%s", port_name);
fclose(file);
+ if (ret == 1)
+ mlx5_translate_port_name(port_name, &data);
}
file = fopen(phys_switch_id, "rb");
if (file == NULL) {
@@ -1358,8 +1473,23 @@ struct ethtool_link_settings {
fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
c == '\n';
fclose(file);
- data.master = port_switch_id_set && !port_name_set;
- data.representor = port_switch_id_set && port_name_set;
+ dir = opendir(pci_device);
+ if (dir != NULL) {
+ closedir(dir);
+ device_dir = true;
+ }
+ if (port_switch_id_set) {
+ /* We have some E-Switch configuration. */
+ mlx5_sysfs_check_switch_info(device_dir, &data);
+ }
*info = data;
+ assert(!(data.master && data.representor));
+ if (data.master && data.representor) {
+ DRV_LOG(ERR, "ifindex %u device is recognized as master"
+ " and as representor", ifindex);
+ rte_errno = ENODEV;
+ return -rte_errno;
+ }
return 0;
}
+
diff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c
index fe5a274..7098d31 100644
--- a/drivers/net/mlx5/mlx5_nl.c
+++ b/drivers/net/mlx5/mlx5_nl.c
@@ -84,6 +84,7 @@ struct mlx5_nl_ifindex_data {
const char *name; /**< IB device name (in). */
uint32_t ibindex; /**< IB device index (out). */
uint32_t ifindex; /**< Network interface index (out). */
+ uint32_t ibfound; /**< Found IB index for matching device. */
};
/**
@@ -695,7 +696,7 @@ struct mlx5_nl_ifindex_data {
size_t off = NLMSG_HDRLEN;
uint32_t ibindex = 0;
uint32_t ifindex = 0;
- int found = 0;
+ uint32_t found = 0, ibfound = 0;
if (nh->nlmsg_type !=
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) &&
@@ -711,6 +712,7 @@ struct mlx5_nl_ifindex_data {
switch (na->nla_type) {
case RDMA_NLDEV_ATTR_DEV_INDEX:
ibindex = *(uint32_t *)payload;
+ ibfound = 1;
break;
case RDMA_NLDEV_ATTR_DEV_NAME:
if (!strcmp(payload, data->name))
@@ -727,6 +729,7 @@ struct mlx5_nl_ifindex_data {
if (found) {
data->ibindex = ibindex;
data->ifindex = ifindex;
+ data->ibfound = ibfound;
}
return 0;
error:
@@ -759,6 +762,7 @@ struct mlx5_nl_ifindex_data {
.name = name,
.ibindex = 0, /* Determined during first pass. */
.ifindex = 0, /* Determined during second pass. */
+ .ibfound = 0,
};
union {
struct nlmsghdr nh;
@@ -782,7 +786,7 @@ struct mlx5_nl_ifindex_data {
ret = mlx5_nl_recv(nl, seq, mlx5_nl_ifindex_cb, &data);
if (ret < 0)
return 0;
- if (!data.ibindex)
+ if (!data.ibfound)
goto error;
++seq;
req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
@@ -814,6 +818,55 @@ struct mlx5_nl_ifindex_data {
}
/**
+ * Analyze gathered port parameters via Netlink to recognize master
+ * and representor devices for E-Switch configuration.
+ *
+ * @param[in] num_vf_set
+ * flag of presence of number of VFs port attribute.
+ * @param[inout] switch_info
+ * Port information, including port name as a number and port name
+ * type if recognized
+ *
+ * @return
+ * master and representor flags are set in switch_info according to
+ * recognized parameters (if any).
+ */
+static void
+mlx5_nl_check_switch_info(bool num_vf_set,
+ struct mlx5_switch_info *switch_info)
+{
+ switch (switch_info->name_type) {
+ case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
+ /*
+ * Name is not recognized, assume the master,
+ * check the number of VFs key presence.
+ */
+ switch_info->master = num_vf_set;
+ break;
+ case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
+ /*
+ * Name is not set, this assumes the legacy naming
+ * schema for master, just check if there is a
+ * number of VFs key.
+ */
+ switch_info->master = num_vf_set;
+ break;
+ case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
+ /* New uplink naming schema recognized. */
+ switch_info->master = 1;
+ break;
+ case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
+ /* Legacy representors naming schema. */
+ switch_info->representor = !num_vf_set;
+ break;
+ case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
+ /* New representors naming schema. */
+ switch_info->representor = 1;
+ break;
+ }
+}
+
+/**
* Process switch information from Netlink message.
*
* @param nh
@@ -830,31 +883,29 @@ struct mlx5_nl_ifindex_data {
struct mlx5_switch_info info = {
.master = 0,
.representor = 0,
+ .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
.port_name = 0,
.switch_id = 0,
};
size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
- bool port_name_set = false;
bool switch_id_set = false;
+ bool num_vf_set = false;
if (nh->nlmsg_type != RTM_NEWLINK)
goto error;
while (off < nh->nlmsg_len) {
struct rtattr *ra = (void *)((uintptr_t)nh + off);
void *payload = RTA_DATA(ra);
- char *end;
unsigned int i;
if (ra->rta_len > nh->nlmsg_len - off)
goto error;
switch (ra->rta_type) {
+ case IFLA_NUM_VF:
+ num_vf_set = true;
+ break;
case IFLA_PHYS_PORT_NAME:
- errno = 0;
- info.port_name = strtol(payload, &end, 0);
- if (errno ||
- (size_t)(end - (char *)payload) != strlen(payload))
- goto error;
- port_name_set = true;
+ mlx5_translate_port_name((char *)payload, &info);
break;
case IFLA_PHYS_SWITCH_ID:
info.switch_id = 0;
@@ -867,8 +918,11 @@ struct mlx5_nl_ifindex_data {
}
off += RTA_ALIGN(ra->rta_len);
}
- info.master = switch_id_set && !port_name_set;
- info.representor = switch_id_set && port_name_set;
+ if (switch_id_set) {
+ /* We have some E-Switch configuration. */
+ mlx5_nl_check_switch_info(num_vf_set, &info);
+ }
+ assert(!(info.master && info.representor));
memcpy(arg, &info, sizeof(info));
return 0;
error:
@@ -890,15 +944,19 @@ struct mlx5_nl_ifindex_data {
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
-mlx5_nl_switch_info(int nl, unsigned int ifindex, struct mlx5_switch_info *info)
+mlx5_nl_switch_info(int nl, unsigned int ifindex,
+ struct mlx5_switch_info *info)
{
- uint32_t seq = random();
struct {
struct nlmsghdr nh;
struct ifinfomsg info;
+ struct rtattr rta;
+ uint32_t extmask;
} req = {
.nh = {
- .nlmsg_len = NLMSG_LENGTH(sizeof(req.info)),
+ .nlmsg_len = NLMSG_LENGTH
+ (sizeof(req.info) +
+ RTA_LENGTH(sizeof(uint32_t))),
.nlmsg_type = RTM_GETLINK,
.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
},
@@ -906,11 +964,23 @@ struct mlx5_nl_ifindex_data {
.ifi_family = AF_UNSPEC,
.ifi_index = ifindex,
},
+ .rta = {
+ .rta_type = IFLA_EXT_MASK,
+ .rta_len = RTA_LENGTH(sizeof(int32_t)),
+ },
+ .extmask = RTE_LE32(1),
};
+ uint32_t sn = random();
int ret;
- ret = mlx5_nl_send(nl, &req.nh, seq);
+ ret = mlx5_nl_send(nl, &req.nh, sn);
if (ret >= 0)
- ret = mlx5_nl_recv(nl, seq, mlx5_nl_switch_info_cb, info);
+ ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info);
+ if (info->master && info->representor) {
+ DRV_LOG(ERR, "ifindex %u device is recognized as master"
+ " and as representor", ifindex);
+ rte_errno = ENODEV;
+ ret = -rte_errno;
+ }
return ret;
}
--
1.8.3.1
next reply other threads:[~2020-07-02 14:37 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-02 14:37 Viacheslav Ovsiienko [this message]
2020-07-03 10:27 ` Kevin Traynor
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1593700670-25730-1-git-send-email-viacheslavo@mellanox.com \
--to=viacheslavo@mellanox.com \
--cc=ktraynor@redhat.com \
--cc=stable@dpdk.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).