From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 67D9FA0520 for ; Thu, 2 Jul 2020 16:37:57 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 327DC1D61B; Thu, 2 Jul 2020 16:37:57 +0200 (CEST) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by dpdk.org (Postfix) with ESMTP id 41B911D61B for ; Thu, 2 Jul 2020 16:37:55 +0200 (CEST) Received: from Internal Mail-Server by MTLPINE1 (envelope-from viacheslavo@mellanox.com) with SMTP; 2 Jul 2020 17:37:54 +0300 Received: from pegasus12.mtr.labs.mlnx (pegasus12.mtr.labs.mlnx [10.210.17.40]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 062Ebsm5010440; Thu, 2 Jul 2020 17:37:54 +0300 Received: from pegasus12.mtr.labs.mlnx (localhost [127.0.0.1]) by pegasus12.mtr.labs.mlnx (8.14.7/8.14.7) with ESMTP id 062EbsEH025768; Thu, 2 Jul 2020 14:37:54 GMT Received: (from viacheslavo@localhost) by pegasus12.mtr.labs.mlnx (8.14.7/8.14.7/Submit) id 062EbrTg025767; Thu, 2 Jul 2020 14:37:53 GMT X-Authentication-Warning: pegasus12.mtr.labs.mlnx: viacheslavo set sender to viacheslavo@mellanox.com using -f From: Viacheslav Ovsiienko To: stable@dpdk.org Cc: ktraynor@redhat.com Date: Thu, 2 Jul 2020 14:37:50 +0000 Message-Id: <1593700670-25730-1-git-send-email-viacheslavo@mellanox.com> X-Mailer: git-send-email 1.8.3.1 Subject: [dpdk-stable] [PATCH] [18.11] net/mlx5: fix mlx5 devices port naming X-BeenThere: stable@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches for DPDK stable branches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: stable-bounces@dpdk.org Sender: "stable" Newer kernels changed the physical port naming for mlx5 devices. This patch adds integrated support for recognizing the new naming.
The following port naming formats are supported: - no physical port name at all (no sysfs/netlink key), master is assumed - decimal digits (for example "12"), representor is assumed, the value is the index of attached VF - "p" followed by decimal digits, for example "p2", master is assumed Switchdev mode (representors and master) is recognized and supported only for older kernels (<= 4.19) and OFED <= 4.4, without multiport IB device support. Adding the multiport IB device support would require adding the entire feature and introducing the shared context approach. Fixes: 26c08b979d26 ("net/mlx5: add port representor awareness") Signed-off-by: Viacheslav Ovsiienko --- drivers/net/mlx5/mlx5.c | 2 + drivers/net/mlx5/mlx5.h | 14 ++++ drivers/net/mlx5/mlx5_ethdev.c | 144 +++++++++++++++++++++++++++++++++++++++-- drivers/net/mlx5/mlx5_nl.c | 104 ++++++++++++++++++++++++----- 4 files changed, 240 insertions(+), 24 deletions(-) diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 649faad..f25f5b8 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -1501,6 +1501,8 @@ struct mlx5_dev_spawn_data { for (i = 0; i != n; ++i) { uint32_t restore; + if (!list[i].ifindex) + continue; list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device, list[i].ibv_dev, dev_config, &list[i].info, list[i].ifindex); diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 26cbdbc..c91907d 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -54,10 +54,22 @@ enum { PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF = 0xa2d3, }; +/* Recognized Infiniband device physical port name types. */ +enum mlx5_nl_phys_port_name_type { + MLX5_PHYS_PORT_NAME_TYPE_NOTSET = 0, /* Not set. */ + MLX5_PHYS_PORT_NAME_TYPE_LEGACY, /* before kernel ver < 5.0 */ + MLX5_PHYS_PORT_NAME_TYPE_UPLINK, /* p0, kernel ver >= 5.0 */ + MLX5_PHYS_PORT_NAME_TYPE_PFVF, /* pf0vf0, kernel ver >= 5.0 */ + MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN, /* Unrecognized.
*/ +}; + + /** Switch information returned by mlx5_nl_switch_info(). */ struct mlx5_switch_info { uint32_t master:1; /**< Master device. */ uint32_t representor:1; /**< Representor device. */ + enum mlx5_nl_phys_port_name_type name_type; /** < Port name type. */ + int32_t pf_num; /**< PF number (valid for pfxvfx format only). */ int32_t port_name; /**< Representor port name. */ uint64_t switch_id; /**< Switch identifier. */ }; @@ -290,6 +302,8 @@ unsigned int mlx5_dev_to_port_id(const struct rte_device *dev, unsigned int port_list_n); int mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info); +void mlx5_translate_port_name(const char *port_name_in, + struct mlx5_switch_info *port_info_out); /* mlx5_mac.c */ diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index d49cb59..d9ea74c 100644 --- a/drivers/net/mlx5/mlx5_ethdev.c +++ b/drivers/net/mlx5/mlx5_ethdev.c @@ -1312,6 +1312,110 @@ struct ethtool_link_settings { } /** + * Extract port name, as a number, from sysfs or netlink information. + * + * @param[in] port_name_in + * String representing the port name. + * @param[out] port_info_out + * Port information, including port name as a number and port name + * type if recognized + * + * @return + * port_name field set according to recognized name format. + */ +void +mlx5_translate_port_name(const char *port_name_in, + struct mlx5_switch_info *port_info_out) +{ + char pf_c1, pf_c2, vf_c1, vf_c2; + char *end; + int sc_items; + + /* + * Check for port-name as a string of the form pf0vf0 + * (support kernel ver >= 5.0 or OFED ver >= 4.6). 
+ */ + sc_items = sscanf(port_name_in, "%c%c%d%c%c%d", + &pf_c1, &pf_c2, &port_info_out->pf_num, + &vf_c1, &vf_c2, &port_info_out->port_name); + if (sc_items == 6 && + pf_c1 == 'p' && pf_c2 == 'f' && + vf_c1 == 'v' && vf_c2 == 'f') { + port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFVF; + return; + } + /* + * Check for port-name as a string of the form p0 + * (support kernel ver >= 5.0, or OFED ver >= 4.6). + */ + sc_items = sscanf(port_name_in, "%c%d", + &pf_c1, &port_info_out->port_name); + if (sc_items == 2 && pf_c1 == 'p') { + port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UPLINK; + return; + } + /* Check for port-name as a number (support kernel ver < 5.0 */ + errno = 0; + port_info_out->port_name = strtol(port_name_in, &end, 0); + if (!errno && + (size_t)(end - port_name_in) == strlen(port_name_in)) { + port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_LEGACY; + return; + } + port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN; +} + +/** + * Analyze gathered port parameters via sysfs to recognize master + * and representor devices for E-Switch configuration. + * + * @param[in] device_dir + * flag of presence of "device" directory under port device key. + * @param[inout] switch_info + * Port information, including port name as a number and port name + * type if recognized + * + * @return + * master and representor flags are set in switch_info according to + * recognized parameters (if any). + */ +static void +mlx5_sysfs_check_switch_info(bool device_dir, + struct mlx5_switch_info *switch_info) +{ + switch (switch_info->name_type) { + case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: + /* + * Name is not recognized, assume the master, + * check the device directory presence. + */ + switch_info->master = device_dir; + break; + case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: + /* + * Name is not set, this assumes the legacy naming + * schema for master, just check if there is + * a device directory. 
+ */ + switch_info->master = device_dir; + break; + case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: + /* New uplink naming schema recognized. */ + switch_info->master = 1; + break; + case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: + /* Legacy representors naming schema. */ + switch_info->representor = !device_dir; + break; + case MLX5_PHYS_PORT_NAME_TYPE_PFVF: + /* New representors naming schema. */ + switch_info->representor = 1; + break; + } +} + + +/** * Get switch information associated with network interface. * * @param ifindex @@ -1326,11 +1430,20 @@ struct ethtool_link_settings { mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info) { char ifname[IF_NAMESIZE]; + char port_name[IF_NAMESIZE]; FILE *file; - struct mlx5_switch_info data = { .master = 0, }; - bool port_name_set = false; + struct mlx5_switch_info data = { + .master = 0, + .representor = 0, + .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, + .port_name = 0, + .switch_id = 0, + }; + DIR *dir; bool port_switch_id_set = false; + bool device_dir = false; char c; + int ret; if (!if_indextoname(ifindex, ifname)) { rte_errno = errno; @@ -1341,13 +1454,15 @@ struct ethtool_link_settings { ifname); MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id", ifname); + MKSTR(pci_device, "/sys/class/net/%s/device", + ifname); file = fopen(phys_port_name, "rb"); if (file != NULL) { - port_name_set = - fscanf(file, "%d%c", &data.port_name, &c) == 2 && - c == '\n'; + ret = fscanf(file, "%s", port_name); fclose(file); + if (ret == 1) + mlx5_translate_port_name(port_name, &data); } file = fopen(phys_switch_id, "rb"); if (file == NULL) { @@ -1358,8 +1473,23 @@ struct ethtool_link_settings { fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 && c == '\n'; fclose(file); - data.master = port_switch_id_set && !port_name_set; - data.representor = port_switch_id_set && port_name_set; + dir = opendir(pci_device); + if (dir != NULL) { + closedir(dir); + device_dir = true; + } + if (port_switch_id_set) { + /* We have 
some E-Switch configuration. */ + mlx5_sysfs_check_switch_info(device_dir, &data); + } *info = data; + assert(!(data.master && data.representor)); + if (data.master && data.representor) { + DRV_LOG(ERR, "ifindex %u device is recognized as master" + " and as representor", ifindex); + rte_errno = ENODEV; + return -rte_errno; + } return 0; } + diff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c index fe5a274..7098d31 100644 --- a/drivers/net/mlx5/mlx5_nl.c +++ b/drivers/net/mlx5/mlx5_nl.c @@ -84,6 +84,7 @@ struct mlx5_nl_ifindex_data { const char *name; /**< IB device name (in). */ uint32_t ibindex; /**< IB device index (out). */ uint32_t ifindex; /**< Network interface index (out). */ + uint32_t ibfound; /**< Found IB index for matching device. */ }; /** @@ -695,7 +696,7 @@ struct mlx5_nl_ifindex_data { size_t off = NLMSG_HDRLEN; uint32_t ibindex = 0; uint32_t ifindex = 0; - int found = 0; + uint32_t found = 0, ibfound = 0; if (nh->nlmsg_type != RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) && @@ -711,6 +712,7 @@ struct mlx5_nl_ifindex_data { switch (na->nla_type) { case RDMA_NLDEV_ATTR_DEV_INDEX: ibindex = *(uint32_t *)payload; + ibfound = 1; break; case RDMA_NLDEV_ATTR_DEV_NAME: if (!strcmp(payload, data->name)) @@ -727,6 +729,7 @@ struct mlx5_nl_ifindex_data { if (found) { data->ibindex = ibindex; data->ifindex = ifindex; + data->ibfound = ibfound; } return 0; error: @@ -759,6 +762,7 @@ struct mlx5_nl_ifindex_data { .name = name, .ibindex = 0, /* Determined during first pass. */ .ifindex = 0, /* Determined during second pass. 
*/ + .ibfound = 0, }; union { struct nlmsghdr nh; @@ -782,7 +786,7 @@ struct mlx5_nl_ifindex_data { ret = mlx5_nl_recv(nl, seq, mlx5_nl_ifindex_cb, &data); if (ret < 0) return 0; - if (!data.ibindex) + if (!data.ibfound) goto error; ++seq; req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, @@ -814,6 +818,55 @@ struct mlx5_nl_ifindex_data { } /** + * Analyze gathered port parameters via Netlink to recognize master + * and representor devices for E-Switch configuration. + * + * @param[in] num_vf_set + * flag of presence of number of VFs port attribute. + * @param[inout] switch_info + * Port information, including port name as a number and port name + * type if recognized + * + * @return + * master and representor flags are set in switch_info according to + * recognized parameters (if any). + */ +static void +mlx5_nl_check_switch_info(bool num_vf_set, + struct mlx5_switch_info *switch_info) +{ + switch (switch_info->name_type) { + case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN: + /* + * Name is not recognized, assume the master, + * check the number of VFs key presence. + */ + switch_info->master = num_vf_set; + break; + case MLX5_PHYS_PORT_NAME_TYPE_NOTSET: + /* + * Name is not set, this assumes the legacy naming + * schema for master, just check if there is a + * number of VFs key. + */ + switch_info->master = num_vf_set; + break; + case MLX5_PHYS_PORT_NAME_TYPE_UPLINK: + /* New uplink naming schema recognized. */ + switch_info->master = 1; + break; + case MLX5_PHYS_PORT_NAME_TYPE_LEGACY: + /* Legacy representors naming schema. */ + switch_info->representor = !num_vf_set; + break; + case MLX5_PHYS_PORT_NAME_TYPE_PFVF: + /* New representors naming schema. */ + switch_info->representor = 1; + break; + } +} + +/** * Process switch information from Netlink message. 
* * @param nh @@ -830,31 +883,29 @@ struct mlx5_nl_ifindex_data { struct mlx5_switch_info info = { .master = 0, .representor = 0, + .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET, .port_name = 0, .switch_id = 0, }; size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg)); - bool port_name_set = false; bool switch_id_set = false; + bool num_vf_set = false; if (nh->nlmsg_type != RTM_NEWLINK) goto error; while (off < nh->nlmsg_len) { struct rtattr *ra = (void *)((uintptr_t)nh + off); void *payload = RTA_DATA(ra); - char *end; unsigned int i; if (ra->rta_len > nh->nlmsg_len - off) goto error; switch (ra->rta_type) { + case IFLA_NUM_VF: + num_vf_set = true; + break; case IFLA_PHYS_PORT_NAME: - errno = 0; - info.port_name = strtol(payload, &end, 0); - if (errno || - (size_t)(end - (char *)payload) != strlen(payload)) - goto error; - port_name_set = true; + mlx5_translate_port_name((char *)payload, &info); break; case IFLA_PHYS_SWITCH_ID: info.switch_id = 0; @@ -867,8 +918,11 @@ struct mlx5_nl_ifindex_data { } off += RTA_ALIGN(ra->rta_len); } - info.master = switch_id_set && !port_name_set; - info.representor = switch_id_set && port_name_set; + if (switch_id_set) { + /* We have some E-Switch configuration. */ + mlx5_nl_check_switch_info(num_vf_set, &info); + } + assert(!(info.master && info.representor)); memcpy(arg, &info, sizeof(info)); return 0; error: @@ -890,15 +944,19 @@ struct mlx5_nl_ifindex_data { * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ int -mlx5_nl_switch_info(int nl, unsigned int ifindex, struct mlx5_switch_info *info) +mlx5_nl_switch_info(int nl, unsigned int ifindex, + struct mlx5_switch_info *info) { - uint32_t seq = random(); struct { struct nlmsghdr nh; struct ifinfomsg info; + struct rtattr rta; + uint32_t extmask; } req = { .nh = { - .nlmsg_len = NLMSG_LENGTH(sizeof(req.info)), + .nlmsg_len = NLMSG_LENGTH + (sizeof(req.info) + + RTA_LENGTH(sizeof(uint32_t))), .nlmsg_type = RTM_GETLINK, .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, }, @@ -906,11 +964,23 @@ struct mlx5_nl_ifindex_data { .ifi_family = AF_UNSPEC, .ifi_index = ifindex, }, + .rta = { + .rta_type = IFLA_EXT_MASK, + .rta_len = RTA_LENGTH(sizeof(int32_t)), + }, + .extmask = RTE_LE32(1), }; + uint32_t sn = random(); int ret; - ret = mlx5_nl_send(nl, &req.nh, seq); + ret = mlx5_nl_send(nl, &req.nh, sn); if (ret >= 0) - ret = mlx5_nl_recv(nl, seq, mlx5_nl_switch_info_cb, info); + ret = mlx5_nl_recv(nl, sn, mlx5_nl_switch_info_cb, info); + if (info->master && info->representor) { + DRV_LOG(ERR, "ifindex %u device is recognized as master" + " and as representor", ifindex); + rte_errno = ENODEV; + ret = -rte_errno; + } return ret; } -- 1.8.3.1