From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by dpdk.org (Postfix) with ESMTP id 5580A2C39 for ; Mon, 25 Mar 2019 18:03:46 +0100 (CET) Received: from Internal Mail-Server by MTLPINE1 (envelope-from viacheslavo@mellanox.com) with ESMTPS (AES256-SHA encrypted); 25 Mar 2019 19:03:41 +0200 Received: from pegasus12.mtr.labs.mlnx. (pegasus12.mtr.labs.mlnx [10.210.17.40]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x2PH3fbX020888; Mon, 25 Mar 2019 19:03:41 +0200 From: Viacheslav Ovsiienko To: dev@dpdk.org Cc: shahafs@mellanox.com Date: Mon, 25 Mar 2019 17:03:22 +0000 Message-Id: <1553533414-9911-2-git-send-email-viacheslavo@mellanox.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1553533414-9911-1-git-send-email-viacheslavo@mellanox.com> References: <1553155888-27498-1-git-send-email-viacheslavo@mellanox.com> <1553533414-9911-1-git-send-email-viacheslavo@mellanox.com> Subject: [dpdk-dev] [PATCH v2 01/13] net/mlx5: add representor recognition on kernels 5.x X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 25 Mar 2019 17:03:47 -0000 The master device and VF representors were distinguished by the presence of a port name: the master device did not have one. New Linux kernels, starting from 5.0, provide the port name for the master device as well, so the implemented representor recognition method no longer works. The new recognition method is based on querying the number of VFs created on the base of the device. The IFLA_NUM_VF attribute is returned by the kernel if the IFLA_EXT_MASK attribute is specified in the Netlink request message. Also, a check for the presence of the device symlink in the device sysfs folder is added to distinguish representors with the sysfs-based method. 
Signed-off-by: Viacheslav Ovsiienko --- drivers/net/mlx5/Makefile | 10 ++++++++++ drivers/net/mlx5/meson.build | 4 ++++ drivers/net/mlx5/mlx5.c | 17 ++++++++++++++--- drivers/net/mlx5/mlx5.h | 1 + drivers/net/mlx5/mlx5_ethdev.c | 14 ++++++++++++-- drivers/net/mlx5/mlx5_nl.c | 37 ++++++++++++++++++++++++++++++++++--- 6 files changed, 75 insertions(+), 8 deletions(-) diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile index 1ed299d..3dd7e38 100644 --- a/drivers/net/mlx5/Makefile +++ b/drivers/net/mlx5/Makefile @@ -231,6 +231,16 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh enum RDMA_NLDEV_ATTR_NDEV_INDEX \ $(AUTOCONF_OUTPUT) $Q sh -- '$<' '$@' \ + HAVE_IFLA_NUM_VF \ + linux/if_link.h \ + enum IFLA_NUM_VF \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ + HAVE_IFLA_EXT_MASK \ + linux/if_link.h \ + enum IFLA_EXT_MASK \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ HAVE_IFLA_PHYS_SWITCH_ID \ linux/if_link.h \ enum IFLA_PHYS_SWITCH_ID \ diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build index 0cf2f08..e3cb9bc 100644 --- a/drivers/net/mlx5/meson.build +++ b/drivers/net/mlx5/meson.build @@ -133,6 +133,10 @@ if build 'ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT' ], [ 'HAVE_ETHTOOL_LINK_MODE_100G', 'linux/ethtool.h', 'ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT' ], + [ 'HAVE_IFLA_NUM_VF', 'linux/if_link.h', + 'IFLA_NUM_VF' ], + [ 'HAVE_IFLA_EXT_MASK', 'linux/if_link.h', + 'IFLA_EXT_MASK' ], [ 'HAVE_IFLA_PHYS_SWITCH_ID', 'linux/if_link.h', 'IFLA_PHYS_SWITCH_ID' ], [ 'HAVE_IFLA_PHYS_PORT_NAME', 'linux/if_link.h', diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index ad1975c..032f1f2 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -13,7 +13,6 @@ #include #include #include -#include #include /* Verbs header. 
*/ @@ -1001,9 +1000,21 @@ priv->nl_socket_route = mlx5_nl_init(NETLINK_ROUTE); priv->nl_sn = 0; priv->representor = !!switch_info->representor; + priv->master = !!switch_info->master; priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID; - priv->representor_id = - switch_info->representor ? switch_info->port_name : -1; + /* + * Currently we support sinlge E-Switch per PF configurations + * only and representors_id field contains the vport index for + * corresponding VF, deduced from representor port name. + * For exapmple, let's have the IB device port 10, it has + * attached network device eth0, which has port name attribute + * pf0vf2, we can deduce the VF number as 2, and set vport index + * as 3 (2+1). This assigning schema should be changed if the + * multiple E-Switch instance per PF configurations or/and PCI + * subfunctions dded. + */ + priv->representor_id = switch_info->representor ? + switch_info->port_name + 1 : -1; /* * Look for sibling devices in order to reuse their switch domain * if any, otherwise allocate one. diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index a88cb4a..58bc37f 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -214,6 +214,7 @@ struct mlx5_priv { uint16_t mtu; /* Configured MTU. */ unsigned int isolated:1; /* Whether isolated mode is enabled. */ unsigned int representor:1; /* Device is a port representor. */ + unsigned int master:1; /* Device is a E-Switch master. */ uint16_t domain_id; /* Switch domain identifier. */ int32_t representor_id; /* Port representor identifier. */ /* RX/TX queues. 
*/ diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index 2e8a296..64c6b6f 100644 --- a/drivers/net/mlx5/mlx5_ethdev.c +++ b/drivers/net/mlx5/mlx5_ethdev.c @@ -1362,8 +1362,10 @@ int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size) .port_name = 0, .switch_id = 0, }; + DIR *dir; bool port_name_set = false; bool port_switch_id_set = false; + bool device_dir = false; char c; int ret; @@ -1376,6 +1378,8 @@ int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size) ifname); MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id", ifname); + MKSTR(pci_device, "/sys/class/net/%s/device", + ifname); file = fopen(phys_port_name, "rb"); if (file != NULL) { @@ -1394,8 +1398,14 @@ int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size) fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 && c == '\n'; fclose(file); - data.master = port_switch_id_set && !port_name_set; - data.representor = port_switch_id_set && port_name_set; + dir = opendir(pci_device); + if (dir != NULL) { + closedir(dir); + device_dir = true; + } + data.master = port_switch_id_set && (!port_name_set || device_dir); + data.representor = port_switch_id_set && port_name_set && !device_dir; + assert(!(data.master && data.representor)); *info = data; return 0; } diff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c index 8a10109..9d0fcba 100644 --- a/drivers/net/mlx5/mlx5_nl.c +++ b/drivers/net/mlx5/mlx5_nl.c @@ -65,6 +65,12 @@ #endif /* These are normally found in linux/if_link.h. 
*/ +#ifndef HAVE_IFLA_NUM_VF +#define IFLA_NUM_VF 21 +#endif +#ifndef HAVE_IFLA_EXT_MASK +#define IFLA_EXT_MASK 29 +#endif #ifndef HAVE_IFLA_PHYS_SWITCH_ID #define IFLA_PHYS_SWITCH_ID 36 #endif @@ -837,6 +843,7 @@ struct mlx5_nl_ifindex_data { size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg)); bool port_name_set = false; bool switch_id_set = false; + bool num_vf_set = false; if (nh->nlmsg_type != RTM_NEWLINK) goto error; @@ -848,6 +855,9 @@ struct mlx5_nl_ifindex_data { if (ra->rta_len > nh->nlmsg_len - off) goto error; switch (ra->rta_type) { + case IFLA_NUM_VF: + num_vf_set = true; + break; case IFLA_PHYS_PORT_NAME: port_name_set = mlx5_translate_port_name((char *)payload, @@ -864,8 +874,20 @@ struct mlx5_nl_ifindex_data { } off += RTA_ALIGN(ra->rta_len); } - info.master = switch_id_set && !port_name_set; - info.representor = switch_id_set && port_name_set; + if (switch_id_set) { + if (info.port_name_new) { + /* New representors naming schema. */ + if (port_name_set) { + info.master = (info.port_name == -1); + info.representor = (info.port_name != -1); + } + } else { + /* Legacy representors naming schema. 
*/ + info.master = (!port_name_set || num_vf_set); + info.representor = port_name_set && !num_vf_set; + } + } + assert(!(data.master && data.representor)); memcpy(arg, &info, sizeof(info)); return 0; error: @@ -893,9 +915,13 @@ struct mlx5_nl_ifindex_data { struct { struct nlmsghdr nh; struct ifinfomsg info; + struct rtattr rta; + uint32_t extmask; } req = { .nh = { - .nlmsg_len = NLMSG_LENGTH(sizeof(req.info)), + .nlmsg_len = NLMSG_LENGTH + (sizeof(req.info) + + RTA_LENGTH(sizeof(uint32_t))), .nlmsg_type = RTM_GETLINK, .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, }, @@ -903,6 +929,11 @@ struct mlx5_nl_ifindex_data { .ifi_family = AF_UNSPEC, .ifi_index = ifindex, }, + .rta = { + .rta_type = IFLA_EXT_MASK, + .rta_len = RTA_LENGTH(sizeof(int32_t)), + }, + .extmask = RTE_LE32(1), }; int ret; -- 1.8.3.1 From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by dpdk.space (Postfix) with ESMTP id 3FF0BA05D3 for ; Mon, 25 Mar 2019 18:04:43 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 05CDF4CA0; Mon, 25 Mar 2019 18:04:02 +0100 (CET) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by dpdk.org (Postfix) with ESMTP id 5580A2C39 for ; Mon, 25 Mar 2019 18:03:46 +0100 (CET) Received: from Internal Mail-Server by MTLPINE1 (envelope-from viacheslavo@mellanox.com) with ESMTPS (AES256-SHA encrypted); 25 Mar 2019 19:03:41 +0200 Received: from pegasus12.mtr.labs.mlnx. 
(pegasus12.mtr.labs.mlnx [10.210.17.40]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x2PH3fbX020888; Mon, 25 Mar 2019 19:03:41 +0200 From: Viacheslav Ovsiienko To: dev@dpdk.org Cc: shahafs@mellanox.com Date: Mon, 25 Mar 2019 17:03:22 +0000 Message-Id: <1553533414-9911-2-git-send-email-viacheslavo@mellanox.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1553533414-9911-1-git-send-email-viacheslavo@mellanox.com> References: <1553155888-27498-1-git-send-email-viacheslavo@mellanox.com> <1553533414-9911-1-git-send-email-viacheslavo@mellanox.com> Subject: [dpdk-dev] [PATCH v2 01/13] net/mlx5: add representor recognition on kernels 5.x X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Content-Type: text/plain; charset="UTF-8" Message-ID: <20190325170322.p4SEPde8EofYgfdWCrEMTErsWBAdH54IyfDB5yt4CIk@z> The master device and VF representors were distinguished by the presence of a port name: the master device did not have one. New Linux kernels, starting from 5.0, provide the port name for the master device as well, so the implemented representor recognition method no longer works. The new recognition method is based on querying the number of VFs created on the base of the device. The IFLA_NUM_VF attribute is returned by the kernel if the IFLA_EXT_MASK attribute is specified in the Netlink request message. Also, a check for the presence of the device symlink in the device sysfs folder is added to distinguish representors with the sysfs-based method. 
Signed-off-by: Viacheslav Ovsiienko --- drivers/net/mlx5/Makefile | 10 ++++++++++ drivers/net/mlx5/meson.build | 4 ++++ drivers/net/mlx5/mlx5.c | 17 ++++++++++++++--- drivers/net/mlx5/mlx5.h | 1 + drivers/net/mlx5/mlx5_ethdev.c | 14 ++++++++++++-- drivers/net/mlx5/mlx5_nl.c | 37 ++++++++++++++++++++++++++++++++++--- 6 files changed, 75 insertions(+), 8 deletions(-) diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile index 1ed299d..3dd7e38 100644 --- a/drivers/net/mlx5/Makefile +++ b/drivers/net/mlx5/Makefile @@ -231,6 +231,16 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh enum RDMA_NLDEV_ATTR_NDEV_INDEX \ $(AUTOCONF_OUTPUT) $Q sh -- '$<' '$@' \ + HAVE_IFLA_NUM_VF \ + linux/if_link.h \ + enum IFLA_NUM_VF \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ + HAVE_IFLA_EXT_MASK \ + linux/if_link.h \ + enum IFLA_EXT_MASK \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ HAVE_IFLA_PHYS_SWITCH_ID \ linux/if_link.h \ enum IFLA_PHYS_SWITCH_ID \ diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build index 0cf2f08..e3cb9bc 100644 --- a/drivers/net/mlx5/meson.build +++ b/drivers/net/mlx5/meson.build @@ -133,6 +133,10 @@ if build 'ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT' ], [ 'HAVE_ETHTOOL_LINK_MODE_100G', 'linux/ethtool.h', 'ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT' ], + [ 'HAVE_IFLA_NUM_VF', 'linux/if_link.h', + 'IFLA_NUM_VF' ], + [ 'HAVE_IFLA_EXT_MASK', 'linux/if_link.h', + 'IFLA_EXT_MASK' ], [ 'HAVE_IFLA_PHYS_SWITCH_ID', 'linux/if_link.h', 'IFLA_PHYS_SWITCH_ID' ], [ 'HAVE_IFLA_PHYS_PORT_NAME', 'linux/if_link.h', diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index ad1975c..032f1f2 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -13,7 +13,6 @@ #include #include #include -#include #include /* Verbs header. 
*/ @@ -1001,9 +1000,21 @@ priv->nl_socket_route = mlx5_nl_init(NETLINK_ROUTE); priv->nl_sn = 0; priv->representor = !!switch_info->representor; + priv->master = !!switch_info->master; priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID; - priv->representor_id = - switch_info->representor ? switch_info->port_name : -1; + /* + * Currently we support sinlge E-Switch per PF configurations + * only and representors_id field contains the vport index for + * corresponding VF, deduced from representor port name. + * For exapmple, let's have the IB device port 10, it has + * attached network device eth0, which has port name attribute + * pf0vf2, we can deduce the VF number as 2, and set vport index + * as 3 (2+1). This assigning schema should be changed if the + * multiple E-Switch instance per PF configurations or/and PCI + * subfunctions dded. + */ + priv->representor_id = switch_info->representor ? + switch_info->port_name + 1 : -1; /* * Look for sibling devices in order to reuse their switch domain * if any, otherwise allocate one. diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index a88cb4a..58bc37f 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -214,6 +214,7 @@ struct mlx5_priv { uint16_t mtu; /* Configured MTU. */ unsigned int isolated:1; /* Whether isolated mode is enabled. */ unsigned int representor:1; /* Device is a port representor. */ + unsigned int master:1; /* Device is a E-Switch master. */ uint16_t domain_id; /* Switch domain identifier. */ int32_t representor_id; /* Port representor identifier. */ /* RX/TX queues. 
*/ diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index 2e8a296..64c6b6f 100644 --- a/drivers/net/mlx5/mlx5_ethdev.c +++ b/drivers/net/mlx5/mlx5_ethdev.c @@ -1362,8 +1362,10 @@ int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size) .port_name = 0, .switch_id = 0, }; + DIR *dir; bool port_name_set = false; bool port_switch_id_set = false; + bool device_dir = false; char c; int ret; @@ -1376,6 +1378,8 @@ int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size) ifname); MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id", ifname); + MKSTR(pci_device, "/sys/class/net/%s/device", + ifname); file = fopen(phys_port_name, "rb"); if (file != NULL) { @@ -1394,8 +1398,14 @@ int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size) fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 && c == '\n'; fclose(file); - data.master = port_switch_id_set && !port_name_set; - data.representor = port_switch_id_set && port_name_set; + dir = opendir(pci_device); + if (dir != NULL) { + closedir(dir); + device_dir = true; + } + data.master = port_switch_id_set && (!port_name_set || device_dir); + data.representor = port_switch_id_set && port_name_set && !device_dir; + assert(!(data.master && data.representor)); *info = data; return 0; } diff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c index 8a10109..9d0fcba 100644 --- a/drivers/net/mlx5/mlx5_nl.c +++ b/drivers/net/mlx5/mlx5_nl.c @@ -65,6 +65,12 @@ #endif /* These are normally found in linux/if_link.h. 
*/ +#ifndef HAVE_IFLA_NUM_VF +#define IFLA_NUM_VF 21 +#endif +#ifndef HAVE_IFLA_EXT_MASK +#define IFLA_EXT_MASK 29 +#endif #ifndef HAVE_IFLA_PHYS_SWITCH_ID #define IFLA_PHYS_SWITCH_ID 36 #endif @@ -837,6 +843,7 @@ struct mlx5_nl_ifindex_data { size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg)); bool port_name_set = false; bool switch_id_set = false; + bool num_vf_set = false; if (nh->nlmsg_type != RTM_NEWLINK) goto error; @@ -848,6 +855,9 @@ struct mlx5_nl_ifindex_data { if (ra->rta_len > nh->nlmsg_len - off) goto error; switch (ra->rta_type) { + case IFLA_NUM_VF: + num_vf_set = true; + break; case IFLA_PHYS_PORT_NAME: port_name_set = mlx5_translate_port_name((char *)payload, @@ -864,8 +874,20 @@ struct mlx5_nl_ifindex_data { } off += RTA_ALIGN(ra->rta_len); } - info.master = switch_id_set && !port_name_set; - info.representor = switch_id_set && port_name_set; + if (switch_id_set) { + if (info.port_name_new) { + /* New representors naming schema. */ + if (port_name_set) { + info.master = (info.port_name == -1); + info.representor = (info.port_name != -1); + } + } else { + /* Legacy representors naming schema. */ + info.master = (!port_name_set || num_vf_set); + info.representor = port_name_set && !num_vf_set; + } + } + assert(!(data.master && data.representor)); memcpy(arg, &info, sizeof(info)); return 0; error: @@ -893,9 +915,13 @@ struct mlx5_nl_ifindex_data { struct { struct nlmsghdr nh; struct ifinfomsg info; + struct rtattr rta; + uint32_t extmask; } req = { .nh = { - .nlmsg_len = NLMSG_LENGTH(sizeof(req.info)), + .nlmsg_len = NLMSG_LENGTH + (sizeof(req.info) + + RTA_LENGTH(sizeof(uint32_t))), .nlmsg_type = RTM_GETLINK, .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, }, @@ -903,6 +929,11 @@ struct mlx5_nl_ifindex_data { .ifi_family = AF_UNSPEC, .ifi_index = ifindex, }, + .rta = { + .rta_type = IFLA_EXT_MASK, + .rta_len = RTA_LENGTH(sizeof(int32_t)), + }, + .extmask = RTE_LE32(1), }; int ret; -- 1.8.3.1