From: Xueming Li <xuemingl@nvidia.com>
To: <dev@dpdk.org>
Cc: <xuemingl@nvidia.com>,
Viacheslav Ovsiienko <viacheslavo@nvidia.com>,
Lior Margalit <lmargalit@nvidia.com>,
Matan Azrad <matan@nvidia.com>, "Ray Kinsella" <mdr@ashroe.eu>
Subject: [dpdk-dev] [PATCH v4 1/8] common/mlx5: add netlink API to get RDMA port state
Date: Fri, 22 Oct 2021 17:11:35 +0800 [thread overview]
Message-ID: <20211022091142.51397-2-xuemingl@nvidia.com> (raw)
In-Reply-To: <20211022091142.51397-1-xuemingl@nvidia.com>
Introduce netlink API to get RDMA port state.
Port state is retrieved based on RDMA device name and port index.
Signed-off-by: Xueming Li <xuemingl@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
drivers/common/mlx5/linux/meson.build | 2 +
drivers/common/mlx5/linux/mlx5_nl.c | 136 +++++++++++++++++++-------
drivers/common/mlx5/linux/mlx5_nl.h | 2 +
drivers/common/mlx5/version.map | 1 +
4 files changed, 106 insertions(+), 35 deletions(-)
diff --git a/drivers/common/mlx5/linux/meson.build b/drivers/common/mlx5/linux/meson.build
index cbea58f557d..2dcd27b7786 100644
--- a/drivers/common/mlx5/linux/meson.build
+++ b/drivers/common/mlx5/linux/meson.build
@@ -175,6 +175,8 @@ has_sym_args = [
'RDMA_NLDEV_ATTR_DEV_NAME' ],
[ 'HAVE_RDMA_NLDEV_ATTR_PORT_INDEX', 'rdma/rdma_netlink.h',
'RDMA_NLDEV_ATTR_PORT_INDEX' ],
+ [ 'HAVE_RDMA_NLDEV_ATTR_PORT_STATE', 'rdma/rdma_netlink.h',
+ 'RDMA_NLDEV_ATTR_PORT_STATE' ],
[ 'HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX', 'rdma/rdma_netlink.h',
'RDMA_NLDEV_ATTR_NDEV_INDEX' ],
[ 'HAVE_MLX5_DR_FLOW_DUMP', 'infiniband/mlx5dv.h',
diff --git a/drivers/common/mlx5/linux/mlx5_nl.c b/drivers/common/mlx5/linux/mlx5_nl.c
index 530d491b660..fd4c2d26253 100644
--- a/drivers/common/mlx5/linux/mlx5_nl.c
+++ b/drivers/common/mlx5/linux/mlx5_nl.c
@@ -78,6 +78,9 @@
#ifndef HAVE_RDMA_NLDEV_ATTR_PORT_INDEX
#define RDMA_NLDEV_ATTR_PORT_INDEX 3
#endif
+#ifndef HAVE_RDMA_NLDEV_ATTR_PORT_STATE
+#define RDMA_NLDEV_ATTR_PORT_STATE 12
+#endif
#ifndef HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX
#define RDMA_NLDEV_ATTR_NDEV_INDEX 50
#endif
@@ -160,14 +163,16 @@ struct mlx5_nl_mac_addr {
#define MLX5_NL_CMD_GET_IB_INDEX (1 << 1)
#define MLX5_NL_CMD_GET_NET_INDEX (1 << 2)
#define MLX5_NL_CMD_GET_PORT_INDEX (1 << 3)
+#define MLX5_NL_CMD_GET_PORT_STATE (1 << 4)
/** Data structure used by mlx5_nl_cmdget_cb(). */
-struct mlx5_nl_ifindex_data {
+struct mlx5_nl_port_info {
const char *name; /**< IB device name (in). */
uint32_t flags; /**< found attribute flags (out). */
uint32_t ibindex; /**< IB device index (out). */
uint32_t ifindex; /**< Network interface index (out). */
uint32_t portnum; /**< IB device max port number (out). */
+ uint16_t state; /**< IB device port state (out). */
};
uint32_t atomic_sn;
@@ -966,8 +971,8 @@ mlx5_nl_allmulti(int nlsk_fd, unsigned int iface_idx, int enable)
static int
mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg)
{
- struct mlx5_nl_ifindex_data *data = arg;
- struct mlx5_nl_ifindex_data local = {
+ struct mlx5_nl_port_info *data = arg;
+ struct mlx5_nl_port_info local = {
.flags = 0,
};
size_t off = NLMSG_HDRLEN;
@@ -1000,6 +1005,10 @@ mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg)
local.portnum = *(uint32_t *)payload;
local.flags |= MLX5_NL_CMD_GET_PORT_INDEX;
break;
+ case RDMA_NLDEV_ATTR_PORT_STATE:
+ local.state = *(uint8_t *)payload;
+ local.flags |= MLX5_NL_CMD_GET_PORT_STATE;
+ break;
default:
break;
}
@@ -1016,6 +1025,7 @@ mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg)
data->ibindex = local.ibindex;
data->ifindex = local.ifindex;
data->portnum = local.portnum;
+ data->state = local.state;
}
return 0;
error:
@@ -1024,7 +1034,7 @@ mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg)
}
/**
- * Get index of network interface associated with some IB device.
+ * Get port info of network interface associated with some IB device.
*
* This is the only somewhat safe method to avoid resorting to heuristics
* when faced with port representors. Unfortunately it requires at least
@@ -1032,27 +1042,20 @@ mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg)
*
* @param nl
* Netlink socket of the RDMA kind (NETLINK_RDMA).
- * @param[in] name
- * IB device name.
* @param[in] pindex
* IB device port index, starting from 1
+ * @param[out] data
+ * Pointer to port info.
* @return
- * A valid (nonzero) interface index on success, 0 otherwise and rte_errno
- * is set.
+ * 1 on success, negative on error and rte_errno is set.
*/
-unsigned int
-mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex)
+static int
+mlx5_nl_port_info(int nl, uint32_t pindex, struct mlx5_nl_port_info *data)
{
- struct mlx5_nl_ifindex_data data = {
- .name = name,
- .flags = 0,
- .ibindex = 0, /* Determined during first pass. */
- .ifindex = 0, /* Determined during second pass. */
- };
union {
struct nlmsghdr nh;
uint8_t buf[NLMSG_HDRLEN +
- NLA_HDRLEN + NLA_ALIGN(sizeof(data.ibindex)) +
+ NLA_HDRLEN + NLA_ALIGN(sizeof(data->ibindex)) +
NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))];
} req = {
.nh = {
@@ -1068,24 +1071,24 @@ mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex)
ret = mlx5_nl_send(nl, &req.nh, sn);
if (ret < 0)
- return 0;
- ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
+ return ret;
+ ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, data);
if (ret < 0)
- return 0;
- if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
- !(data.flags & MLX5_NL_CMD_GET_IB_INDEX))
+ return ret;
+ if (!(data->flags & MLX5_NL_CMD_GET_IB_NAME) ||
+ !(data->flags & MLX5_NL_CMD_GET_IB_INDEX))
goto error;
- data.flags = 0;
+ data->flags = 0;
sn = MLX5_NL_SN_GENERATE;
req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
RDMA_NLDEV_CMD_PORT_GET);
req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN);
na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN);
- na->nla_len = NLA_HDRLEN + sizeof(data.ibindex);
+ na->nla_len = NLA_HDRLEN + sizeof(data->ibindex);
na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX;
memcpy((void *)((uintptr_t)na + NLA_HDRLEN),
- &data.ibindex, sizeof(data.ibindex));
+ &data->ibindex, sizeof(data->ibindex));
na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len));
na->nla_len = NLA_HDRLEN + sizeof(pindex);
na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX;
@@ -1093,19 +1096,82 @@ mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex)
&pindex, sizeof(pindex));
ret = mlx5_nl_send(nl, &req.nh, sn);
if (ret < 0)
- return 0;
- ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, &data);
+ return ret;
+ ret = mlx5_nl_recv(nl, sn, mlx5_nl_cmdget_cb, data);
if (ret < 0)
- return 0;
- if (!(data.flags & MLX5_NL_CMD_GET_IB_NAME) ||
- !(data.flags & MLX5_NL_CMD_GET_IB_INDEX) ||
- !(data.flags & MLX5_NL_CMD_GET_NET_INDEX) ||
- !data.ifindex)
+ return ret;
+ if (!(data->flags & MLX5_NL_CMD_GET_IB_NAME) ||
+ !(data->flags & MLX5_NL_CMD_GET_IB_INDEX) ||
+ !(data->flags & MLX5_NL_CMD_GET_NET_INDEX) ||
+ !data->ifindex)
goto error;
- return data.ifindex;
+ return 1;
error:
rte_errno = ENODEV;
- return 0;
+ return -rte_errno;
+}
+
+/**
+ * Get index of network interface associated with some IB device.
+ *
+ * This is the only somewhat safe method to avoid resorting to heuristics
+ * when faced with port representors. Unfortunately it requires at least
+ * Linux 4.17.
+ *
+ * @param nl
+ * Netlink socket of the RDMA kind (NETLINK_RDMA).
+ * @param[in] name
+ * IB device name.
+ * @param[in] pindex
+ * IB device port index, starting from 1
+ * @return
+ * A valid (nonzero) interface index on success, 0 otherwise and rte_errno
+ * is set.
+ */
+unsigned int
+mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex)
+{
+ struct mlx5_nl_port_info data = {
+ .ifindex = 0,
+ .name = name,
+ };
+
+ if (mlx5_nl_port_info(nl, pindex, &data) < 0)
+ return 0;
+ return data.ifindex;
+}
+
+/**
+ * Get IB device port state.
+ *
+ * This is the only somewhat safe method to get info for port number >= 255.
+ * Unfortunately it requires at least Linux 4.17.
+ *
+ * @param nl
+ * Netlink socket of the RDMA kind (NETLINK_RDMA).
+ * @param[in] name
+ * IB device name.
+ * @param[in] pindex
+ * IB device port index, starting from 1
+ * @return
+ * Port state (ibv_port_state) on success, negative on error
+ * and rte_errno is set.
+ */
+int
+mlx5_nl_port_state(int nl, const char *name, uint32_t pindex)
+{
+ struct mlx5_nl_port_info data = {
+ .state = 0,
+ .name = name,
+ };
+
+ if (mlx5_nl_port_info(nl, pindex, &data) < 0)
+ return -rte_errno;
+ if ((data.flags & MLX5_NL_CMD_GET_PORT_STATE) == 0) {
+ rte_errno = ENOTSUP;
+ return -rte_errno;
+ }
+ return (int)data.state;
}
/**
@@ -1123,7 +1189,7 @@ mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex)
unsigned int
mlx5_nl_portnum(int nl, const char *name)
{
- struct mlx5_nl_ifindex_data data = {
+ struct mlx5_nl_port_info data = {
.flags = 0,
.name = name,
.ifindex = 0,
diff --git a/drivers/common/mlx5/linux/mlx5_nl.h b/drivers/common/mlx5/linux/mlx5_nl.h
index 202849f52ad..2063c0deeb9 100644
--- a/drivers/common/mlx5/linux/mlx5_nl.h
+++ b/drivers/common/mlx5/linux/mlx5_nl.h
@@ -54,6 +54,8 @@ unsigned int mlx5_nl_portnum(int nl, const char *name);
__rte_internal
unsigned int mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex);
__rte_internal
+int mlx5_nl_port_state(int nl, const char *name, uint32_t pindex);
+__rte_internal
int mlx5_nl_vf_mac_addr_modify(int nlsk_fd, unsigned int iface_idx,
struct rte_ether_addr *mac, int vf_index);
__rte_internal
diff --git a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map
index 1167fcd3236..7c95172fe87 100644
--- a/drivers/common/mlx5/version.map
+++ b/drivers/common/mlx5/version.map
@@ -120,6 +120,7 @@ INTERNAL {
mlx5_nl_mac_addr_flush; # WINDOWS_NO_EXPORT
mlx5_nl_mac_addr_remove; # WINDOWS_NO_EXPORT
mlx5_nl_mac_addr_sync; # WINDOWS_NO_EXPORT
+ mlx5_nl_port_state; # WINDOWS_NO_EXPORT
mlx5_nl_portnum; # WINDOWS_NO_EXPORT
mlx5_nl_promisc; # WINDOWS_NO_EXPORT
mlx5_nl_switch_info; # WINDOWS_NO_EXPORT
--
2.33.0
next prev parent reply other threads:[~2021-10-22 9:12 UTC|newest]
Thread overview: 47+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-09-27 8:32 [dpdk-dev] [PATCH 0/8] net/mlx5: support more than 255 representors Xueming Li
2021-09-27 8:32 ` [dpdk-dev] [PATCH 1/8] common/mlx5: add netlink API to get RDMA port state Xueming Li
2021-09-27 8:32 ` [dpdk-dev] [PATCH 2/8] net/mlx5: use netlink when IB port greater than 255 Xueming Li
2021-09-27 8:32 ` [dpdk-dev] [PATCH 3/8] net/mlx5: improve Verbs flow priority discover for scalable Xueming Li
2021-09-27 8:32 ` [dpdk-dev] [PATCH 4/8] net/mlx5: check DevX to support more Verb ports Xueming Li
2021-09-27 8:32 ` [dpdk-dev] [PATCH 5/8] net/mlx5: support flow item port of switch manager Xueming Li
2021-09-27 8:32 ` [dpdk-dev] [PATCH 6/8] net/mlx5: supports flow item of normal Tx queue Xueming Li
2021-09-27 8:32 ` [dpdk-dev] [PATCH 7/8] net/mlx5: fix internal root table flow priroity Xueming Li
2021-09-27 8:32 ` [dpdk-dev] [PATCH 8/8] net/mlx5: enable DevX Tx queue creation Xueming Li
2021-10-16 8:07 ` [dpdk-dev] [PATCH v2 0/8] net/mlx5: support more than 255 representors Xueming Li
2021-10-16 8:07 ` [dpdk-dev] [PATCH v2 1/8] common/mlx5: add netlink API to get RDMA port state Xueming Li
2021-10-19 8:23 ` Slava Ovsiienko
2021-10-16 8:07 ` [dpdk-dev] [PATCH v2 2/8] net/mlx5: use netlink when IB port greater than 255 Xueming Li
2021-10-19 8:24 ` Slava Ovsiienko
2021-10-16 8:07 ` [dpdk-dev] [PATCH v2 3/8] net/mlx5: improve Verbs flow priority discover for scalable Xueming Li
2021-10-19 8:26 ` Slava Ovsiienko
2021-10-16 8:07 ` [dpdk-dev] [PATCH v2 4/8] net/mlx5: support E-Switch manager egress traffic match Xueming Li
2021-10-19 8:26 ` Slava Ovsiienko
2021-10-16 8:07 ` [dpdk-dev] [PATCH v2 5/8] net/mlx5: supports flow item of normal Tx queue Xueming Li
2021-10-19 8:27 ` Slava Ovsiienko
2021-10-16 8:07 ` [dpdk-dev] [PATCH v2 6/8] net/mlx5: fix internal root table flow priroity Xueming Li
2021-10-19 8:28 ` Slava Ovsiienko
2021-10-16 8:07 ` [dpdk-dev] [PATCH v2 7/8] net/mlx5: enable DevX Tx queue creation Xueming Li
2021-10-19 8:29 ` Slava Ovsiienko
2021-10-16 8:07 ` [dpdk-dev] [PATCH v2 8/8] net/mlx5: check DevX to support more Verbs ports Xueming Li
2021-10-19 8:30 ` Slava Ovsiienko
2021-10-19 10:34 ` [dpdk-dev] [PATCH v3 0/8] net/mlx5: support more than 255 representors Xueming Li
2021-10-19 10:34 ` [dpdk-dev] [PATCH v3 1/8] common/mlx5: add netlink API to get RDMA port state Xueming Li
2021-10-21 13:34 ` Ferruh Yigit
2021-10-19 10:34 ` [dpdk-dev] [PATCH v3 2/8] net/mlx5: use netlink when IB port greater than 255 Xueming Li
2021-10-19 10:34 ` [dpdk-dev] [PATCH v3 3/8] net/mlx5: improve Verbs flow priority discover for scalable Xueming Li
2021-10-19 10:34 ` [dpdk-dev] [PATCH v3 4/8] net/mlx5: support E-Switch manager egress traffic match Xueming Li
2021-10-19 10:34 ` [dpdk-dev] [PATCH v3 5/8] net/mlx5: supports flow item of normal Tx queue Xueming Li
2021-10-19 10:34 ` [dpdk-dev] [PATCH v3 6/8] net/mlx5: fix internal root table flow priroity Xueming Li
2021-10-19 10:35 ` [dpdk-dev] [PATCH v3 7/8] net/mlx5: enable DevX Tx queue creation Xueming Li
2021-10-19 10:35 ` [dpdk-dev] [PATCH v3 8/8] net/mlx5: check DevX to support more Verbs ports Xueming Li
2021-10-20 13:40 ` [dpdk-dev] [PATCH v3 0/8] net/mlx5: support more than 255 representors Raslan Darawsheh
2021-10-20 16:00 ` Xueming(Steven) Li
2021-10-22 9:11 ` [dpdk-dev] [PATCH v4 " Xueming Li
2021-10-22 9:11 ` Xueming Li [this message]
2021-10-22 9:11 ` [dpdk-dev] [PATCH v4 2/8] net/mlx5: use netlink when IB port greater than 255 Xueming Li
2021-10-22 9:11 ` [dpdk-dev] [PATCH v4 3/8] net/mlx5: improve Verbs flow priority discover for scalable Xueming Li
2021-10-22 9:11 ` [dpdk-dev] [PATCH v4 4/8] net/mlx5: support E-Switch manager egress traffic match Xueming Li
2021-10-22 9:11 ` [dpdk-dev] [PATCH v4 5/8] net/mlx5: supports flow item of normal Tx queue Xueming Li
2021-10-22 9:11 ` [dpdk-dev] [PATCH v4 6/8] net/mlx5: fix internal root table flow priroity Xueming Li
2021-10-22 9:11 ` [dpdk-dev] [PATCH v4 7/8] net/mlx5: enable DevX Tx queue creation Xueming Li
2021-10-22 9:11 ` [dpdk-dev] [PATCH v4 8/8] net/mlx5: check DevX to support more Verbs ports Xueming Li
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20211022091142.51397-2-xuemingl@nvidia.com \
--to=xuemingl@nvidia.com \
--cc=dev@dpdk.org \
--cc=lmargalit@nvidia.com \
--cc=matan@nvidia.com \
--cc=mdr@ashroe.eu \
--cc=viacheslavo@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).