DPDK patches and discussions
 help / color / mirror / Atom feed
From: Adrien Mazarguil <adrien.mazarguil@6wind.com>
To: Shahaf Shuler <shahafs@mellanox.com>
Cc: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>,
	Yongseok Koh <yskoh@mellanox.com>,
	dev@dpdk.org
Subject: [dpdk-dev] [PATCH 1/6] net/mlx5: lay groundwork for switch offloads
Date: Wed, 27 Jun 2018 20:08:10 +0200	[thread overview]
Message-ID: <20180627173355.4718-2-adrien.mazarguil@6wind.com> (raw)
In-Reply-To: <20180627173355.4718-1-adrien.mazarguil@6wind.com>

With mlx5, unlike normal flow rules implemented through Verbs for traffic
emitted and received by the application, those targeting different logical
ports of the device (VF representors for instance) are offloaded at the
switch level and must be configured through Netlink (TC interface).

This patch adds preliminary support to manage such flow rules through the
flow API (rte_flow).

Instead of rewriting tons of Netlink helpers and as previously suggested by
Stephen [1], this patch introduces a new dependency to libmnl [2]
(LGPL-2.1) when compiling mlx5.

[1] https://mails.dpdk.org/archives/dev/2018-March/092676.html
[2] https://netfilter.org/projects/libmnl/

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
---
 drivers/net/mlx5/Makefile       |   2 +
 drivers/net/mlx5/mlx5.c         |  32 ++++++++
 drivers/net/mlx5/mlx5.h         |  10 +++
 drivers/net/mlx5/mlx5_nl_flow.c | 139 +++++++++++++++++++++++++++++++++++
 mk/rte.app.mk                   |   2 +-
 5 files changed, 184 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 8a5229e61..3325eed06 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -33,6 +33,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl_flow.c
 
 ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y)
 INSTALL-$(CONFIG_RTE_LIBRTE_MLX5_PMD)-lib += $(LIB_GLUE)
@@ -56,6 +57,7 @@ LDLIBS += -ldl
 else
 LDLIBS += -libverbs -lmlx5
 endif
+LDLIBS += -lmnl
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
 LDLIBS += -lrte_bus_pci
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 665a3c31f..d9b9097b1 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -279,6 +279,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 		mlx5_nl_mac_addr_flush(dev);
 	if (priv->nl_socket >= 0)
 		close(priv->nl_socket);
+	if (priv->mnl_socket)
+		mlx5_nl_flow_socket_destroy(priv->mnl_socket);
 	ret = mlx5_hrxq_ibv_verify(dev);
 	if (ret)
 		DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
@@ -1077,6 +1079,34 @@ mlx5_dev_spawn_one(struct rte_device *dpdk_dev,
 			priv->nl_socket = -1;
 		mlx5_nl_mac_addr_sync(eth_dev);
 	}
+	priv->mnl_socket = mlx5_nl_flow_socket_create();
+	if (!priv->mnl_socket) {
+		err = -rte_errno;
+		DRV_LOG(WARNING,
+			"flow rules relying on switch offloads will not be"
+			" supported: cannot open libmnl socket: %s",
+			strerror(rte_errno));
+	} else {
+		struct rte_flow_error error;
+		unsigned int ifindex = mlx5_ifindex(eth_dev);
+
+		if (!ifindex) {
+			err = -rte_errno;
+			error.message =
+				"cannot retrieve network interface index";
+		} else {
+			err = mlx5_nl_flow_init(priv->mnl_socket, ifindex,
+						&error);
+		}
+		if (err) {
+			DRV_LOG(WARNING,
+				"flow rules relying on switch offloads will"
+				" not be supported: %s: %s",
+				error.message, strerror(rte_errno));
+			mlx5_nl_flow_socket_destroy(priv->mnl_socket);
+			priv->mnl_socket = NULL;
+		}
+	}
 	TAILQ_INIT(&priv->flows);
 	TAILQ_INIT(&priv->ctrl_flows);
 	/* Hint libmlx5 to use PMD allocator for data plane resources */
@@ -1127,6 +1157,8 @@ mlx5_dev_spawn_one(struct rte_device *dpdk_dev,
 	if (priv) {
 		unsigned int i;
 
+		if (priv->mnl_socket)
+			mlx5_nl_flow_socket_destroy(priv->mnl_socket);
 		i = mlx5_domain_to_port_id(priv->domain_id, NULL, 0);
 		if (i == 1)
 			claim_zero(rte_eth_switch_domain_free(priv->domain_id));
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1d8e156c8..390249adb 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -148,6 +148,8 @@ struct mlx5_drop {
 	struct mlx5_rxq_ibv *rxq; /* Verbs Rx queue. */
 };
 
+struct mnl_socket;
+
 struct priv {
 	LIST_ENTRY(priv) mem_event_cb; /* Called by memory event callback. */
 	struct rte_eth_dev_data *dev_data;  /* Pointer to device data. */
@@ -207,6 +209,7 @@ struct priv {
 	/* Context for Verbs allocator. */
 	int nl_socket; /* Netlink socket. */
 	uint32_t nl_sn; /* Netlink message sequence number. */
+	struct mnl_socket *mnl_socket; /* Libmnl socket. */
 };
 
 #define PORT_ID(priv) ((priv)->dev_data->port_id)
@@ -369,4 +372,11 @@ void mlx5_nl_mac_addr_flush(struct rte_eth_dev *dev);
 int mlx5_nl_promisc(struct rte_eth_dev *dev, int enable);
 int mlx5_nl_allmulti(struct rte_eth_dev *dev, int enable);
 
+/* mlx5_nl_flow.c */
+
+int mlx5_nl_flow_init(struct mnl_socket *nl, unsigned int ifindex,
+		      struct rte_flow_error *error);
+struct mnl_socket *mlx5_nl_flow_socket_create(void);
+void mlx5_nl_flow_socket_destroy(struct mnl_socket *nl);
+
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_nl_flow.c b/drivers/net/mlx5/mlx5_nl_flow.c
new file mode 100644
index 000000000..7a8683b03
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_nl_flow.c
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 6WIND S.A.
+ * Copyright 2018 Mellanox Technologies, Ltd
+ */
+
+#include <errno.h>
+#include <libmnl/libmnl.h>
+#include <linux/netlink.h>
+#include <linux/pkt_sched.h>
+#include <linux/rtnetlink.h>
+#include <stdalign.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+
+#include <rte_errno.h>
+#include <rte_flow.h>
+
+#include "mlx5.h"
+
+/**
+ * Send Netlink message with acknowledgment.
+ *
+ * @param nl
+ *   Libmnl socket to use.
+ * @param nlh
+ *   Message to send. This function always raises the NLM_F_ACK flag before
+ *   sending.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_nl_flow_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
+{
+	alignas(struct nlmsghdr)
+	uint8_t ans[MNL_SOCKET_BUFFER_SIZE];
+	uint32_t seq = random();
+	int ret;
+
+	nlh->nlmsg_flags |= NLM_F_ACK;
+	nlh->nlmsg_seq = seq;
+	ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
+	if (ret != -1)
+		ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
+	if (ret != -1)
+		ret = mnl_cb_run
+			(ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
+	if (!ret)
+		return 0;
+	rte_errno = errno;
+	return -rte_errno;
+}
+
+/**
+ * Initialize ingress qdisc of a given network interface.
+ *
+ * @param nl
+ *   Libmnl socket of the @p NETLINK_ROUTE kind.
+ * @param ifindex
+ *   Index of network interface to initialize.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_nl_flow_init(struct mnl_socket *nl, unsigned int ifindex,
+		  struct rte_flow_error *error)
+{
+	uint8_t buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlmsghdr *nlh;
+	struct tcmsg *tcm;
+
+	/* Destroy existing ingress qdisc and everything attached to it. */
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type = RTM_DELQDISC;
+	nlh->nlmsg_flags = NLM_F_REQUEST;
+	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm_ifindex = ifindex;
+	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
+	tcm->tcm_parent = TC_H_INGRESS;
+	/* Ignore errors when qdisc is already absent. */
+	if (mlx5_nl_flow_nl_ack(nl, nlh) &&
+	    rte_errno != EINVAL && rte_errno != ENOENT)
+		return rte_flow_error_set
+			(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			 NULL, "netlink: failed to remove ingress qdisc");
+	/* Create fresh ingress qdisc. */
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type = RTM_NEWQDISC;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+	tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
+	tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm_ifindex = ifindex;
+	tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
+	tcm->tcm_parent = TC_H_INGRESS;
+	mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
+	if (mlx5_nl_flow_nl_ack(nl, nlh))
+		return rte_flow_error_set
+			(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			 NULL, "netlink: failed to create ingress qdisc");
+	return 0;
+}
+
+/**
+ * Create and configure a libmnl socket for Netlink flow rules.
+ *
+ * @return
+ *   A valid libmnl socket object pointer on success, NULL otherwise and
+ *   rte_errno is set.
+ */
+struct mnl_socket *
+mlx5_nl_flow_socket_create(void)
+{
+	struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
+
+	if (nl &&
+	    !mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
+				   sizeof(int)) &&
+	    !mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
+		return nl;
+	rte_errno = errno;
+	if (nl)
+		mnl_socket_close(nl);
+	return NULL;
+}
+
+/**
+ * Destroy a libmnl socket.
+ */
+void
+mlx5_nl_flow_socket_destroy(struct mnl_socket *nl)
+{
+	mnl_socket_close(nl);
+}
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 7bcf6308d..414f1b967 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -145,7 +145,7 @@ endif
 ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD)       += -lrte_pmd_mlx5 -ldl
 else
-_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD)       += -lrte_pmd_mlx5 -libverbs -lmlx5
+_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD)       += -lrte_pmd_mlx5 -libverbs -lmlx5 -lmnl
 endif
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MVPP2_PMD)      += -lrte_pmd_mvpp2 -L$(LIBMUSDK_PATH)/lib -lmusdk
 _LDLIBS-$(CONFIG_RTE_LIBRTE_NFP_PMD)        += -lrte_pmd_nfp
-- 
2.11.0

  reply	other threads:[~2018-06-27 18:08 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-06-27 18:08 [dpdk-dev] [PATCH 0/6] net/mlx5: add support for switch flow rules Adrien Mazarguil
2018-06-27 18:08 ` Adrien Mazarguil [this message]
2018-07-12  0:17   ` [dpdk-dev] [PATCH 1/6] net/mlx5: lay groundwork for switch offloads Yongseok Koh
2018-07-12 10:46     ` Adrien Mazarguil
2018-07-12 17:33       ` Yongseok Koh
2018-06-27 18:08 ` [dpdk-dev] [PATCH 2/6] net/mlx5: add framework for switch flow rules Adrien Mazarguil
2018-07-12  0:59   ` Yongseok Koh
2018-07-12 10:46     ` Adrien Mazarguil
2018-07-12 18:25       ` Yongseok Koh
2018-06-27 18:08 ` [dpdk-dev] [PATCH 3/6] net/mlx5: add fate actions to " Adrien Mazarguil
2018-07-12  1:00   ` Yongseok Koh
2018-06-27 18:08 ` [dpdk-dev] [PATCH 4/6] net/mlx5: add L2-L4 pattern items " Adrien Mazarguil
2018-07-12  1:02   ` Yongseok Koh
2018-06-27 18:08 ` [dpdk-dev] [PATCH 5/6] net/mlx5: add VLAN item and actions " Adrien Mazarguil
2018-07-12  1:10   ` Yongseok Koh
2018-07-12 10:47     ` Adrien Mazarguil
2018-07-12 18:49       ` Yongseok Koh
2018-06-27 18:08 ` [dpdk-dev] [PATCH 6/6] net/mlx5: add port ID pattern item " Adrien Mazarguil
2018-07-12  1:13   ` Yongseok Koh
2018-06-28  9:05 ` [dpdk-dev] [PATCH 0/6] net/mlx5: add support for " Nélio Laranjeiro
2018-07-13  9:40 ` [dpdk-dev] [PATCH v2 " Adrien Mazarguil
2018-07-13  9:40   ` [dpdk-dev] [PATCH v2 1/6] net/mlx5: lay groundwork for switch offloads Adrien Mazarguil
2018-07-14  1:29     ` Yongseok Koh
2018-07-23 21:40     ` Ferruh Yigit
2018-07-24  0:50       ` Stephen Hemminger
2018-07-24  4:35         ` Shahaf Shuler
2018-07-24 19:33           ` Stephen Hemminger
2018-07-13  9:40   ` [dpdk-dev] [PATCH v2 2/6] net/mlx5: add framework for switch flow rules Adrien Mazarguil
2018-07-13  9:40   ` [dpdk-dev] [PATCH v2 3/6] net/mlx5: add fate actions to " Adrien Mazarguil
2018-07-13  9:40   ` [dpdk-dev] [PATCH v2 4/6] net/mlx5: add L2-L4 pattern items " Adrien Mazarguil
2018-07-13  9:40   ` [dpdk-dev] [PATCH v2 5/6] net/mlx5: add VLAN item and actions " Adrien Mazarguil
2018-07-13  9:40   ` [dpdk-dev] [PATCH v2 6/6] net/mlx5: add port ID pattern item " Adrien Mazarguil
2018-07-22 11:21   ` [dpdk-dev] [PATCH v2 0/6] net/mlx5: add support for " Shahaf Shuler

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180627173355.4718-2-adrien.mazarguil@6wind.com \
    --to=adrien.mazarguil@6wind.com \
    --cc=dev@dpdk.org \
    --cc=nelio.laranjeiro@6wind.com \
    --cc=shahafs@mellanox.com \
    --cc=yskoh@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).