DPDK patches and discussions
 help / color / mirror / Atom feed
From: Qi Zhang <qi.z.zhang@intel.com>
To: dev@dpdk.org
Cc: magnus.karlsson@intel.com, bjorn.topel@intel.com,
	Qi Zhang <qi.z.zhang@intel.com>
Subject: [dpdk-dev] [RFC v2 6/7] net/af_xdp: load BPF file
Date: Thu,  8 Mar 2018 21:52:48 +0800	[thread overview]
Message-ID: <20180308135249.28187-7-qi.z.zhang@intel.com> (raw)
In-Reply-To: <20180308135249.28187-1-qi.z.zhang@intel.com>

Add libbpf and libelf dependency in Makefile.
Durring initialization, a bpf prog which call imm "xdpsk_redirect"
will be loaded. Then the driver will always try to link XDP fd with
DRV mode first, then SKB mode if failed in previoius. Link will be
released during dev_close.

Note: this is workaround solution, af_xdp may remove BPF dependency
in future.

Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
---
 drivers/net/af_xdp/Makefile         |   5 +-
 drivers/net/af_xdp/bpf_load.c       | 168 ++++++++++++++++++++++++++++++++++++
 drivers/net/af_xdp/bpf_load.h       |  11 +++
 drivers/net/af_xdp/rte_eth_af_xdp.c |  80 ++++++++++++++---
 mk/rte.app.mk                       |   2 +-
 5 files changed, 254 insertions(+), 12 deletions(-)
 create mode 100644 drivers/net/af_xdp/bpf_load.c
 create mode 100644 drivers/net/af_xdp/bpf_load.h

diff --git a/drivers/net/af_xdp/Makefile b/drivers/net/af_xdp/Makefile
index 990073655..f16b5306b 100644
--- a/drivers/net/af_xdp/Makefile
+++ b/drivers/net/af_xdp/Makefile
@@ -12,7 +12,9 @@ EXPORT_MAP := rte_pmd_af_xdp_version.map
 
 LIBABIVER := 1
 
-CFLAGS += -O3 -I/opt/af_xdp/linux_headers/include
+LINUX_HEADER_DIR := /opt/af_xdp/linux_headers/include
+
+CFLAGS += -O3 -I$(LINUX_HEADER_DIR)
 CFLAGS += $(WERROR_FLAGS)
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
@@ -22,5 +24,6 @@ LDLIBS += -lrte_bus_vdev
 # all source are stored in SRCS-y
 #
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP) += rte_eth_af_xdp.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP) += bpf_load.c
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/af_xdp/bpf_load.c b/drivers/net/af_xdp/bpf_load.c
new file mode 100644
index 000000000..255e67187
--- /dev/null
+++ b/drivers/net/af_xdp/bpf_load.c
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/types.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <poll.h>
+#include <ctype.h>
+#include <assert.h>
+#include "bpf_load.h"
+
+static char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+struct bpf_insn prog[] = {
+	{
+		.code = 0x85, //call imm
+		.dst_reg = 0,
+		.src_reg = 0,
+		.off = 0,
+		.imm = BPF_FUNC_xdpsk_redirect,
+	},
+	{
+		.code = 0x95, //exit
+		.dst_reg = 0,
+		.src_reg = 0,
+		.off = 0,
+		.imm = 0,
+	},
+};
+
+int load_bpf_file(void)
+{
+	int fd;
+
+	fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog,
+			      ARRAY_SIZE(prog),
+			      "GPL", 0,
+			      bpf_log_buf, BPF_LOG_BUF_SIZE);
+
+	if (fd < 0) {
+		printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf);
+		return -1;
+	}
+
+	return fd;
+}
+
+int set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+{
+	struct sockaddr_nl sa;
+	int sock, len, ret = -1;
+	uint32_t seq = 0;
+	char buf[4096];
+	struct nlattr *nla, *nla_xdp;
+	struct {
+		struct nlmsghdr  nh;
+		struct ifinfomsg ifinfo;
+		char             attrbuf[64];
+	} req;
+	struct nlmsghdr *nh;
+	struct nlmsgerr *err;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.nl_family = AF_NETLINK;
+
+	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (sock < 0) {
+		printf("open netlink socket: %s\n", strerror(errno));
+		return -1;
+	}
+
+	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+		printf("bind to netlink: %s\n", strerror(errno));
+		goto cleanup;
+	}
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_type = RTM_SETLINK;
+	req.nh.nlmsg_pid = 0;
+	req.nh.nlmsg_seq = ++seq;
+	req.ifinfo.ifi_family = AF_UNSPEC;
+	req.ifinfo.ifi_index = ifindex;
+
+	/* started nested attribute for XDP */
+	nla = (struct nlattr *)(((char *)&req)
+				+ NLMSG_ALIGN(req.nh.nlmsg_len));
+	nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
+	nla->nla_len = NLA_HDRLEN;
+
+	/* add XDP fd */
+	nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+	nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
+	nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+	memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
+	nla->nla_len += nla_xdp->nla_len;
+
+	/* if user passed in any flags, add those too */
+	if (flags) {
+		nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+		nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/;
+		nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
+		memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
+		nla->nla_len += nla_xdp->nla_len;
+	}
+
+	req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
+
+	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		printf("send to netlink: %s\n", strerror(errno));
+		goto cleanup;
+	}
+
+	len = recv(sock, buf, sizeof(buf), 0);
+	if (len < 0) {
+		printf("recv from netlink: %s\n", strerror(errno));
+		goto cleanup;
+	}
+
+	for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, (unsigned int)len);
+	     nh = NLMSG_NEXT(nh, len)) {
+		if (nh->nlmsg_pid != (uint32_t)getpid()) {
+			printf("Wrong pid %d, expected %d\n",
+			       nh->nlmsg_pid, getpid());
+			goto cleanup;
+		}
+		if (nh->nlmsg_seq != seq) {
+			printf("Wrong seq %d, expected %d\n",
+			       nh->nlmsg_seq, seq);
+			goto cleanup;
+		}
+		switch (nh->nlmsg_type) {
+		case NLMSG_ERROR:
+			err = (struct nlmsgerr *)NLMSG_DATA(nh);
+			if (!err->error)
+				continue;
+			printf("nlmsg error %s\n", strerror(-err->error));
+			goto cleanup;
+		case NLMSG_DONE:
+			break;
+		}
+	}
+
+	ret = 0;
+
+cleanup:
+	close(sock);
+	return ret;
+}
diff --git a/drivers/net/af_xdp/bpf_load.h b/drivers/net/af_xdp/bpf_load.h
new file mode 100644
index 000000000..2561ede55
--- /dev/null
+++ b/drivers/net/af_xdp/bpf_load.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+#ifndef __BPF_LOAD_H
+#define __BPF_LOAD_H
+
+#include <bpf/bpf.h>
+
+int load_bpf_file(void);
+int set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+#endif
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 7e839f0da..825273c11 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -11,6 +11,7 @@
 
 #include <linux/if_ether.h>
 #include <linux/if_xdp.h>
+#include <linux/if_link.h>
 #include <arpa/inet.h>
 #include <net/if.h>
 #include <sys/types.h>
@@ -20,6 +21,7 @@
 #include <unistd.h>
 #include <poll.h>
 #include "xdpsock_queue.h"
+#include "bpf_load.h"
 
 #ifndef SOL_XDP
 #define SOL_XDP 283
@@ -81,6 +83,9 @@ struct pmd_internals {
 	uint16_t port_id;
 	uint16_t queue_idx;
 	int ring_size;
+
+	uint32_t xdp_flags;
+	int bpf_fd;
 };
 
 static const char * const valid_arguments[] = {
@@ -97,6 +102,39 @@ static struct rte_eth_link pmd_link = {
 	.link_autoneg = ETH_LINK_AUTONEG
 };
 
+static int load_bpf(struct pmd_internals *internals)
+{
+	/* need fix: hard coded bpf file */
+	int fd = load_bpf_file();
+
+	if (fd < 0)
+		return -1;
+
+	internals->bpf_fd = fd;
+	return 0;
+}
+
+static int link_bpf_file(struct pmd_internals *internals)
+{
+	if (!set_link_xdp_fd(internals->if_index,
+			     internals->bpf_fd,
+			     XDP_FLAGS_DRV_MODE))
+		internals->xdp_flags = XDP_FLAGS_DRV_MODE;
+	else if (!set_link_xdp_fd(internals->if_index,
+				  internals->bpf_fd,
+				  XDP_FLAGS_SKB_MODE))
+		internals->xdp_flags = XDP_FLAGS_SKB_MODE;
+	else
+		return -1;
+
+	return 0;
+}
+
+static void unlink_bpf_file(struct pmd_internals *internals)
+{
+	set_link_xdp_fd(internals->if_index, -1, internals->xdp_flags);
+}
+
 static void *get_pkt_data(struct pmd_internals *internals,
 			  uint32_t index,
 			  uint32_t offset)
@@ -380,8 +418,26 @@ eth_stats_reset(struct rte_eth_dev *dev)
 }
 
 static void
-eth_dev_close(struct rte_eth_dev *dev __rte_unused)
+eth_dev_close(struct rte_eth_dev *dev)
 {
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	if (internals->xdp_flags) {
+		unlink_bpf_file(internals);
+		internals->xdp_flags = 0;
+	}
+
+	if (internals->umem) {
+		if (internals->umem->mb_pool && !internals->share_mb_pool)
+			rte_mempool_free(internals->umem->mb_pool);
+		free(internals->umem);
+		internals->umem = NULL;
+	}
+
+	if (internals->sfd != -1) {
+		close(internals->sfd);
+		internals->sfd = -1;
+	}
 }
 
 static void
@@ -743,9 +799,17 @@ init_internals(struct rte_vdev_device *dev,
 	if (ret)
 		goto error_3;
 
+	if (load_bpf(internals)) {
+		printf("load bpf file failed\n");
+		goto error_3;
+	}
+
+	if (link_bpf_file(internals))
+		goto error_3;
+
 	eth_dev = rte_eth_vdev_allocate(dev, 0);
 	if (!eth_dev)
-		goto error_3;
+		goto error_4;
 
 	rte_memcpy(data, eth_dev->data, sizeof(*data));
 	internals->port_id = eth_dev->data->port_id;
@@ -763,6 +827,9 @@ init_internals(struct rte_vdev_device *dev,
 
 	return 0;
 
+error_4:
+	unlink_bpf_file(internals);
+
 error_3:
 	close(internals->sfd);
 
@@ -808,7 +875,6 @@ static int
 rte_pmd_af_xdp_remove(struct rte_vdev_device *dev)
 {
 	struct rte_eth_dev *eth_dev = NULL;
-	struct pmd_internals *internals;
 
 	RTE_LOG(INFO, PMD, "Closing AF_XDP ethdev on numa socket %u\n",
 		rte_socket_id());
@@ -821,15 +887,9 @@ rte_pmd_af_xdp_remove(struct rte_vdev_device *dev)
 	if (!eth_dev)
 		return -1;
 
-	internals = eth_dev->data->dev_private;
-	if (internals->umem) {
-		if (internals->umem->mb_pool && !internals->share_mb_pool)
-			rte_mempool_free(internals->umem->mb_pool);
-		rte_free(internals->umem);
-	}
+	eth_dev_close(eth_dev);
 	rte_free(eth_dev->data->dev_private);
 	rte_free(eth_dev->data);
-	close(internals->sfd);
 
 	rte_eth_dev_release_port(eth_dev);
 
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index bc26e1457..d05e6c0e4 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -120,7 +120,7 @@ ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),n)
 _LDLIBS-$(CONFIG_RTE_DRIVER_MEMPOOL_STACK)  += -lrte_mempool_stack
 
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET)  += -lrte_pmd_af_packet
-_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP)     += -lrte_pmd_af_xdp
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP)     += -lrte_pmd_af_xdp -lelf -lbpf
 _LDLIBS-$(CONFIG_RTE_LIBRTE_ARK_PMD)        += -lrte_pmd_ark
 _LDLIBS-$(CONFIG_RTE_LIBRTE_AVF_PMD)        += -lrte_pmd_avf
 _LDLIBS-$(CONFIG_RTE_LIBRTE_AVP_PMD)        += -lrte_pmd_avp
-- 
2.13.6

  parent reply	other threads:[~2018-03-08 13:52 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-08 13:52 [dpdk-dev] [RFC v2 0/7] PMD driver for AF_XDP Qi Zhang
2018-03-08 13:52 ` [dpdk-dev] [RFC v2 1/7] net/af_xdp: new PMD driver Qi Zhang
2018-03-08 13:52 ` [dpdk-dev] [RFC v2 2/7] lib/mbuf: enable parse flags when create mempool Qi Zhang
2018-03-08 13:52 ` [dpdk-dev] [RFC v2 3/7] lib/mempool: allow page size aligned mempool Qi Zhang
2018-03-08 13:52 ` [dpdk-dev] [RFC v2 4/7] net/af_xdp: use mbuf mempool for buffer management Qi Zhang
2018-03-08 13:52 ` [dpdk-dev] [RFC v2 5/7] net/af_xdp: enable share mempool Qi Zhang
2018-03-08 13:52 ` Qi Zhang [this message]
2018-03-08 14:20   ` [dpdk-dev] [RFC v2 6/7] net/af_xdp: load BPF file Zhang, Qi Z
2018-03-08 23:15   ` Stephen Hemminger
2018-05-09  7:02     ` Björn Töpel
2018-03-08 13:52 ` [dpdk-dev] [RFC v2 7/7] app/testpmd: enable parameter for mempool flags Qi Zhang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180308135249.28187-7-qi.z.zhang@intel.com \
    --to=qi.z.zhang@intel.com \
    --cc=bjorn.topel@intel.com \
    --cc=dev@dpdk.org \
    --cc=magnus.karlsson@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).