DPDK patches and discussions
 help / color / mirror / Atom feed
From: Ophir Munk <ophirmu@mellanox.com>
To: dev@dpdk.org, Pascal Mazon <pascal.mazon@6wind.com>
Cc: Thomas Monjalon <thomas@monjalon.net>,
	Olga Shern <olgas@mellanox.com>,
	Ophir Munk <ophirmu@mellanox.com>
Subject: [dpdk-dev] [PATCH v6 4/6] net/tap: add eBPF API
Date: Sat, 20 Jan 2018 21:11:35 +0000	[thread overview]
Message-ID: <1516482697-8348-5-git-send-email-ophirmu@mellanox.com> (raw)
In-Reply-To: <1516482697-8348-1-git-send-email-ophirmu@mellanox.com>

This commit include BPF API to be used by TAP.

tap_flow_bpf_cls_q() - download to kernel BPF program that classifies
packets to their matching queues
tap_flow_bpf_calc_l3_l4_hash() - download to kernel BPF program that
calculates per packet layer 3 and layer 4 RSS hash
tap_flow_bpf_rss_map_create() - create BPF RSS map for storing RSS
parameters per RSS rule
tap_flow_bpf_update_rss_elem() - update BPF map entry with RSS rule
parameters

Signed-off-by: Ophir Munk <ophirmu@mellanox.com>
Acked-by: Pascal Mazon <pascal.mazon@6wind.com>
---
 drivers/net/tap/Makefile      |   6 ++
 drivers/net/tap/tap_bpf.h     | 112 +++++++++++++++++++++++++
 drivers/net/tap/tap_bpf_api.c | 190 ++++++++++++++++++++++++++++++++++++++++++
 drivers/net/tap/tap_flow.h    |   6 ++
 4 files changed, 314 insertions(+)
 create mode 100644 drivers/net/tap/tap_bpf.h
 create mode 100644 drivers/net/tap/tap_bpf_api.c

diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
index fbf84e1..fad8a94 100644
--- a/drivers/net/tap/Makefile
+++ b/drivers/net/tap/Makefile
@@ -35,6 +35,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += rte_eth_tap.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap_flow.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap_netlink.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap_tcmsgs.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_TAP) += tap_bpf_api.c
 
 include $(RTE_SDK)/mk/rte.lib.mk
 
@@ -61,6 +62,11 @@ tap_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
 		linux/pkt_cls.h \
 		enum TCA_FLOWER_KEY_VLAN_PRIO \
 		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
+		HAVE_BPF_PROG_LOAD \
+		linux/bpf.h \
+		enum BPF_PROG_LOAD \
+		$(AUTOCONF_OUTPUT)
 
 # Create tap_autoconf.h or update it in case it differs from the new one.
 
diff --git a/drivers/net/tap/tap_bpf.h b/drivers/net/tap/tap_bpf.h
new file mode 100644
index 0000000..30eefb3
--- /dev/null
+++ b/drivers/net/tap/tap_bpf.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+ * Copyright 2017 Mellanox Technologies, Ltd.
+ */
+
+#ifndef __TAP_BPF_H__
+#define __TAP_BPF_H__
+
+#include <tap_autoconf.h>
+
+#ifdef HAVE_BPF_PROG_LOAD
+#include <linux/bpf.h>
+#else
+/* BPF_MAP_UPDATE_ELEM command flags */
+#define	BPF_ANY	0 /* create a new element or update an existing */
+
+/* BPF architecture instruction struct */
+struct bpf_insn {
+	__u8	code;
+	__u8	dst_reg:4;
+	__u8	src_reg:4;
+	__s16	off;
+	__s32	imm; /* immediate value */
+};
+
+/* BPF program types */
+enum bpf_prog_type {
+	BPF_PROG_TYPE_UNSPEC,
+	BPF_PROG_TYPE_SOCKET_FILTER,
+	BPF_PROG_TYPE_KPROBE,
+	BPF_PROG_TYPE_SCHED_CLS,
+	BPF_PROG_TYPE_SCHED_ACT,
+};
+
+/* BPF commands types */
+enum bpf_cmd {
+	BPF_MAP_CREATE,
+	BPF_MAP_LOOKUP_ELEM,
+	BPF_MAP_UPDATE_ELEM,
+	BPF_MAP_DELETE_ELEM,
+	BPF_MAP_GET_NEXT_KEY,
+	BPF_PROG_LOAD,
+};
+
+/* BPF maps types */
+enum bpf_map_type {
+	BPF_MAP_TYPE_UNSPEC,
+	BPF_MAP_TYPE_HASH,
+};
+
+/* union of anonymous structs used with TAP BPF commands */
+union bpf_attr {
+	/* BPF_MAP_CREATE command */
+	struct {
+		__u32	map_type;
+		__u32	key_size;
+		__u32	value_size;
+		__u32	max_entries;
+		__u32	map_flags;
+		__u32	inner_map_fd;
+	};
+
+	/* BPF_MAP_UPDATE_ELEM, BPF_MAP_DELETE_ELEM commands */
+	struct {
+		__u32		map_fd;
+		__aligned_u64	key;
+		union {
+			__aligned_u64 value;
+			__aligned_u64 next_key;
+		};
+		__u64		flags;
+	};
+
+	/* BPF_PROG_LOAD command */
+	struct {
+		__u32		prog_type;
+		__u32		insn_cnt;
+		__aligned_u64	insns;
+		__aligned_u64	license;
+		__u32		log_level;
+		__u32		log_size;
+		__aligned_u64	log_buf;
+		__u32		kern_version;
+		__u32		prog_flags;
+	};
+} __attribute__((aligned(8)));
+#endif
+
+#ifndef __NR_bpf
+# if defined(__i386__)
+#  define __NR_bpf 357
+# elif defined(__x86_64__)
+#  define __NR_bpf 321
+# elif defined(__aarch64__)
+#  define __NR_bpf 280
+# elif defined(__sparc__)
+#  define __NR_bpf 349
+# elif defined(__s390__)
+#  define __NR_bpf 351
+# else
+#  error __NR_bpf not defined
+# endif
+#endif
+
+enum {
+	BPF_MAP_ID_KEY,
+	BPF_MAP_ID_SIMPLE,
+};
+
+static int bpf_load(enum bpf_prog_type type, const struct bpf_insn *insns,
+		size_t insns_cnt, const char *license);
+
+#endif /* __TAP_BPF_H__ */
diff --git a/drivers/net/tap/tap_bpf_api.c b/drivers/net/tap/tap_bpf_api.c
new file mode 100644
index 0000000..109a681
--- /dev/null
+++ b/drivers/net/tap/tap_bpf_api.c
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 Mellanox Technologies, Ltd.
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/queue.h>
+
+#include <rte_malloc.h>
+#include <rte_eth_tap.h>
+#include <tap_flow.h>
+#include <tap_autoconf.h>
+#include <tap_tcmsgs.h>
+#include <tap_bpf.h>
+#include <tap_bpf_insns.h>
+
+/**
+ * Load BPF program (section cls_q) into the kernel and return a bpf fd
+ *
+ * @param queue_idx
+ *   Queue index matching packet cb
+ *
+ * @return
+ *   -1 if the BPF program couldn't be loaded. An fd (int) otherwise.
+ */
+int tap_flow_bpf_cls_q(__u32 queue_idx)
+{
+	cls_q_insns[1].imm = queue_idx;
+
+	return bpf_load(BPF_PROG_TYPE_SCHED_CLS,
+		(struct bpf_insn *)cls_q_insns,
+		RTE_DIM(cls_q_insns),
+		"Dual BSD/GPL");
+}
+
+/**
+ * Load BPF program (section l3_l4) into the kernel and return a bpf fd.
+ *
+ * @param[in] key_idx
+ *   RSS MAP key index
+ *
+ * @param[in] map_fd
+ *   BPF RSS map file descriptor
+ *
+ * @return
+ *   -1 if the BPF program couldn't be loaded. An fd (int) otherwise.
+ */
+int tap_flow_bpf_calc_l3_l4_hash(__u32 key_idx, int map_fd)
+{
+	l3_l4_hash_insns[4].imm = key_idx;
+	l3_l4_hash_insns[9].imm = map_fd;
+
+	return bpf_load(BPF_PROG_TYPE_SCHED_ACT,
+		(struct bpf_insn *)l3_l4_hash_insns,
+		RTE_DIM(l3_l4_hash_insns),
+		"Dual BSD/GPL");
+}
+
+/**
+ * Helper function to convert a pointer to unsigned 64 bits
+ *
+ * @param[in] ptr
+ *   pointer to address
+ *
+ * @return
+ *   64 bit unsigned long type of pointer address
+ */
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+	return (__u64)(unsigned long)ptr;
+}
+
+/**
+ * Call BPF system call
+ *
+ * @param[in] cmd
+ *   BPF command for program loading, map creation, map entry update, etc
+ *
+ * @param[in] attr
+ *   System call attributes relevant to system call command
+ *
+ * @param[in] size
+ *   size of attr parameter
+ *
+ * @return
+ *   -1 if BPF system call failed, 0 otherwise
+ */
+static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
+			unsigned int size)
+{
+	return syscall(__NR_bpf, cmd, attr, size);
+}
+
+/**
+ * Load BPF instructions to kernel
+ *
+ * @param[in] type
+ *   BPF program type: classifieir or action
+ *
+ * @param[in] insns
+ *   Array of BPF instructions (equivalent to BPF instructions)
+ *
+ * @param[in] insns_cnt
+ *   Number of BPF instructions (size of array)
+ *
+ * @param[in] lincense
+ *   License string that must be acknowledged by the kernel
+ *
+ * @return
+ *   -1 if the BPF program couldn't be loaded, fd (file descriptor) otherwise
+ */
+static int bpf_load(enum bpf_prog_type type,
+		  const struct bpf_insn *insns,
+		  size_t insns_cnt,
+		  const char *license)
+{
+	union bpf_attr attr = {};
+
+	bzero(&attr, sizeof(attr));
+	attr.prog_type = type;
+	attr.insn_cnt = (__u32)insns_cnt;
+	attr.insns = ptr_to_u64(insns);
+	attr.license = ptr_to_u64(license);
+	attr.log_buf = ptr_to_u64(NULL);
+	attr.log_level = 0;
+	attr.kern_version = 0;
+
+	return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+}
+
+/**
+ * Create BPF map for RSS rules
+ *
+ * @param[in] key_size
+ *   map RSS key size
+ *
+ * @param[in] value_size
+ *   Map RSS value size
+ *
+ * @param[in] max_entries
+ *   Map max number of RSS entries (limit on max RSS rules)
+ *
+ * @return
+ *   -1 if BPF map couldn't be created, map fd otherwise
+ */
+int tap_flow_bpf_rss_map_create(unsigned int key_size,
+		unsigned int value_size,
+		unsigned int max_entries)
+{
+	union bpf_attr attr = {};
+
+	bzero(&attr, sizeof(attr));
+	attr.map_type    = BPF_MAP_TYPE_HASH;
+	attr.key_size    = key_size;
+	attr.value_size  = value_size;
+	attr.max_entries = max_entries;
+
+	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
+/**
+ * Update RSS entry in BPF map
+ *
+ * @param[in] fd
+ *   RSS map fd
+ *
+ * @param[in] key
+ *   Pointer to RSS key whose entry is updated
+ *
+ * @param[in] value
+ *   Pointer to RSS new updated value
+ *
+ * @return
+ *   -1 if RSS entry failed to be updated, 0 otherwise
+ */
+int tap_flow_bpf_update_rss_elem(int fd, void *key, void *value)
+{
+	union bpf_attr attr = {};
+
+	bzero(&attr, sizeof(attr));
+
+	attr.map_type = BPF_MAP_TYPE_HASH;
+	attr.map_fd = fd;
+	attr.key = ptr_to_u64(key);
+	attr.value = ptr_to_u64(value);
+	attr.flags = BPF_ANY;
+
+	return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+}
diff --git a/drivers/net/tap/tap_flow.h b/drivers/net/tap/tap_flow.h
index 9e332b0..894c959 100644
--- a/drivers/net/tap/tap_flow.h
+++ b/drivers/net/tap/tap_flow.h
@@ -80,4 +80,10 @@ int tap_flow_implicit_destroy(struct pmd_internals *pmd,
 int tap_flow_implicit_flush(struct pmd_internals *pmd,
 			    struct rte_flow_error *error);
 
+int tap_flow_bpf_cls_q(__u32 queue_idx);
+int tap_flow_bpf_calc_l3_l4_hash(__u32 key_idx, int map_fd);
+int tap_flow_bpf_rss_map_create(unsigned int key_size, unsigned int value_size,
+			unsigned int max_entries);
+int tap_flow_bpf_update_rss_elem(int fd, void *key, void *value);
+
 #endif /* _TAP_FLOW_H_ */
-- 
2.7.4

  parent reply	other threads:[~2018-01-20 21:11 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-12-17 12:21 [dpdk-dev] [PATCH v1 0/3] TAP RSS eBPF cover letter Ophir Munk
2017-12-17 12:21 ` [dpdk-dev] [PATCH v1 1/3] net/tap: add BPF sections for TAP device Ophir Munk
2017-12-17 12:21 ` [dpdk-dev] [PATCH v1 2/3] net/tap: add eBPF instructions Ophir Munk
2017-12-17 12:21 ` [dpdk-dev] [PATCH v1 3/3] net/tap: add eBPF classifiers map and actions Ophir Munk
2017-12-28 10:09 ` [dpdk-dev] [PATCH v2 0/2] TAP RSS eBPF cover letter Ophir Munk
2017-12-28 10:09   ` [dpdk-dev] [PATCH v2 1/2] net/tap: add eBPF instructions to TAP device Ophir Munk
2017-12-28 10:09   ` [dpdk-dev] [PATCH v2 2/2] net/tap: implement RSS with eBPF classifier and action Ophir Munk
2018-01-10  7:06   ` [dpdk-dev] [PATCH v3 0/2] TAP RSS eBPF cover letter Ophir Munk
2018-01-10  7:06     ` [dpdk-dev] [PATCH v3 1/2] net/tap: add eBPF instructions to TAP device Ophir Munk
2018-01-10  9:34       ` Pascal Mazon
2018-01-10 10:12         ` Ophir Munk
2018-01-10  7:06     ` [dpdk-dev] [PATCH v3 2/2] net/tap: implement RSS with eBPF classifier and action Ophir Munk
2018-01-10 10:19       ` Pascal Mazon
2018-01-11 17:20         ` Ophir Munk
2018-01-10 10:10     ` [dpdk-dev] [PATCH v3 0/2] TAP RSS eBPF cover letter Jason Wang
2018-01-10 15:34       ` Ophir Munk
2018-01-11  2:45         ` Jason Wang
2018-01-11 17:45     ` [dpdk-dev] [PATCH v4 0/5] " Ophir Munk
2018-01-11 17:45       ` [dpdk-dev] [PATCH v4 1/5] net/tap: support actions for different classifiers Ophir Munk
2018-01-11 17:45       ` [dpdk-dev] [PATCH v4 2/5] net/tap: add eBPF bytes code Ophir Munk
2018-01-11 17:45       ` [dpdk-dev] [PATCH v4 3/5] net/tap: add eBPF program file Ophir Munk
2018-01-11 17:45       ` [dpdk-dev] [PATCH v4 4/5] net/tap: add eBPF API Ophir Munk
2018-01-11 17:45       ` [dpdk-dev] [PATCH v4 5/5] net/tap: implement TAP RSS using eBPF Ophir Munk
2018-01-18 13:38       ` [dpdk-dev] [PATCH v5 0/6] TAP RSS eBPF cover letter Ophir Munk
2018-01-18 13:38         ` [dpdk-dev] [PATCH v5 1/6] net/tap: support actions for different classifiers Ophir Munk
2018-01-18 13:38         ` [dpdk-dev] [PATCH v5 2/6] net/tap: add eBPF bytes code Ophir Munk
2018-01-18 13:38         ` [dpdk-dev] [PATCH v5 3/6] net/tap: add eBPF program file Ophir Munk
2018-01-18 13:38         ` [dpdk-dev] [PATCH v5 4/6] net/tap: add eBPF API Ophir Munk
2018-01-18 13:38         ` [dpdk-dev] [PATCH v5 5/6] net/tap: implement TAP RSS using eBPF Ophir Munk
2018-01-18 13:38         ` [dpdk-dev] [PATCH v5 6/6] doc: detail new tap RSS feature in guides Ophir Munk
2018-01-19  6:48         ` [dpdk-dev] [PATCH v5 0/6] TAP RSS eBPF cover letter Pascal Mazon
2018-01-20 16:15           ` Ferruh Yigit
2018-01-20 21:25             ` Ophir Munk
2018-01-21 14:20               ` Ferruh Yigit
2018-01-20 21:11         ` [dpdk-dev] [PATCH v6 " Ophir Munk
2018-01-20 21:11           ` [dpdk-dev] [PATCH v6 1/6] net/tap: support actions for different classifiers Ophir Munk
2018-01-20 21:11           ` [dpdk-dev] [PATCH v6 2/6] net/tap: add eBPF program file Ophir Munk
2018-01-20 21:11           ` [dpdk-dev] [PATCH v6 3/6] net/tap: add eBPF bytes code Ophir Munk
2018-01-20 21:11           ` Ophir Munk [this message]
2018-01-20 21:11           ` [dpdk-dev] [PATCH v6 5/6] net/tap: implement TAP RSS using eBPF Ophir Munk
2018-01-20 21:11           ` [dpdk-dev] [PATCH v6 6/6] doc: detail new tap RSS feature in guides Ophir Munk
2018-01-21 14:50           ` [dpdk-dev] [PATCH v6 0/6] TAP RSS eBPF cover letter Ferruh Yigit

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1516482697-8348-5-git-send-email-ophirmu@mellanox.com \
    --to=ophirmu@mellanox.com \
    --cc=dev@dpdk.org \
    --cc=olgas@mellanox.com \
    --cc=pascal.mazon@6wind.com \
    --cc=thomas@monjalon.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).