* [dpdk-dev] [RFC PATCH 1/5] bpf: add BPF loading and execution framework
2018-03-08 1:29 [dpdk-dev] [RFC PATCH 0/5] add framework to load and execute BPF code Konstantin Ananyev
@ 2018-03-08 1:29 ` Konstantin Ananyev
2018-03-08 1:29 ` [dpdk-dev] [RFC PATCH 2/5] bpf: add JIT compilation for x86_64 ISA Konstantin Ananyev
` (3 subsequent siblings)
4 siblings, 0 replies; 10+ messages in thread
From: Konstantin Ananyev @ 2018-03-08 1:29 UTC (permalink / raw)
To: dev; +Cc: Konstantin Ananyev
librte_bpf provides a framework to load and execute eBPF bytecode
inside user-space DPDK applications; a minimal usage sketch is
included below.
Features not currently supported:
- JIT
- cBPF
- tail call
- eBPF maps
- skb
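
The following sketch is for illustration only and is not part of this
patch; the object file name "t1.o", section name ".text" and the
external function "trace" are hypothetical:

#include <rte_bpf.h>
#include <rte_errno.h>

/* external function the eBPF code is allowed to call (hypothetical) */
static uint64_t
trace(uint64_t v, uint64_t a2, uint64_t a3, uint64_t a4, uint64_t a5)
{
	(void)a2; (void)a3; (void)a4; (void)a5;
	return v;
}

static const struct rte_bpf_xsym xsym[] = {
	{
		.name = "trace",
		.type = RTE_BPF_XTYPE_FUNC,
		.func = trace,
	},
};

static uint64_t
run_bpf(struct rte_mbuf *mb)
{
	uint64_t rv;
	struct rte_bpf *bpf;
	const struct rte_bpf_prm prm = {
		.xsym = xsym,
		.nb_xsym = RTE_DIM(xsym),
		.prog_type = RTE_BPF_PROG_TYPE_MBUF,
	};

	bpf = rte_bpf_elf_load(&prm, "t1.o", ".text");
	if (bpf == NULL)
		return 0; /* error code is available in rte_errno */

	rv = rte_bpf_exec(bpf, mb);
	rte_bpf_destroy(bpf);
	return rv;
}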
Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
config/common_base | 5 +
config/common_linuxapp | 1 +
lib/Makefile | 2 +
lib/librte_bpf/Makefile | 30 +++
lib/librte_bpf/bpf.c | 48 ++++
lib/librte_bpf/bpf_exec.c | 453 +++++++++++++++++++++++++++++++++++++
lib/librte_bpf/bpf_impl.h | 37 +++
lib/librte_bpf/bpf_load.c | 344 ++++++++++++++++++++++++++++
lib/librte_bpf/bpf_validate.c | 55 +++++
lib/librte_bpf/rte_bpf.h | 154 +++++++++++++
lib/librte_bpf/rte_bpf_version.map | 12 +
mk/rte.app.mk | 2 +
12 files changed, 1143 insertions(+)
create mode 100644 lib/librte_bpf/Makefile
create mode 100644 lib/librte_bpf/bpf.c
create mode 100644 lib/librte_bpf/bpf_exec.c
create mode 100644 lib/librte_bpf/bpf_impl.h
create mode 100644 lib/librte_bpf/bpf_load.c
create mode 100644 lib/librte_bpf/bpf_validate.c
create mode 100644 lib/librte_bpf/rte_bpf.h
create mode 100644 lib/librte_bpf/rte_bpf_version.map
diff --git a/config/common_base b/config/common_base
index ad03cf433..2205b684f 100644
--- a/config/common_base
+++ b/config/common_base
@@ -823,3 +823,8 @@ CONFIG_RTE_APP_CRYPTO_PERF=y
# Compile the eventdev application
#
CONFIG_RTE_APP_EVENTDEV=y
+
+#
+# Compile librte_bpf
+#
+CONFIG_RTE_LIBRTE_BPF=n
diff --git a/config/common_linuxapp b/config/common_linuxapp
index ff98f2355..7b4a0ce7d 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -10,6 +10,7 @@ CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES=y
CONFIG_RTE_EAL_IGB_UIO=y
CONFIG_RTE_EAL_VFIO=y
CONFIG_RTE_KNI_KMOD=y
+CONFIG_RTE_LIBRTE_BPF=y
CONFIG_RTE_LIBRTE_KNI=y
CONFIG_RTE_LIBRTE_PMD_KNI=y
CONFIG_RTE_LIBRTE_VHOST=y
diff --git a/lib/Makefile b/lib/Makefile
index ec965a606..a4a2329f9 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -97,6 +97,8 @@ DEPDIRS-librte_pdump := librte_eal librte_mempool librte_mbuf librte_ether
DIRS-$(CONFIG_RTE_LIBRTE_GSO) += librte_gso
DEPDIRS-librte_gso := librte_eal librte_mbuf librte_ether librte_net
DEPDIRS-librte_gso += librte_mempool
+DIRS-$(CONFIG_RTE_LIBRTE_BPF) += librte_bpf
+DEPDIRS-librte_bpf := librte_eal librte_mempool librte_mbuf librte_ether
ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
diff --git a/lib/librte_bpf/Makefile b/lib/librte_bpf/Makefile
new file mode 100644
index 000000000..e0f434e77
--- /dev/null
+++ b/lib/librte_bpf/Makefile
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_bpf.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+LDLIBS += -lrte_net -lrte_eal
+LDLIBS += -lrte_mempool -lrte_ring
+LDLIBS += -lrte_mbuf -lrte_ethdev
+LDLIBS += -lelf
+
+EXPORT_MAP := rte_bpf_version.map
+
+LIBABIVER := 1
+
+# all source files are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf.c
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_exec.c
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_load.c
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_validate.c
+
+# install header files
+SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += rte_bpf.h
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_bpf/bpf.c b/lib/librte_bpf/bpf.c
new file mode 100644
index 000000000..4727d2251
--- /dev/null
+++ b/lib/librte_bpf/bpf.c
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_eal.h>
+
+#include "bpf_impl.h"
+
+__rte_experimental void
+rte_bpf_destroy(struct rte_bpf *bpf)
+{
+ if (bpf != NULL) {
+ if (bpf->jit.func != NULL)
+ munmap(bpf->jit.func, bpf->jit.sz);
+ munmap(bpf, bpf->sz);
+ }
+}
+
+__rte_experimental int
+rte_bpf_get_jit(const struct rte_bpf *bpf, struct rte_bpf_jit *jit)
+{
+ if (bpf == NULL || jit == NULL)
+ return -EINVAL;
+
+ jit[0] = bpf->jit;
+ return 0;
+}
+
+int
+bpf_jit(struct rte_bpf *bpf)
+{
+ int32_t rc;
+
+ rc = -ENOTSUP;
+
+ if (rc != 0)
+ RTE_LOG(WARNING, USER1, "%s(%p) failed, error code: %d;\n",
+ __func__, bpf, rc);
+ return rc;
+}
diff --git a/lib/librte_bpf/bpf_exec.c b/lib/librte_bpf/bpf_exec.c
new file mode 100644
index 000000000..4bad0cc9e
--- /dev/null
+++ b/lib/librte_bpf/bpf_exec.c
@@ -0,0 +1,453 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_byteorder.h>
+
+#include "bpf_impl.h"
+
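+/*
+ * Helper macros for the interpreter loop below.
+ * Conditional jumps are PC-relative: on success the instruction
+ * pointer advances by an extra 'off' instructions. For example,
+ * BPF_JMP_CND_IMM(reg, ins, ==, uint64_t) expands (roughly) to:
+ * ins += ((uint64_t)reg[ins->dst_reg] == (uint64_t)ins->imm) ? ins->off : 0;
+ */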
+#define BPF_JMP_UNC(ins) ((ins) += (ins)->off)
+
+#define BPF_JMP_CND_REG(reg, ins, op, type) \
+ ((ins) += \
+ ((type)(reg)[(ins)->dst_reg] op (type)(reg)[(ins)->src_reg]) ? \
+ (ins)->off : 0)
+
+#define BPF_JMP_CND_IMM(reg, ins, op, type) \
+ ((ins) += \
+ ((type)(reg)[(ins)->dst_reg] op (type)(ins)->imm) ? \
+ (ins)->off : 0)
+
+#define BPF_NEG_ALU(reg, ins, type) \
+ ((reg)[(ins)->dst_reg] = (type)(-(reg)[(ins)->dst_reg]))
+
+#define BPF_MOV_ALU_REG(reg, ins, type) \
+ ((reg)[(ins)->dst_reg] = (type)(reg)[(ins)->src_reg])
+
+#define BPF_OP_ALU_REG(reg, ins, op, type) \
+ ((reg)[(ins)->dst_reg] = \
+ (type)(reg)[(ins)->dst_reg] op (type)(reg)[(ins)->src_reg])
+
+#define BPF_MOV_ALU_IMM(reg, ins, type) \
+ ((reg)[(ins)->dst_reg] = (type)(ins)->imm)
+
+#define BPF_OP_ALU_IMM(reg, ins, op, type) \
+ ((reg)[(ins)->dst_reg] = \
+ (type)(reg)[(ins)->dst_reg] op (type)(ins)->imm)
+
+#define BPF_DIV_ZERO_CHECK(bpf, reg, ins, type) do { \
+ if ((type)(reg)[(ins)->src_reg] == 0) { \
+ RTE_LOG(ERR, USER1, \
+ "%s(%p): division by 0 at pc: %#zx;\n", \
+ __func__, bpf, \
+ (uintptr_t)(ins) - (uintptr_t)(bpf)->prm.ins); \
+ return 0; \
+ } \
+} while (0)
+
+#define BPF_LD_REG(reg, ins, type) \
+ ((reg)[(ins)->dst_reg] = \
+ *(type *)(uintptr_t)((reg)[(ins)->src_reg] + (ins)->off))
+
+#define BPF_ST_IMM(reg, ins, type) \
+ (*(type *)(uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off) = \
+ (type)(ins)->imm)
+
+#define BPF_ST_REG(reg, ins, type) \
+ (*(type *)(uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off) = \
+ (type)(reg)[(ins)->src_reg])
+
+#define BPF_ST_XADD_REG(reg, ins, tp) \
+ (rte_atomic##tp##_add((rte_atomic##tp##_t *) \
+ (uintptr_t)((reg)[(ins)->dst_reg] + (ins)->off), \
+ reg[ins->src_reg]))
+
+static inline void
+bpf_alu_be(uint64_t reg[MAX_BPF_REG], const struct bpf_insn *ins)
+{
+ uint64_t *v;
+
+ v = reg + ins->dst_reg;
+ switch (ins->imm) {
+ case 16:
+ *v = rte_cpu_to_be_16(*v);
+ break;
+ case 32:
+ *v = rte_cpu_to_be_32(*v);
+ break;
+ case 64:
+ *v = rte_cpu_to_be_64(*v);
+ break;
+ }
+}
+
+static inline void
+bpf_alu_le(uint64_t reg[MAX_BPF_REG], const struct bpf_insn *ins)
+{
+ uint64_t *v;
+
+ v = reg + ins->dst_reg;
+ switch (ins->imm) {
+ case 16:
+ *v = rte_cpu_to_le_16(*v);
+ break;
+ case 32:
+ *v = rte_cpu_to_le_32(*v);
+ break;
+ case 64:
+ *v = rte_cpu_to_le_64(*v);
+ break;
+ }
+}
+
+static inline uint64_t
+bpf_exec(const struct rte_bpf *bpf, uint64_t reg[MAX_BPF_REG])
+{
+ const struct bpf_insn *ins;
+
+ for (ins = bpf->prm.ins; ; ins++) {
+ switch (ins->code) {
+ /* 32 bit ALU IMM operations */
+ case (BPF_ALU | BPF_ADD | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, +, uint32_t);
+ break;
+ case (BPF_ALU | BPF_SUB | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, -, uint32_t);
+ break;
+ case (BPF_ALU | BPF_AND | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, &, uint32_t);
+ break;
+ case (BPF_ALU | BPF_OR | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, |, uint32_t);
+ break;
+ case (BPF_ALU | BPF_LSH | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, <<, uint32_t);
+ break;
+ case (BPF_ALU | BPF_RSH | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, >>, uint32_t);
+ break;
+ case (BPF_ALU | BPF_XOR | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, ^, uint32_t);
+ break;
+ case (BPF_ALU | BPF_MUL | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, *, uint32_t);
+ break;
+ case (BPF_ALU | BPF_DIV | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, /, uint32_t);
+ break;
+ case (BPF_ALU | BPF_MOD | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, %, uint32_t);
+ break;
+ case (BPF_ALU | BPF_MOV | BPF_K):
+ BPF_MOV_ALU_IMM(reg, ins, uint32_t);
+ break;
+ /* 32 bit ALU REG operations */
+ case (BPF_ALU | BPF_ADD | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, +, uint32_t);
+ break;
+ case (BPF_ALU | BPF_SUB | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, -, uint32_t);
+ break;
+ case (BPF_ALU | BPF_AND | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, &, uint32_t);
+ break;
+ case (BPF_ALU | BPF_OR | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, |, uint32_t);
+ break;
+ case (BPF_ALU | BPF_LSH | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, <<, uint32_t);
+ break;
+ case (BPF_ALU | BPF_RSH | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, >>, uint32_t);
+ break;
+ case (BPF_ALU | BPF_XOR | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, ^, uint32_t);
+ break;
+ case (BPF_ALU | BPF_MUL | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, *, uint32_t);
+ break;
+ case (BPF_ALU | BPF_DIV | BPF_X):
+ BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint32_t);
+ BPF_OP_ALU_REG(reg, ins, /, uint32_t);
+ break;
+ case (BPF_ALU | BPF_MOD | BPF_X):
+ BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint32_t);
+ BPF_OP_ALU_REG(reg, ins, %, uint32_t);
+ break;
+ case (BPF_ALU | BPF_MOV | BPF_X):
+ BPF_MOV_ALU_REG(reg, ins, uint32_t);
+ break;
+ case (BPF_ALU | BPF_NEG):
+ BPF_NEG_ALU(reg, ins, uint32_t);
+ break;
+ case (BPF_ALU | BPF_END | BPF_TO_BE):
+ bpf_alu_be(reg, ins);
+ break;
+ case (BPF_ALU | BPF_END | BPF_TO_LE):
+ bpf_alu_le(reg, ins);
+ break;
+ /* 64 bit ALU IMM operations */
+ case (BPF_ALU64 | BPF_ADD | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, +, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_SUB | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, -, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_AND | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, &, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_OR | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, |, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_LSH | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, <<, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_RSH | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, >>, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_ARSH | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, >>, int64_t);
+ break;
+ case (BPF_ALU64 | BPF_XOR | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, ^, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_MUL | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, *, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_DIV | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, /, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_MOD | BPF_K):
+ BPF_OP_ALU_IMM(reg, ins, %, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_MOV | BPF_K):
+ BPF_MOV_ALU_IMM(reg, ins, uint64_t);
+ break;
+ /* 64 bit ALU REG operations */
+ case (BPF_ALU64 | BPF_ADD | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, +, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_SUB | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, -, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_AND | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, &, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_OR | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, |, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_LSH | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, <<, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_RSH | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, >>, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_ARSH | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, >>, int64_t);
+ break;
+ case (BPF_ALU64 | BPF_XOR | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, ^, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_MUL | BPF_X):
+ BPF_OP_ALU_REG(reg, ins, *, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_DIV | BPF_X):
+ BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint64_t);
+ BPF_OP_ALU_REG(reg, ins, /, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_MOD | BPF_X):
+ BPF_DIV_ZERO_CHECK(bpf, reg, ins, uint64_t);
+ BPF_OP_ALU_REG(reg, ins, %, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_MOV | BPF_X):
+ BPF_MOV_ALU_REG(reg, ins, uint64_t);
+ break;
+ case (BPF_ALU64 | BPF_NEG):
+ BPF_NEG_ALU(reg, ins, uint64_t);
+ break;
+ /* load instructions */
+ case (BPF_LDX | BPF_MEM | BPF_B):
+ BPF_LD_REG(reg, ins, uint8_t);
+ break;
+ case (BPF_LDX | BPF_MEM | BPF_H):
+ BPF_LD_REG(reg, ins, uint16_t);
+ break;
+ case (BPF_LDX | BPF_MEM | BPF_W):
+ BPF_LD_REG(reg, ins, uint32_t);
+ break;
+ case (BPF_LDX | BPF_MEM | BPF_DW):
+ BPF_LD_REG(reg, ins, uint64_t);
+ break;
+ /* load 64 bit immediate value */
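+ /*
+ * LDDW occupies two instruction slots: imm of ins[0] holds the
+ * lower 32 bits, imm of ins[1] the upper 32 bits, hence the
+ * extra ins++ below.
+ */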
+ case (BPF_LD | BPF_IMM | BPF_DW):
+ reg[ins->dst_reg] = (uint32_t)ins[0].imm |
+ (uint64_t)(uint32_t)ins[1].imm << 32;
+ ins++;
+ break;
+ /* store instructions */
+ case (BPF_STX | BPF_MEM | BPF_B):
+ BPF_ST_REG(reg, ins, uint8_t);
+ break;
+ case (BPF_STX | BPF_MEM | BPF_H):
+ BPF_ST_REG(reg, ins, uint16_t);
+ break;
+ case (BPF_STX | BPF_MEM | BPF_W):
+ BPF_ST_REG(reg, ins, uint32_t);
+ break;
+ case (BPF_STX | BPF_MEM | BPF_DW):
+ BPF_ST_REG(reg, ins, uint64_t);
+ break;
+ case (BPF_ST | BPF_MEM | BPF_B):
+ BPF_ST_IMM(reg, ins, uint8_t);
+ break;
+ case (BPF_ST | BPF_MEM | BPF_H):
+ BPF_ST_IMM(reg, ins, uint16_t);
+ break;
+ case (BPF_ST | BPF_MEM | BPF_W):
+ BPF_ST_IMM(reg, ins, uint32_t);
+ break;
+ case (BPF_ST | BPF_MEM | BPF_DW):
+ BPF_ST_IMM(reg, ins, uint64_t);
+ break;
+ /* atomic add instructions */
+ case (BPF_STX | BPF_XADD | BPF_W):
+ BPF_ST_XADD_REG(reg, ins, 32);
+ break;
+ case (BPF_STX | BPF_XADD | BPF_DW):
+ BPF_ST_XADD_REG(reg, ins, 64);
+ break;
+ /* jump instructions */
+ case (BPF_JMP | BPF_JA):
+ BPF_JMP_UNC(ins);
+ break;
+ /* jump IMM instructions */
+ case (BPF_JMP | BPF_JEQ | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, ==, uint64_t);
+ break;
+ case (BPF_JMP | BPF_JNE | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, !=, uint64_t);
+ break;
+ case (BPF_JMP | BPF_JGT | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, >, uint64_t);
+ break;
+ case (BPF_JMP | BPF_JLT | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, <, uint64_t);
+ break;
+ case (BPF_JMP | BPF_JGE | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, >=, uint64_t);
+ break;
+ case (BPF_JMP | BPF_JLE | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, <=, uint64_t);
+ break;
+ case (BPF_JMP | BPF_JSGT | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, >, int64_t);
+ break;
+ case (BPF_JMP | BPF_JSLT | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, <, int64_t);
+ break;
+ case (BPF_JMP | BPF_JSGE | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, >=, int64_t);
+ break;
+ case (BPF_JMP | BPF_JSLE | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, <=, int64_t);
+ break;
+ case (BPF_JMP | BPF_JSET | BPF_K):
+ BPF_JMP_CND_IMM(reg, ins, &, uint64_t);
+ break;
+ /* jump REG instructions */
+ case (BPF_JMP | BPF_JEQ | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, ==, uint64_t);
+ break;
+ case (BPF_JMP | BPF_JNE | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, !=, uint64_t);
+ break;
+ case (BPF_JMP | BPF_JGT | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, >, uint64_t);
+ break;
+ case (BPF_JMP | BPF_JLT | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, <, uint64_t);
+ break;
+ case (BPF_JMP | BPF_JGE | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, >=, uint64_t);
+ break;
+ case (BPF_JMP | BPF_JLE | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, <=, uint64_t);
+ break;
+ case (BPF_JMP | BPF_JSGT | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, >, int64_t);
+ break;
+ case (BPF_JMP | BPF_JSLT | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, <, int64_t);
+ break;
+ case (BPF_JMP | BPF_JSGE | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, >=, int64_t);
+ break;
+ case (BPF_JMP | BPF_JSLE | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, <=, int64_t);
+ break;
+ case (BPF_JMP | BPF_JSET | BPF_X):
+ BPF_JMP_CND_REG(reg, ins, &, uint64_t);
+ break;
+ /* call instructions */
+ case (BPF_JMP | BPF_CALL):
+ reg[BPF_REG_0] = bpf->prm.xsym[ins->imm].func(
+ reg[BPF_REG_1], reg[BPF_REG_2], reg[BPF_REG_3],
+ reg[BPF_REG_4], reg[BPF_REG_5]);
+ break;
+ /* return instruction */
+ case (BPF_JMP | BPF_EXIT):
+ return reg[BPF_REG_0];
+ default:
+ RTE_LOG(ERR, USER1,
+ "%s(%p): invalid opcode %#x at pc: %#zx;\n",
+ __func__, bpf, ins->code,
+ (uintptr_t)ins - (uintptr_t)bpf->prm.ins);
+ return 0;
+ }
+ }
+
+ /* should never be reached */
+ RTE_VERIFY(0);
+ return 0;
+}
+
+__rte_experimental uint32_t
+rte_bpf_exec_burst(const struct rte_bpf *bpf, void *ctx[], uint64_t rc[],
+ uint32_t num)
+{
+ uint32_t i;
+ uint64_t reg[MAX_BPF_REG];
+ uint64_t stack[MAX_BPF_STACK_SIZE / sizeof(uint64_t)];
+
+ for (i = 0; i != num; i++) {
+
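+ /*
+ * Per the eBPF calling convention: R1 holds the input context,
+ * R10 is the read-only frame pointer, set to the top of the stack.
+ */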
+ reg[BPF_REG_1] = (uintptr_t)ctx[i];
+ reg[BPF_REG_10] = (uintptr_t)(stack + RTE_DIM(stack));
+
+ rc[i] = bpf_exec(bpf, reg);
+ }
+
+ return i;
+}
+
+__rte_experimental uint64_t
+rte_bpf_exec(const struct rte_bpf *bpf, void *ctx)
+{
+ uint64_t rc;
+
+ rte_bpf_exec_burst(bpf, &ctx, &rc, 1);
+ return rc;
+}
+
diff --git a/lib/librte_bpf/bpf_impl.h b/lib/librte_bpf/bpf_impl.h
new file mode 100644
index 000000000..f09417088
--- /dev/null
+++ b/lib/librte_bpf/bpf_impl.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _BPF_H_
+#define _BPF_H_
+
+#include <rte_bpf.h>
+#include <sys/mman.h>
+#include <linux/bpf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MAX_BPF_STACK_SIZE 0x200
+
+struct rte_bpf {
+ struct rte_bpf_prm prm;
+ struct rte_bpf_jit jit;
+ size_t sz;
+ uint32_t stack_sz;
+};
+
+extern int bpf_validate(struct rte_bpf *bpf);
+
+extern int bpf_jit(struct rte_bpf *bpf);
+
+#ifdef RTE_ARCH_X86_64
+extern int bpf_jit_x86(struct rte_bpf *);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _BPF_H_ */
diff --git a/lib/librte_bpf/bpf_load.c b/lib/librte_bpf/bpf_load.c
new file mode 100644
index 000000000..84c6b9417
--- /dev/null
+++ b/lib/librte_bpf/bpf_load.c
@@ -0,0 +1,344 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/queue.h>
+#include <fcntl.h>
+
+#include <libelf.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_byteorder.h>
+#include <rte_errno.h>
+
+#include "bpf_impl.h"
+
+static uint32_t
+bpf_find_func(const char *sn, const struct rte_bpf_xsym fp[], uint32_t fn)
+{
+ uint32_t i;
+
+ if (sn == NULL || fp == NULL)
+ return UINT32_MAX;
+
+ for (i = 0; i != fn; i++) {
+ if (fp[i].type == RTE_BPF_XTYPE_FUNC &&
+ strcmp(sn, fp[i].name) == 0)
+ break;
+ }
+
+ return (i != fn) ? i : UINT32_MAX;
+}
+
+static int
+check_elf_header(const Elf64_Ehdr *eh)
+{
+ const char *err;
+
+ err = NULL;
+
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+ if (eh->e_ident[EI_DATA] != ELFDATA2LSB)
+#else
+ if (eh->e_ident[EI_DATA] != ELFDATA2MSB)
+#endif
+ err = "not native byte order";
+ else if (eh->e_ident[EI_OSABI] != ELFOSABI_NONE)
+ err = "unexpected OS ABI";
+ else if (eh->e_type != ET_REL)
+ err = "unexpected ELF type";
+ else if (eh->e_machine != EM_NONE && eh->e_machine != EM_BPF)
+ err = "unexpected machine type";
+
+ if (err != NULL) {
+ RTE_LOG(ERR, USER1, "%s(): %s\n", __func__, err);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * helper function, find executable section by name.
+ */
+static int
+find_elf_code(Elf *elf, const char *section, Elf_Data **psd, size_t *pidx)
+{
+ Elf_Scn *sc;
+ const Elf64_Ehdr *eh;
+ const Elf64_Shdr *sh;
+ Elf_Data *sd;
+ const char *sn;
+ int32_t rc;
+
+ eh = elf64_getehdr(elf);
+ if (eh == NULL) {
+ rc = elf_errno();
+ RTE_LOG(ERR, USER1, "%s(%p, %s) error code: %d(%s)\n",
+ __func__, elf, section, rc, elf_errmsg(rc));
+ return -EINVAL;
+ }
+
+ if (check_elf_header(eh) != 0)
+ return -EINVAL;
+
+ /* find given section by name */
+ for (sc = elf_nextscn(elf, NULL); sc != NULL;
+ sc = elf_nextscn(elf, sc)) {
+ sh = elf64_getshdr(sc);
+ sn = elf_strptr(elf, eh->e_shstrndx, sh->sh_name);
+ if (sn != NULL && strcmp(section, sn) == 0 &&
+ sh->sh_type == SHT_PROGBITS &&
+ sh->sh_flags == (SHF_ALLOC | SHF_EXECINSTR))
+ break;
+ }
+
+ sd = elf_getdata(sc, NULL);
+ if (sd == NULL || sd->d_size == 0 ||
+ sd->d_size % sizeof(struct bpf_insn) != 0) {
+ rc = elf_errno();
+ RTE_LOG(ERR, USER1, "%s(%p, %s) error code: %d(%s)\n",
+ __func__, elf, section, rc, elf_errmsg(rc));
+ return -EINVAL;
+ }
+
+ *psd = sd;
+ *pidx = elf_ndxscn(sc);
+ return 0;
+}
+
+/*
+ * helper function to process data from the relocation table.
+ */
+static int
+process_reloc(Elf *elf, size_t sym_idx, Elf64_Rel *re, size_t re_sz,
+ struct bpf_insn *ins, size_t ins_sz, const struct rte_bpf_prm *prm)
+{
+ uint32_t i, idx, fidx, n;
+ size_t ofs, sym;
+ const char *sn;
+ const Elf64_Ehdr *eh;
+ Elf_Scn *sc;
+ const Elf_Data *sd;
+ Elf64_Sym *sm;
+
+ eh = elf64_getehdr(elf);
+
+ /* get symtable by section index */
+ sc = elf_getscn(elf, sym_idx);
+ sd = elf_getdata(sc, NULL);
+ if (sd == NULL)
+ return -EINVAL;
+ sm = sd->d_buf;
+
+ n = re_sz / sizeof(re[0]);
+ for (i = 0; i != n; i++) {
+
+ ofs = re[i].r_offset;
+ if (ofs % sizeof(ins[0]) != 0 || ofs >= ins_sz)
+ return -EINVAL;
+
+ idx = ofs / sizeof(ins[0]);
+ if (ins[idx].code != (BPF_JMP | BPF_CALL))
+ return -EINVAL;
+
+ /* retrieve index in the symtable */
+ sym = ELF64_R_SYM(re[i].r_info);
+ if (sym * sizeof(sm[0]) >= sd->d_size)
+ return -EINVAL;
+
+ sn = elf_strptr(elf, eh->e_shstrndx, sm[sym].st_name);
+
+ fidx = bpf_find_func(sn, prm->xsym, prm->nb_xsym);
+ if (fidx == UINT32_MAX)
+ return -EINVAL;
+
+ ins[idx].imm = fidx;
+ }
+
+ return 0;
+}
+
+/*
+ * helper function, find relocation information (if any)
+ * and update bpf code.
+ */
+static int
+elf_reloc_code(Elf *elf, Elf_Data *ed, size_t sidx,
+ const struct rte_bpf_prm *prm)
+{
+ Elf64_Rel *re;
+ Elf_Scn *sc;
+ const Elf64_Shdr *sh;
+ const Elf_Data *sd;
+ int32_t rc;
+
+ rc = 0;
+
+ /* walk through all sections */
+ for (sc = elf_nextscn(elf, NULL); sc != NULL && rc == 0;
+ sc = elf_nextscn(elf, sc)) {
+
+ sh = elf64_getshdr(sc);
+
+ /* relocation data for our code section */
+ if (sh->sh_type == SHT_REL && sh->sh_info == sidx) {
+ sd = elf_getdata(sc, NULL);
+ if (sd == NULL || sd->d_size == 0 ||
+ sd->d_size % sizeof(re[0]) != 0)
+ return -EINVAL;
+ rc = process_reloc(elf, sh->sh_link,
+ sd->d_buf, sd->d_size, ed->d_buf, ed->d_size,
+ prm);
+ }
+ }
+
+ return rc;
+}
+
+static struct rte_bpf *
+bpf_load(const struct rte_bpf_prm *prm)
+{
+ uint8_t *buf;
+ struct rte_bpf *bpf;
+ size_t sz, bsz, insz, xsz;
+
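+ /*
+ * Allocate one mapping holding the rte_bpf structure itself,
+ * followed by a copy of the xsym[] array and a copy of the
+ * instruction array.
+ */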
+ xsz = prm->nb_xsym * sizeof(prm->xsym[0]);
+ insz = prm->nb_ins * sizeof(prm->ins[0]);
+ bsz = sizeof(bpf[0]);
+ sz = insz + xsz + bsz;
+
+ buf = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (buf == MAP_FAILED)
+ return NULL;
+
+ bpf = (void *)buf;
+ bpf->sz = sz;
+
+ memcpy(&bpf->prm, prm, sizeof(bpf->prm));
+
+ memcpy(buf + bsz, prm->xsym, xsz);
+ memcpy(buf + bsz + xsz, prm->ins, insz);
+
+ bpf->prm.xsym = (void *)(buf + bsz);
+ bpf->prm.ins = (void *)(buf + bsz + xsz);
+
+ return bpf;
+}
+
+__rte_experimental struct rte_bpf *
+rte_bpf_load(const struct rte_bpf_prm *prm)
+{
+ struct rte_bpf *bpf;
+ int32_t rc;
+
+ if (prm == NULL || prm->ins == NULL) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ bpf = bpf_load(prm);
+ if (bpf == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ rc = bpf_validate(bpf);
+ if (rc == 0) {
+ bpf_jit(bpf);
+ if (mprotect(bpf, bpf->sz, PROT_READ) != 0)
+ rc = -ENOMEM;
+ }
+
+ if (rc != 0) {
+ rte_bpf_destroy(bpf);
+ rte_errno = -rc;
+ return NULL;
+ }
+
+ return bpf;
+}
+
+static struct rte_bpf *
+bpf_load_elf(const struct rte_bpf_prm *prm, int32_t fd, const char *section)
+{
+ Elf *elf;
+ Elf_Data *sd;
+ size_t sidx;
+ int32_t rc;
+ struct rte_bpf *bpf;
+ struct rte_bpf_prm np;
+
+ elf_version(EV_CURRENT);
+ elf = elf_begin(fd, ELF_C_READ, NULL);
+
+ rc = find_elf_code(elf, section, &sd, &sidx);
+ if (rc == 0)
+ rc = elf_reloc_code(elf, sd, sidx, prm);
+
+ if (rc == 0) {
+ np = prm[0];
+ np.ins = sd->d_buf;
+ np.nb_ins = sd->d_size / sizeof(struct bpf_insn);
+ bpf = rte_bpf_load(&np);
+ } else {
+ bpf = NULL;
+ rte_errno = -rc;
+ }
+
+ elf_end(elf);
+ return bpf;
+}
+
+__rte_experimental struct rte_bpf *
+rte_bpf_elf_load(const struct rte_bpf_prm *prm, const char *fname,
+ const char *sname)
+{
+ int32_t fd, rc;
+ struct rte_bpf *bpf;
+
+ if (prm == NULL || fname == NULL || sname == NULL) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ fd = open(fname, O_RDONLY);
+ if (fd < 0) {
+ rc = errno;
+ RTE_LOG(ERR, USER1, "%s(%s) error code: %d(%s)\n",
+ __func__, fname, rc, strerror(rc));
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ bpf = bpf_load_elf(prm, fd, sname);
+ close(fd);
+
+ if (bpf == NULL) {
+ RTE_LOG(ERR, USER1,
+ "%s(fname=\"%s\", sname=\"%s\") failed, "
+ "error code: %d\n",
+ __func__, fname, sname, rte_errno);
+ return NULL;
+ }
+
+ RTE_LOG(INFO, USER1, "%s(fname=\"%s\", sname=\"%s\") "
+ "successfully creates %p;\n",
+ __func__, fname, sname, bpf);
+ return bpf;
+}
diff --git a/lib/librte_bpf/bpf_validate.c b/lib/librte_bpf/bpf_validate.c
new file mode 100644
index 000000000..7c1267cbd
--- /dev/null
+++ b/lib/librte_bpf/bpf_validate.c
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_eal.h>
+
+#include "bpf_impl.h"
+
+/*
+ * dummy implementation for now; needs more work.
+ */
+int
+bpf_validate(struct rte_bpf *bpf)
+{
+ int32_t rc, ofs, stack_sz;
+ uint32_t i, op, dr;
+ const struct bpf_insn *ins;
+
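+ /*
+ * Estimate stack usage: scan for stores that use R10 (the frame
+ * pointer) as base register and track the deepest (most negative)
+ * offset touched.
+ */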
+ rc = 0;
+ stack_sz = 0;
+ for (i = 0; i != bpf->prm.nb_ins; i++) {
+
+ ins = bpf->prm.ins + i;
+ op = ins->code;
+ dr = ins->dst_reg;
+ ofs = ins->off;
+
+ if ((BPF_CLASS(op) == BPF_STX || BPF_CLASS(op) == BPF_ST) &&
+ dr == BPF_REG_10) {
+ ofs -= sizeof(uint64_t);
+ stack_sz = RTE_MIN(ofs, stack_sz);
+ }
+ }
+
+ if (stack_sz != 0) {
+ stack_sz = -stack_sz;
+ if (stack_sz > MAX_BPF_STACK_SIZE)
+ rc = -ERANGE;
+ else
+ bpf->stack_sz = stack_sz;
+ }
+
+ if (rc != 0)
+ RTE_LOG(ERR, USER1, "%s(%p) failed, error code: %d;\n",
+ __func__, bpf, rc);
+ return rc;
+}
diff --git a/lib/librte_bpf/rte_bpf.h b/lib/librte_bpf/rte_bpf.h
new file mode 100644
index 000000000..45f622818
--- /dev/null
+++ b/lib/librte_bpf/rte_bpf.h
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_BPF_H_
+#define _RTE_BPF_H_
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <linux/bpf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Possible types for external symbols.
+ */
+enum rte_bpf_xtype {
+ RTE_BPF_XTYPE_FUNC, /**< function */
+ RTE_BPF_XTYPE_NUM
+};
+
+/**
+ * Definition for external symbols available in the BPF program.
+ */
+struct rte_bpf_xsym {
+ const char *name; /**< name */
+ enum rte_bpf_xtype type; /**< type */
+ uint64_t (*func)(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
+ /**< value */
+};
+
+/**
+ * Possible BPF program types.
+ */
+enum rte_bpf_prog_type {
+ RTE_BPF_PROG_TYPE_UNSPEC = BPF_PROG_TYPE_UNSPEC,
+ /**< input is a pointer to raw data */
+ RTE_BPF_PROG_TYPE_MBUF,
+ /**< input is a pointer to rte_mbuf */
+};
+
+/**
+ * Input parameters for loading eBPF code.
+ */
+struct rte_bpf_prm {
+ const struct bpf_insn *ins; /**< array of eBPF instructions */
+ uint32_t nb_ins; /**< number of instructions in ins */
+ const struct rte_bpf_xsym *xsym;
+ /**< array of external symbols that eBPF code is allowed to reference */
+ uint32_t nb_xsym; /**< number of elements in xsym */
+ enum rte_bpf_prog_type prog_type; /**< eBPF program type */
+};
+
+/**
+ * Information about eBPF code compiled into the native ISA.
+ */
+struct rte_bpf_jit {
+ uint64_t (*func)(void *);
+ size_t sz;
+};
+
+struct rte_bpf;
+
+/**
+ * De-allocate all memory used by this eBPF execution context.
+ *
+ * @param bpf
+ * BPF handle to destroy.
+ */
+void rte_bpf_destroy(struct rte_bpf *bpf);
+
+/**
+ * Create a new eBPF execution context and load the given BPF code into it.
+ *
+ * @param prm
+ * Parameters used to create and initialise the BPF execution context.
+ * @return
+ * BPF handle that is used in future BPF operations,
+ * or NULL on error, with error code set in rte_errno.
+ * Possible rte_errno errors include:
+ * - EINVAL - invalid parameter passed to function
+ * - ENOMEM - can't reserve enough memory
+ */
+struct rte_bpf *rte_bpf_load(const struct rte_bpf_prm *prm);
+
+/**
+ * Create a new eBPF execution context and load BPF code from the given
+ * ELF file into it.
+ *
+ * @param prm
+ * Parameters used to create and initialise the BPF execution context.
+ * @param fname
+ * Pathname of an ELF file.
+ * @param sname
+ * Name of the executable section within the file to load.
+ * @return
+ * BPF handle that is used in future BPF operations,
+ * or NULL on error, with error code set in rte_errno.
+ * Possible rte_errno errors include:
+ * - EINVAL - invalid parameter passed to function
+ * - ENOMEM - can't reserve enough memory
+ */
+struct rte_bpf *rte_bpf_elf_load(const struct rte_bpf_prm *prm,
+ const char *fname, const char *sname);
+
+/**
+ * Execute the given BPF bytecode.
+ *
+ * @param bpf
+ * handle for the BPF code to execute.
+ * @param ctx
+ * pointer to input context.
+ * @return
+ * BPF execution return value.
+ */
+uint64_t rte_bpf_exec(const struct rte_bpf *bpf, void *ctx);
+
+/**
+ * Execute the given BPF bytecode over a set of input contexts.
+ *
+ * @param bpf
+ * handle for the BPF code to execute.
+ * @param ctx
+ * array of pointers to the input contexts.
+ * @param rc
+ * array of return values (one per input).
+ * @param num
+ * number of elements in ctx[] (and rc[]).
+ * @return
+ * number of successfully processed inputs.
+ */
+uint32_t rte_bpf_exec_burst(const struct rte_bpf *bpf, void *ctx[],
+ uint64_t rc[], uint32_t num);
+
+/**
+ * Provide information about natively compiled code for the given BPF handle.
+ *
+ * @param bpf
+ * handle for the BPF code.
+ * @param jit
+ * pointer to the rte_bpf_jit structure to be filled with related data.
+ * @return
+ * - -EINVAL if the parameters are invalid.
+ * - Zero if operation completed successfully.
+ */
+int rte_bpf_get_jit(const struct rte_bpf *bpf, struct rte_bpf_jit *jit);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BPF_H_ */
diff --git a/lib/librte_bpf/rte_bpf_version.map b/lib/librte_bpf/rte_bpf_version.map
new file mode 100644
index 000000000..ff65144df
--- /dev/null
+++ b/lib/librte_bpf/rte_bpf_version.map
@@ -0,0 +1,12 @@
+EXPERIMENTAL {
+ global:
+
+ rte_bpf_destroy;
+ rte_bpf_elf_load;
+ rte_bpf_exec;
+ rte_bpf_exec_burst;
+ rte_bpf_get_jit;
+ rte_bpf_load;
+
+ local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 3eb41d176..fb41c77d2 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -83,6 +83,8 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_POWER) += -lrte_power
_LDLIBS-$(CONFIG_RTE_LIBRTE_TIMER) += -lrte_timer
_LDLIBS-$(CONFIG_RTE_LIBRTE_EFD) += -lrte_efd
+_LDLIBS-$(CONFIG_RTE_LIBRTE_BPF) += -lrte_bpf -lelf
+
_LDLIBS-y += --whole-archive
_LDLIBS-$(CONFIG_RTE_LIBRTE_CFGFILE) += -lrte_cfgfile
--
2.13.6
* [dpdk-dev] [RFC PATCH 2/5] bpf: add JIT compilation for x86_64 ISA.
2018-03-08 1:29 [dpdk-dev] [RFC PATCH 0/5] add framework to load and execute BPF code Konstantin Ananyev
2018-03-08 1:29 ` [dpdk-dev] [RFC PATCH 1/5] bpf: add BPF loading and execution framework Konstantin Ananyev
@ 2018-03-08 1:29 ` Konstantin Ananyev
2018-03-08 1:30 ` [dpdk-dev] [RFC PATCH 3/5] bpf: introduce basic RX/TX BPF filters Konstantin Ananyev
` (2 subsequent siblings)
4 siblings, 0 replies; 10+ messages in thread
From: Konstantin Ananyev @ 2018-03-08 1:29 UTC (permalink / raw)
To: dev; +Cc: Konstantin Ananyev
Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
lib/librte_bpf/Makefile | 3 +
lib/librte_bpf/bpf.c | 4 +
lib/librte_bpf/bpf_jit_x86.c | 1155 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 1162 insertions(+)
create mode 100644 lib/librte_bpf/bpf_jit_x86.c
diff --git a/lib/librte_bpf/Makefile b/lib/librte_bpf/Makefile
index e0f434e77..44b12c439 100644
--- a/lib/librte_bpf/Makefile
+++ b/lib/librte_bpf/Makefile
@@ -23,6 +23,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf.c
SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_exec.c
SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_load.c
SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_validate.c
+ifeq ($(CONFIG_RTE_ARCH_X86_64),y)
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_jit_x86.c
+endif
# install header files
SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += rte_bpf.h
diff --git a/lib/librte_bpf/bpf.c b/lib/librte_bpf/bpf.c
index 4727d2251..b69d20fc8 100644
--- a/lib/librte_bpf/bpf.c
+++ b/lib/librte_bpf/bpf.c
@@ -39,7 +39,11 @@ bpf_jit(struct rte_bpf *bpf)
{
int32_t rc;
+#ifdef RTE_ARCH_X86_64
+ rc = bpf_jit_x86(bpf);
+#else
rc = -ENOTSUP;
+#endif
if (rc != 0)
RTE_LOG(WARNING, USER1, "%s(%p) failed, error code: %d;\n",
diff --git a/lib/librte_bpf/bpf_jit_x86.c b/lib/librte_bpf/bpf_jit_x86.c
new file mode 100644
index 000000000..b1ba37aec
--- /dev/null
+++ b/lib/librte_bpf/bpf_jit_x86.c
@@ -0,0 +1,1155 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_byteorder.h>
+
+#include "bpf_impl.h"
+
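+/*
+ * Convert the BPF operation (bits 4-7 of the opcode) into a compact
+ * index for the per-operation opcode lookup tables below.
+ */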
+#define GET_BPF_OP(op) (BPF_OP(op) >> 4)
+
+enum {
+ RAX = 0, /* scratch, return value */
+ RCX = 1, /* scratch, 4th arg */
+ RDX = 2, /* scratch, 3rd arg */
+ RBX = 3, /* callee saved */
+ RSP = 4, /* stack pointer */
+ RBP = 5, /* frame pointer, callee saved */
+ RSI = 6, /* scratch, 2nd arg */
+ RDI = 7, /* scratch, 1st arg */
+ R8 = 8, /* scratch, 5th arg */
+ R9 = 9, /* scratch, 6th arg */
+ R10 = 10, /* scratch */
+ R11 = 11, /* scratch */
+ R12 = 12, /* callee saved */
+ R13 = 13, /* callee saved */
+ R14 = 14, /* callee saved */
+ R15 = 15, /* callee saved */
+};
+
+#define IS_EXT_REG(r) ((r) >= R8)
+
+enum {
+ REX_PREFIX = 0x40, /* fixed value 0100 */
+ REX_W = 0x8, /* 64bit operand size */
+ REX_R = 0x4, /* extension of the ModRM.reg field */
+ REX_X = 0x2, /* extension of the SIB.index field */
+ REX_B = 0x1, /* extension of the ModRM.rm field */
+};
+
+enum {
+ MOD_INDIRECT = 0,
+ MOD_IDISP8 = 1,
+ MOD_IDISP32 = 2,
+ MOD_DIRECT = 3,
+};
+
+enum {
+ SIB_SCALE_1 = 0,
+ SIB_SCALE_2 = 1,
+ SIB_SCALE_4 = 2,
+ SIB_SCALE_8 = 3,
+};
+
+/*
+ * eBPF to x86_64 register mappings.
+ */
+static const uint32_t ebpf2x86[] = {
+ [BPF_REG_0] = RAX,
+ [BPF_REG_1] = RDI,
+ [BPF_REG_2] = RSI,
+ [BPF_REG_3] = RDX,
+ [BPF_REG_4] = RCX,
+ [BPF_REG_5] = R8,
+ [BPF_REG_6] = RBX,
+ [BPF_REG_7] = R13,
+ [BPF_REG_8] = R14,
+ [BPF_REG_9] = R15,
+ [BPF_REG_10] = RBP,
+};
+
+/*
+ * R9 holds an immediate divisor; R10 and R11 are used as scratch
+ * temporary registers.
+ */
+enum {
+ REG_DIV_IMM = R9,
+ REG_TMP0 = R11,
+ REG_TMP1 = R10,
+};
+
+/*
+ * callee saved registers list.
+ * keep RBP as the last one.
+ */
+static const uint32_t save_regs[] = {RBX, R12, R13, R14, R15, RBP};
+
+struct bpf_jit_state {
+ uint32_t idx;
+ size_t sz;
+ struct {
+ uint32_t num;
+ int32_t off;
+ } exit;
+ uint32_t reguse;
+ int32_t *off;
+ uint8_t *ins;
+};
+
+#define INUSE(v, r) (((v) >> (r)) & 1)
+#define USED(v, r) ((v) |= 1 << (r))
+
+union bpf_jit_imm {
+ uint32_t u32;
+ uint8_t u8[4];
+};
+
+static size_t
+bpf_size(uint32_t bpf_op_sz)
+{
+ if (bpf_op_sz == BPF_B)
+ return sizeof(uint8_t);
+ else if (bpf_op_sz == BPF_H)
+ return sizeof(uint16_t);
+ else if (bpf_op_sz == BPF_W)
+ return sizeof(uint32_t);
+ else if (bpf_op_sz == BPF_DW)
+ return sizeof(uint64_t);
+ return 0;
+}
+
+static size_t
+imm_size(int32_t v)
+{
+ if (v == (int8_t)v)
+ return sizeof(int8_t);
+ return sizeof(int32_t);
+}
+
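+/*
+ * Emit a sequence of opcode bytes.
+ * If st->ins is NULL (sizing pass), only st->sz is advanced.
+ */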
+static void
+emit_bytes(struct bpf_jit_state *st, const uint8_t ins[], uint32_t sz)
+{
+ uint32_t i;
+
+ if (st->ins != NULL) {
+ for (i = 0; i != sz; i++)
+ st->ins[st->sz + i] = ins[i];
+ }
+ st->sz += sz;
+}
+
+static void
+emit_imm(struct bpf_jit_state *st, const uint32_t imm, uint32_t sz)
+{
+ union bpf_jit_imm v;
+
+ v.u32 = imm;
+ emit_bytes(st, v.u8, sz);
+}
+
+static void
+emit_rex(struct bpf_jit_state *st, uint32_t op, uint32_t reg, uint32_t rm)
+{
+ uint8_t rex;
+
+ /* mark operand registers as used */
+ USED(st->reguse, reg);
+ USED(st->reguse, rm);
+
+ rex = 0;
+ if (BPF_CLASS(op) == BPF_ALU64 ||
+ op == (BPF_ST | BPF_MEM | BPF_DW) ||
+ op == (BPF_STX | BPF_MEM | BPF_DW) ||
+ op == (BPF_STX | BPF_XADD | BPF_DW) ||
+ op == (BPF_LD | BPF_IMM | BPF_DW) ||
+ (BPF_CLASS(op) == BPF_LDX &&
+ BPF_MODE(op) == BPF_MEM &&
+ BPF_SIZE(op) != BPF_W))
+ rex |= REX_W;
+
+ if (IS_EXT_REG(reg))
+ rex |= REX_R;
+
+ if (IS_EXT_REG(rm))
+ rex |= REX_B;
+
+ /* store using SIL, DIL */
+ if (op == (BPF_STX | BPF_MEM | BPF_B) && (reg == RDI || reg == RSI))
+ rex |= REX_PREFIX;
+
+ if (rex != 0) {
+ rex |= REX_PREFIX;
+ emit_bytes(st, &rex, sizeof(rex));
+ }
+}
+
+static void
+emit_modregrm(struct bpf_jit_state *st, uint32_t mod, uint32_t reg, uint32_t rm)
+{
+ uint8_t v;
+
+ v = mod << 6 | (reg & 7) << 3 | (rm & 7);
+ emit_bytes(st, &v, sizeof(v));
+}
+
+static void
+emit_sib(struct bpf_jit_state *st, uint32_t scale, uint32_t idx, uint32_t base)
+{
+ uint8_t v;
+
+ v = scale << 6 | (idx & 7) << 3 | (base & 7);
+ emit_bytes(st, &v, sizeof(v));
+}
+
+static void
+emit_xchg_reg(struct bpf_jit_state *st, uint32_t sreg, uint32_t dreg)
+{
+ static const uint8_t ops = 0x87;
+
+ emit_rex(st, BPF_ALU64, sreg, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+}
+
+static void
+emit_neg(struct bpf_jit_state *st, uint32_t op, uint32_t dreg)
+{
+ const uint8_t ops = 0xF7;
+ const uint8_t mods = 3;
+
+ emit_rex(st, op, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, dreg);
+}
+
+static void
+emit_mov_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg)
+{
+ static const uint8_t ops = 0x89;
+
+ if (sreg != dreg || BPF_CLASS(op) == BPF_ALU) {
+ emit_rex(st, op, sreg, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+ }
+}
+
+static void
+emit_movzwl(struct bpf_jit_state *st, uint32_t sreg, uint32_t dreg)
+{
+ const uint8_t ops[] = {0x0F, 0xB7};
+
+ emit_rex(st, BPF_ALU, sreg, dreg);
+ emit_bytes(st, ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+}
+
+static void
+emit_ror_imm(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm)
+{
+ const uint8_t prfx = 0x66;
+ const uint8_t ops = 0xC1;
+ const uint8_t mods = 1;
+
+ emit_bytes(st, &prfx, sizeof(prfx));
+ emit_rex(st, BPF_ALU, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, dreg);
+ emit_imm(st, imm, imm_size(imm));
+}
+
+static void
+emit_be2le_48(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm)
+{
+ uint32_t rop;
+
+ const uint8_t ops = 0x0F;
+ const uint8_t mods = 1;
+
+ rop = (imm == 64) ? BPF_ALU64 : BPF_ALU;
+ emit_rex(st, rop, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, dreg);
+}
+
+static void
+emit_be2le(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm)
+{
+ if (imm == 16) {
+ emit_ror_imm(st, dreg, 8);
+ emit_movzwl(st, dreg, dreg);
+ } else
+ emit_be2le_48(st, dreg, imm);
+}
+
+static void
+emit_le2be(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm)
+{
+ if (imm == 16)
+ emit_movzwl(st, dreg, dreg);
+ else if (imm == 32)
+ emit_mov_reg(st, BPF_ALU | BPF_MOV | BPF_X, dreg, dreg);
+}
+
+static void
+emit_alu_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm)
+{
+ uint8_t mod, opcode;
+ uint32_t bop, imsz;
+
+ const uint8_t op8 = 0x83;
+ const uint8_t op32 = 0x81;
+ const uint8_t mods[] = {
+ [GET_BPF_OP(BPF_ADD)] = 0,
+ [GET_BPF_OP(BPF_AND)] = 4,
+ [GET_BPF_OP(BPF_OR)] = 1,
+ [GET_BPF_OP(BPF_SUB)] = 5,
+ [GET_BPF_OP(BPF_XOR)] = 6,
+ };
+
+ bop = GET_BPF_OP(op);
+ mod = mods[bop];
+
+ imsz = imm_size(imm);
+ opcode = (imsz == 1) ? op8 : op32;
+
+ emit_rex(st, op, 0, dreg);
+ emit_bytes(st, &opcode, sizeof(opcode));
+ emit_modregrm(st, MOD_DIRECT, mod, dreg);
+ emit_imm(st, imm, imsz);
+}
+
+static void
+emit_alu_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg)
+{
+ uint32_t bop;
+
+ static const uint8_t ops[] = {
+ [GET_BPF_OP(BPF_ADD)] = 0x01,
+ [GET_BPF_OP(BPF_AND)] = 0x21,
+ [GET_BPF_OP(BPF_OR)] = 0x09,
+ [GET_BPF_OP(BPF_SUB)] = 0x29,
+ [GET_BPF_OP(BPF_XOR)] = 0x31,
+ };
+
+ bop = GET_BPF_OP(op);
+
+ emit_rex(st, op, sreg, dreg);
+ emit_bytes(st, &ops[bop], sizeof(ops[bop]));
+ emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+}
+
+static void
+emit_shift(struct bpf_jit_state *st, uint32_t op, uint32_t dreg)
+{
+ uint8_t mod;
+ uint32_t bop, opx;
+
+ static const uint8_t ops[] = {0xC1, 0xD3};
+ static const uint8_t mods[] = {
+ [GET_BPF_OP(BPF_LSH)] = 4,
+ [GET_BPF_OP(BPF_RSH)] = 5,
+ [GET_BPF_OP(BPF_ARSH)] = 7,
+ };
+
+ bop = GET_BPF_OP(op);
+ mod = mods[bop];
+ opx = (BPF_SRC(op) == BPF_X);
+
+ emit_rex(st, op, 0, dreg);
+ emit_bytes(st, &ops[opx], sizeof(ops[opx]));
+ emit_modregrm(st, MOD_DIRECT, mod, dreg);
+}
+
+static void
+emit_shift_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg,
+ uint32_t imm)
+{
+ emit_shift(st, op, dreg);
+ emit_imm(st, imm, imm_size(imm));
+}
+
+static void
+emit_shift_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg)
+{
+ if (sreg != RCX)
+ emit_xchg_reg(st, RCX, sreg);
+
+ emit_shift(st, op, dreg);
+
+ if (sreg != RCX)
+ emit_xchg_reg(st, RCX, sreg);
+}
+
+static void
+emit_mov_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm)
+{
+ static const uint8_t ops = 0xC7;
+
+ if (imm == 0) {
+ /* replace 'mov <dst>, 0' with 'xor <dst>, <dst>' */
+ op = BPF_CLASS(op) | BPF_XOR | BPF_X;
+ emit_alu_reg(st, op, dreg, dreg);
+ return;
+ }
+
+ emit_rex(st, op, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, 0, dreg);
+ emit_imm(st, imm, sizeof(imm));
+}
+
+static void
+emit_ld_imm64(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm0,
+ uint32_t imm1)
+{
+ static const uint8_t ops = 0xB8;
+
+ if (imm1 == 0) {
+ emit_mov_imm(st, BPF_ALU64 | BPF_MOV | BPF_K, dreg, imm0);
+ return;
+ }
+
+ emit_rex(st, BPF_ALU64, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, 0, dreg);
+
+ emit_imm(st, imm0, sizeof(imm0));
+ emit_imm(st, imm1, sizeof(imm1));
+}
+
+static void
+emit_mul(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, uint32_t dreg,
+ uint32_t imm)
+{
+ const uint8_t ops = 0xF7;
+ const uint8_t mods = 4;
+
+ /* save rax & rdx */
+ emit_mov_reg(st, BPF_ALU64 | BPF_MOV | BPF_X, RAX, REG_TMP0);
+ emit_mov_reg(st, BPF_ALU64 | BPF_MOV | BPF_X, RDX, REG_TMP1);
+
+ /* rax = dreg */
+ emit_mov_reg(st, BPF_ALU64 | BPF_MOV | BPF_X, dreg, RAX);
+
+ if (BPF_SRC(op) == BPF_X)
+ /* rdx = sreg */
+ emit_mov_reg(st, BPF_ALU64 | BPF_MOV | BPF_X,
+ sreg == RAX ? REG_TMP0 : sreg, RDX);
+ else
+ /* rdx = imm */
+ emit_mov_imm(st, BPF_ALU64 | BPF_MOV | BPF_K, RDX, imm);
+
+ emit_rex(st, op, RAX, RDX);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, RDX);
+
+ if (dreg != RDX)
+ /* restore rdx */
+ emit_mov_reg(st, BPF_ALU64 | BPF_MOV | BPF_X, REG_TMP1, RDX);
+
+ if (dreg != RAX) {
+ /* dreg = rax */
+ emit_mov_reg(st, BPF_ALU64 | BPF_MOV | BPF_X, RAX, dreg);
+ /* restore rax */
+ emit_mov_reg(st, BPF_ALU64 | BPF_MOV | BPF_X, REG_TMP0, RAX);
+ }
+}
+
+static void
+emit_ld_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, uint32_t dreg,
+ int32_t ofs)
+{
+ uint32_t mods, opsz;
+ const uint8_t op32 = 0x8B;
+ const uint8_t op16[] = {0x0F, 0xB7};
+ const uint8_t op8[] = {0x0F, 0xB6};
+
+ emit_rex(st, op, dreg, sreg);
+
+ opsz = BPF_SIZE(op);
+ if (opsz == BPF_B)
+ emit_bytes(st, op8, sizeof(op8));
+ else if (opsz == BPF_H)
+ emit_bytes(st, op16, sizeof(op16));
+ else
+ emit_bytes(st, &op32, sizeof(op32));
+
+ mods = (imm_size(ofs) == 1) ? MOD_IDISP8 : MOD_IDISP32;
+
+ emit_modregrm(st, mods, dreg, sreg);
+ if (sreg == RSP || sreg == R12)
+ emit_sib(st, SIB_SCALE_1, sreg, sreg);
+ emit_imm(st, ofs, imm_size(ofs));
+}
+
+static void
+emit_st_common(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg, uint32_t imm, int32_t ofs)
+{
+ uint32_t mods, imsz, opsz, opx;
+ const uint8_t prfx16 = 0x66;
+
+ /* 8 bit instruction opcodes */
+ const uint8_t op8[] = {0xC6, 0x88};
+
+ /* 16/32/64 bit instruction opcodes */
+ const uint8_t ops[] = {0xC7, 0x89};
+
+ /* does the instruction use an immediate value or a src reg? */
+ opx = (BPF_CLASS(op) == BPF_STX);
+
+ opsz = BPF_SIZE(op);
+ if (opsz == BPF_H)
+ emit_bytes(st, &prfx16, sizeof(prfx16));
+
+ emit_rex(st, op, sreg, dreg);
+
+ if (opsz == BPF_B)
+ emit_bytes(st, &op8[opx], sizeof(op8[opx]));
+ else
+ emit_bytes(st, &ops[opx], sizeof(ops[opx]));
+
+ imsz = imm_size(ofs);
+ mods = (imsz == 1) ? MOD_IDISP8 : MOD_IDISP32;
+
+ emit_modregrm(st, mods, sreg, dreg);
+
+ if (dreg == RSP || dreg == R12)
+ emit_sib(st, SIB_SCALE_1, dreg, dreg);
+
+ emit_imm(st, ofs, imsz);
+
+ if (opx == 0)
+ emit_imm(st, imm, bpf_size(opsz));
+}
+
+static void
+emit_st_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm,
+ int32_t ofs)
+{
+ emit_st_common(st, op, 0, dreg, imm, ofs);
+}
+
+static void
+emit_st_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, uint32_t dreg,
+ int32_t ofs)
+{
+ emit_st_common(st, op, sreg, dreg, 0, ofs);
+}
+
+static void
+emit_st_xadd(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg, int32_t ofs)
+{
+ uint32_t imsz, mods;
+
+ const uint8_t lck = 0xF0; /* lock prefix */
+ const uint8_t ops = 0x01; /* add opcode */
+
+ imsz = imm_size(ofs);
+ mods = (imsz == 1) ? MOD_IDISP8 : MOD_IDISP32;
+
+ emit_bytes(st, &lck, sizeof(lck));
+ emit_rex(st, op, sreg, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, mods, sreg, dreg);
+ emit_imm(st, ofs, imsz);
+}
+
+static void
+emit_call(struct bpf_jit_state *st, uintptr_t trg)
+{
+ const uint8_t ops = 0xFF;
+ const uint8_t mods = 2;
+
+ emit_ld_imm64(st, RAX, trg, trg >> 32);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, RAX);
+}
+
+static void
+emit_jmp(struct bpf_jit_state *st, int32_t ofs)
+{
+ int32_t joff;
+ uint32_t imsz;
+
+ const uint8_t op8 = 0xEB;
+ const uint8_t op32 = 0xE9;
+
+ const int32_t sz8 = sizeof(op8) + sizeof(uint8_t);
+ const int32_t sz32 = sizeof(op32) + sizeof(uint32_t);
+
+ /* max possible jmp instruction size */
+ const int32_t iszm = RTE_MAX(sz8, sz32);
+
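+ /*
+ * Displacement is taken from the per-instruction offset table;
+ * the short (8-bit) encoding is used only when the displacement
+ * still fits after accounting for the size of the jmp itself.
+ */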
+ joff = st->off[st->idx + ofs] - st->sz;
+ imsz = RTE_MAX(imm_size(joff), imm_size(joff + iszm));
+
+ if (imsz == 1) {
+ emit_bytes(st, &op8, sizeof(op8));
+ joff -= sz8;
+ } else {
+ emit_bytes(st, &op32, sizeof(op32));
+ joff -= sz32;
+ }
+
+ emit_imm(st, joff, imsz);
+}
+
+static void
+emit_movcc_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg)
+{
+ uint32_t bop;
+
+ static const uint8_t ops[][2] = {
+ [GET_BPF_OP(BPF_JEQ)] = {0x0F, 0x44}, /* CMOVZ */
+ [GET_BPF_OP(BPF_JNE)] = {0x0F, 0x45}, /* CMOVNE */
+ [GET_BPF_OP(BPF_JGT)] = {0x0F, 0x47}, /* CMOVA */
+ [GET_BPF_OP(BPF_JLT)] = {0x0F, 0x42}, /* CMOVB */
+ [GET_BPF_OP(BPF_JGE)] = {0x0F, 0x43}, /* CMOVAE */
+ [GET_BPF_OP(BPF_JLE)] = {0x0F, 0x46}, /* CMOVBE */
+ [GET_BPF_OP(BPF_JSGT)] = {0x0F, 0x4F}, /* CMOVG */
+ [GET_BPF_OP(BPF_JSLT)] = {0x0F, 0x4C}, /* CMOVL */
+ [GET_BPF_OP(BPF_JSGE)] = {0x0F, 0x4D}, /* CMOVGE */
+ [GET_BPF_OP(BPF_JSLE)] = {0x0F, 0x4E}, /* CMOVLE */
+ [GET_BPF_OP(BPF_JSET)] = {0x0F, 0x45}, /* CMOVNE */
+ };
+
+ bop = GET_BPF_OP(op);
+
+ emit_rex(st, op, sreg, dreg);
+ emit_bytes(st, ops[bop], sizeof(ops[bop]));
+ emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+}
+
+static void
+emit_jcc(struct bpf_jit_state *st, uint32_t op, int32_t ofs)
+{
+ uint32_t bop, imsz;
+ int32_t joff;
+
+ static const uint8_t op8[] = {
+ [GET_BPF_OP(BPF_JEQ)] = 0x74, /* JE */
+ [GET_BPF_OP(BPF_JNE)] = 0x75, /* JNE */
+ [GET_BPF_OP(BPF_JGT)] = 0x77, /* JA */
+ [GET_BPF_OP(BPF_JLT)] = 0x72, /* JB */
+ [GET_BPF_OP(BPF_JGE)] = 0x73, /* JAE */
+ [GET_BPF_OP(BPF_JLE)] = 0x76, /* JBE */
+ [GET_BPF_OP(BPF_JSGT)] = 0x7F, /* JG */
+ [GET_BPF_OP(BPF_JSLT)] = 0x7C, /* JL */
+ [GET_BPF_OP(BPF_JSGE)] = 0x7D, /* JGE */
+ [GET_BPF_OP(BPF_JSLE)] = 0x7E, /* JLE */
+ [GET_BPF_OP(BPF_JSET)] = 0x75, /* JNE */
+ };
+
+ static const uint8_t op32[][2] = {
+ [GET_BPF_OP(BPF_JEQ)] = {0x0F, 0x84}, /* JE */
+ [GET_BPF_OP(BPF_JNE)] = {0x0F, 0x85}, /* JNE */
+ [GET_BPF_OP(BPF_JGT)] = {0x0F, 0x87}, /* JA */
+ [GET_BPF_OP(BPF_JLT)] = {0x0F, 0x82}, /* JB */
+ [GET_BPF_OP(BPF_JGE)] = {0x0F, 0x83}, /* JAE */
+ [GET_BPF_OP(BPF_JLE)] = {0x0F, 0x86}, /* JBE */
+ [GET_BPF_OP(BPF_JSGT)] = {0x0F, 0x8F}, /* JG */
+ [GET_BPF_OP(BPF_JSLT)] = {0x0F, 0x8C}, /* JL */
+ [GET_BPF_OP(BPF_JSGE)] = {0x0F, 0x8D}, /* JGE */
+ [GET_BPF_OP(BPF_JSLE)] = {0x0F, 0x8E}, /* JLE */
+ [GET_BPF_OP(BPF_JSET)] = {0x0F, 0x85}, /* JNE */
+ };
+
+ const int32_t sz8 = sizeof(op8[0]) + sizeof(uint8_t);
+ const int32_t sz32 = sizeof(op32[0]) + sizeof(uint32_t);
+
+ /* max possible jcc instruction size */
+ const int32_t iszm = RTE_MAX(sz8, sz32);
+
+ joff = st->off[st->idx + ofs] - st->sz;
+ imsz = RTE_MAX(imm_size(joff), imm_size(joff + iszm));
+
+ bop = GET_BPF_OP(op);
+
+ if (imsz == 1) {
+ emit_bytes(st, &op8[bop], sizeof(op8[bop]));
+ joff -= sz8;
+ } else {
+ emit_bytes(st, op32[bop], sizeof(op32[bop]));
+ joff -= sz32;
+ }
+
+ emit_imm(st, joff, imsz);
+}
+
+static void
+emit_cmp_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm)
+{
+ uint8_t ops;
+ uint32_t imsz;
+
+ const uint8_t op8 = 0x83;
+ const uint8_t op32 = 0x81;
+ const uint8_t mods = 7;
+
+ imsz = imm_size(imm);
+ ops = (imsz == 1) ? op8 : op32;
+
+ emit_rex(st, op, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, dreg);
+ emit_imm(st, imm, imsz);
+}
+
+static void
+emit_tst_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg, uint32_t imm)
+{
+ const uint8_t ops = 0xF7;
+ const uint8_t mods = 0;
+
+ emit_rex(st, op, 0, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, dreg);
+ emit_imm(st, imm, imm_size(imm));
+}
+
+static void
+emit_jcc_imm(struct bpf_jit_state *st, uint32_t op, uint32_t dreg,
+ uint32_t imm, int32_t ofs)
+{
+ if (BPF_OP(op) == BPF_JSET)
+ emit_tst_imm(st, BPF_ALU64, dreg, imm);
+ else
+ emit_cmp_imm(st, BPF_ALU64, dreg, imm);
+
+ emit_jcc(st, op, ofs);
+}
+
+static void
+emit_tst_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg)
+{
+ const uint8_t ops = 0x85;
+
+ emit_rex(st, op, sreg, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+}
+
+static void
+emit_cmp_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg)
+{
+ const uint8_t ops = 0x39;
+
+ emit_rex(st, op, sreg, dreg);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, sreg, dreg);
+}
+
+static void
+emit_jcc_reg(struct bpf_jit_state *st, uint32_t op, uint32_t sreg,
+ uint32_t dreg, int32_t ofs)
+{
+ if (BPF_OP(op) == BPF_JSET)
+ emit_tst_reg(st, BPF_ALU64, sreg, dreg);
+ else
+ emit_cmp_reg(st, BPF_ALU64, sreg, dreg);
+
+ emit_jcc(st, op, ofs);
+}
+
+static void
+emit_div(struct bpf_jit_state *st, uint32_t op, uint32_t sreg, uint32_t dreg,
+ uint32_t imm)
+{
+ uint32_t sr;
+
+ const uint8_t ops = 0xF7;
+ const uint8_t mods = 6;
+
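+ /*
+ * For division by a register, guard against a zero divisor:
+ * if sreg == 0, CMOVZ copies that zero into RAX (the return
+ * register) and execution jumps straight to the epilog.
+ */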
+ if (BPF_SRC(op) == BPF_X) {
+ emit_tst_reg(st, BPF_CLASS(op), sreg, sreg);
+ emit_movcc_reg(st, BPF_CLASS(op) | BPF_JEQ | BPF_X, sreg, RAX);
+ emit_jcc(st, BPF_JMP | BPF_JEQ | BPF_K, st->exit.off);
+ }
+
+ /* save rax & rdx */
+ if (dreg != RAX)
+ emit_mov_reg(st, BPF_ALU64 | BPF_MOV | BPF_X, RAX, REG_TMP0);
+ if (dreg != RDX)
+ emit_mov_reg(st, BPF_ALU64 | BPF_MOV | BPF_X, RDX, REG_TMP1);
+
+ /* fill rax & rdx */
+ emit_mov_reg(st, BPF_ALU64 | BPF_MOV | BPF_X, dreg, RAX);
+ emit_mov_imm(st, BPF_ALU64 | BPF_MOV | BPF_K, RDX, 0);
+
+ if (BPF_SRC(op) == BPF_X) {
+ sr = sreg;
+ if (sr == RAX)
+ sr = REG_TMP0;
+ else if (sr == RDX)
+ sr = REG_TMP1;
+ } else {
+ sr = REG_DIV_IMM;
+ emit_mov_imm(st, BPF_ALU64 | BPF_MOV | BPF_K, sr, imm);
+ }
+
+ emit_rex(st, op, 0, sr);
+ emit_bytes(st, &ops, sizeof(ops));
+ emit_modregrm(st, MOD_DIRECT, mods, sr);
+
+ if (BPF_OP(op) == BPF_DIV)
+ emit_mov_reg(st, BPF_ALU64 | BPF_MOV | BPF_X, RAX, dreg);
+ else
+ emit_mov_reg(st, BPF_ALU64 | BPF_MOV | BPF_X, RDX, dreg);
+
+ if (dreg != RAX)
+ emit_mov_reg(st, BPF_ALU64 | BPF_MOV | BPF_X, REG_TMP0, RAX);
+ if (dreg != RDX)
+ emit_mov_reg(st, BPF_ALU64 | BPF_MOV | BPF_X, REG_TMP1, RDX);
+}
+
+static void
+emit_prolog(struct bpf_jit_state *st, int32_t stack_size)
+{
+ uint32_t i;
+ int32_t spil, ofs;
+
+ spil = 0;
+ for (i = 0; i != RTE_DIM(save_regs); i++)
+ spil += INUSE(st->reguse, save_regs[i]);
+
+ /* we can avoid touching the stack at all */
+ if (spil == 0)
+ return;
+
+ emit_alu_imm(st, BPF_ALU64 | BPF_SUB | BPF_K, RSP,
+ spil * sizeof(uint64_t));
+
+ ofs = 0;
+ for (i = 0; i != RTE_DIM(save_regs); i++) {
+ if (INUSE(st->reguse, save_regs[i]) != 0) {
+ emit_st_reg(st, BPF_STX | BPF_MEM | BPF_DW,
+ save_regs[i], RSP, ofs);
+ ofs += sizeof(uint64_t);
+ }
+ }
+
+ if (INUSE(st->reguse, RBP) != 0) {
+ emit_mov_reg(st, BPF_ALU64 | BPF_MOV | BPF_X, RSP, RBP);
+ emit_alu_imm(st, BPF_ALU64 | BPF_SUB | BPF_K, RSP, stack_size);
+ }
+}
+
+static void
+emit_ret(struct bpf_jit_state *st)
+{
+ const uint8_t ops = 0xC3;
+
+ emit_bytes(st, &ops, sizeof(ops));
+}
+
+static void
+emit_epilog(struct bpf_jit_state *st)
+{
+ uint32_t i;
+ int32_t spil, ofs;
+
+ /* if we already have an epilog, generate a jump to it */
+ if (st->exit.num++ != 0) {
+ emit_jcc(st, BPF_JMP | BPF_JA | BPF_K, st->exit.off);
+ return;
+ }
+
+ /* store offset of epilog block */
+ st->exit.off = st->sz;
+
+ spil = 0;
+ for (i = 0; i != RTE_DIM(save_regs); i++)
+ spil += INUSE(st->reguse, save_regs[i]);
+
+ if (spil != 0) {
+
+ if (INUSE(st->reguse, RBP) != 0)
+ emit_mov_reg(st, BPF_ALU64 | BPF_MOV | BPF_X, RBP, RSP);
+
+ ofs = 0;
+ for (i = 0; i != RTE_DIM(save_regs); i++) {
+ if (INUSE(st->reguse, save_regs[i]) != 0) {
+ emit_ld_reg(st, BPF_LDX | BPF_MEM | BPF_DW,
+ RSP, save_regs[i], ofs);
+ ofs += sizeof(uint64_t);
+ }
+ }
+
+ emit_alu_imm(st, BPF_ALU64 | BPF_ADD | BPF_K, RSP,
+ spil * sizeof(uint64_t));
+ }
+
+ emit_ret(st);
+}
+
+static int
+emit(struct bpf_jit_state *st, const struct rte_bpf *bpf)
+{
+ uint32_t i, dr, op, sr;
+ const struct bpf_insn *ins;
+
+ /* reset state fields */
+ st->sz = 0;
+ st->exit.num = 0;
+
+ emit_prolog(st, bpf->stack_sz);
+
+ for (i = 0; i != bpf->prm.nb_ins; i++) {
+
+ st->idx = i;
+ st->off[i] = st->sz;
+
+ ins = bpf->prm.ins + i;
+
+ dr = ebpf2x86[ins->dst_reg];
+ sr = ebpf2x86[ins->src_reg];
+ op = ins->code;
+
+ switch (op) {
+ /* 32 bit ALU IMM operations */
+ case (BPF_ALU | BPF_ADD | BPF_K):
+ case (BPF_ALU | BPF_SUB | BPF_K):
+ case (BPF_ALU | BPF_AND | BPF_K):
+ case (BPF_ALU | BPF_OR | BPF_K):
+ case (BPF_ALU | BPF_XOR | BPF_K):
+ emit_alu_imm(st, op, dr, ins->imm);
+ break;
+ case (BPF_ALU | BPF_LSH | BPF_K):
+ case (BPF_ALU | BPF_RSH | BPF_K):
+ emit_shift_imm(st, op, dr, ins->imm);
+ break;
+ case (BPF_ALU | BPF_MOV | BPF_K):
+ emit_mov_imm(st, op, dr, ins->imm);
+ break;
+ /* 32 bit ALU REG operations */
+ case (BPF_ALU | BPF_ADD | BPF_X):
+ case (BPF_ALU | BPF_SUB | BPF_X):
+ case (BPF_ALU | BPF_AND | BPF_X):
+ case (BPF_ALU | BPF_OR | BPF_X):
+ case (BPF_ALU | BPF_XOR | BPF_X):
+ emit_alu_reg(st, op, sr, dr);
+ break;
+ case (BPF_ALU | BPF_LSH | BPF_X):
+ case (BPF_ALU | BPF_RSH | BPF_X):
+ emit_shift_reg(st, op, sr, dr);
+ break;
+ case (BPF_ALU | BPF_MOV | BPF_X):
+ emit_mov_reg(st, op, sr, dr);
+ break;
+ case (BPF_ALU | BPF_NEG):
+ emit_neg(st, op, dr);
+ break;
+ case (BPF_ALU | BPF_END | BPF_TO_BE):
+ emit_be2le(st, dr, ins->imm);
+ break;
+ case (BPF_ALU | BPF_END | BPF_TO_LE):
+ emit_le2be(st, dr, ins->imm);
+ break;
+ /* 64 bit ALU IMM operations */
+ case (BPF_ALU64 | BPF_ADD | BPF_K):
+ case (BPF_ALU64 | BPF_SUB | BPF_K):
+ case (BPF_ALU64 | BPF_AND | BPF_K):
+ case (BPF_ALU64 | BPF_OR | BPF_K):
+ case (BPF_ALU64 | BPF_XOR | BPF_K):
+ emit_alu_imm(st, op, dr, ins->imm);
+ break;
+ case (BPF_ALU64 | BPF_LSH | BPF_K):
+ case (BPF_ALU64 | BPF_RSH | BPF_K):
+ case (BPF_ALU64 | BPF_ARSH | BPF_K):
+ emit_shift_imm(st, op, dr, ins->imm);
+ break;
+ case (BPF_ALU64 | BPF_MOV | BPF_K):
+ emit_mov_imm(st, op, dr, ins->imm);
+ break;
+ /* 64 bit ALU REG operations */
+ case (BPF_ALU64 | BPF_ADD | BPF_X):
+ case (BPF_ALU64 | BPF_SUB | BPF_X):
+ case (BPF_ALU64 | BPF_AND | BPF_X):
+ case (BPF_ALU64 | BPF_OR | BPF_X):
+ case (BPF_ALU64 | BPF_XOR | BPF_X):
+ emit_alu_reg(st, op, sr, dr);
+ break;
+ case (BPF_ALU64 | BPF_LSH | BPF_X):
+ case (BPF_ALU64 | BPF_RSH | BPF_X):
+ case (BPF_ALU64 | BPF_ARSH | BPF_X):
+ emit_shift_reg(st, op, sr, dr);
+ break;
+ case (BPF_ALU64 | BPF_MOV | BPF_X):
+ emit_mov_reg(st, op, sr, dr);
+ break;
+ case (BPF_ALU64 | BPF_NEG):
+ emit_neg(st, op, dr);
+ break;
+ /* multiply instructions */
+ case (BPF_ALU | BPF_MUL | BPF_K):
+ case (BPF_ALU | BPF_MUL | BPF_X):
+ case (BPF_ALU64 | BPF_MUL | BPF_K):
+ case (BPF_ALU64 | BPF_MUL | BPF_X):
+ emit_mul(st, op, sr, dr, ins->imm);
+ break;
+ /* divide instructions */
+ case (BPF_ALU | BPF_DIV | BPF_K):
+ case (BPF_ALU | BPF_MOD | BPF_K):
+ case (BPF_ALU | BPF_DIV | BPF_X):
+ case (BPF_ALU | BPF_MOD | BPF_X):
+ case (BPF_ALU64 | BPF_DIV | BPF_K):
+ case (BPF_ALU64 | BPF_MOD | BPF_K):
+ case (BPF_ALU64 | BPF_DIV | BPF_X):
+ case (BPF_ALU64 | BPF_MOD | BPF_X):
+ emit_div(st, op, sr, dr, ins->imm);
+ break;
+ /* load instructions */
+ case (BPF_LDX | BPF_MEM | BPF_B):
+ case (BPF_LDX | BPF_MEM | BPF_H):
+ case (BPF_LDX | BPF_MEM | BPF_W):
+ case (BPF_LDX | BPF_MEM | BPF_DW):
+ emit_ld_reg(st, op, sr, dr, ins->off);
+ break;
+ /* load 64 bit immediate value */
+ case (BPF_LD | BPF_IMM | BPF_DW):
+ emit_ld_imm64(st, dr, ins[0].imm, ins[1].imm);
+ ins++;
+ break;
+ /* store instructions */
+ case (BPF_STX | BPF_MEM | BPF_B):
+ case (BPF_STX | BPF_MEM | BPF_H):
+ case (BPF_STX | BPF_MEM | BPF_W):
+ case (BPF_STX | BPF_MEM | BPF_DW):
+ emit_st_reg(st, op, sr, dr, ins->off);
+ break;
+ case (BPF_ST | BPF_MEM | BPF_B):
+ case (BPF_ST | BPF_MEM | BPF_H):
+ case (BPF_ST | BPF_MEM | BPF_W):
+ case (BPF_ST | BPF_MEM | BPF_DW):
+ emit_st_imm(st, op, dr, ins->imm, ins->off);
+ break;
+ /* atomic add instructions */
+ case (BPF_STX | BPF_XADD | BPF_W):
+ case (BPF_STX | BPF_XADD | BPF_DW):
+ emit_st_xadd(st, op, sr, dr, ins->off);
+ break;
+ /* jump instructions */
+ case (BPF_JMP | BPF_JA):
+ emit_jmp(st, ins->off + 1);
+ break;
+ /* jump IMM instructions */
+ case (BPF_JMP | BPF_JEQ | BPF_K):
+ case (BPF_JMP | BPF_JNE | BPF_K):
+ case (BPF_JMP | BPF_JGT | BPF_K):
+ case (BPF_JMP | BPF_JLT | BPF_K):
+ case (BPF_JMP | BPF_JGE | BPF_K):
+ case (BPF_JMP | BPF_JLE | BPF_K):
+ case (BPF_JMP | BPF_JSGT | BPF_K):
+ case (BPF_JMP | BPF_JSLT | BPF_K):
+ case (BPF_JMP | BPF_JSGE | BPF_K):
+ case (BPF_JMP | BPF_JSLE | BPF_K):
+ case (BPF_JMP | BPF_JSET | BPF_K):
+ emit_jcc_imm(st, op, dr, ins->imm, ins->off + 1);
+ break;
+ /* jump REG instructions */
+ case (BPF_JMP | BPF_JEQ | BPF_X):
+ case (BPF_JMP | BPF_JNE | BPF_X):
+ case (BPF_JMP | BPF_JGT | BPF_X):
+ case (BPF_JMP | BPF_JLT | BPF_X):
+ case (BPF_JMP | BPF_JGE | BPF_X):
+ case (BPF_JMP | BPF_JLE | BPF_X):
+ case (BPF_JMP | BPF_JSGT | BPF_X):
+ case (BPF_JMP | BPF_JSLT | BPF_X):
+ case (BPF_JMP | BPF_JSGE | BPF_X):
+ case (BPF_JMP | BPF_JSLE | BPF_X):
+ case (BPF_JMP | BPF_JSET | BPF_X):
+ emit_jcc_reg(st, op, sr, dr, ins->off + 1);
+ break;
+ /* call instructions */
+ case (BPF_JMP | BPF_CALL):
+ emit_call(st, (uintptr_t)bpf->prm.xsym[ins->imm].func);
+ break;
+ /* return instruction */
+ case (BPF_JMP | BPF_EXIT):
+ emit_epilog(st);
+ break;
+ default:
+ RTE_LOG(ERR, USER1,
+ "%s(%p): invalid opcode %#x at pc: %#zx;\n",
+ __func__, bpf, ins->code,
+ (uintptr_t)ins - (uintptr_t)bpf->prm.ins);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+int
+bpf_jit_x86(struct rte_bpf *bpf)
+{
+ int32_t rc;
+ uint32_t i;
+ size_t sz;
+ struct bpf_jit_state st;
+
+ /* init state */
+ memset(&st, 0, sizeof(st));
+ st.off = malloc(bpf->prm.nb_ins * sizeof(st.off[0]));
+ if (st.off == NULL)
+ return -ENOMEM;
+
+ /* fill with fake offsets */
+ st.exit.off = INT32_MAX;
+ for (i = 0; i != bpf->prm.nb_ins; i++)
+ st.off[i] = INT32_MAX;
+
+ /*
+ * Dry runs, used to calculate total code size and valid jump offsets.
+ * Each pass refines the instruction offsets, so jump displacements can
+ * shrink; stop when the total size stops changing, i.e. we have reached
+ * the minimal possible size.
+ */
+ do {
+ sz = st.sz;
+ rc = emit(&st, bpf);
+ } while (rc == 0 && sz != st.sz);
+
+ if (rc == 0) {
+
+ /* allocate memory needed */
+ st.ins = mmap(NULL, st.sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (st.ins == MAP_FAILED)
+ rc = -ENOMEM;
+ else
+ /* generate code */
+ rc = emit(&st, bpf);
+ }
+
+ if (rc == 0 && mprotect(st.ins, st.sz, PROT_READ | PROT_EXEC) != 0)
+ rc = -ENOMEM;
+
+ if (rc != 0) {
+ /* don't munmap() if mmap() above failed or was never reached */
+ if (st.ins != NULL && st.ins != MAP_FAILED)
+ munmap(st.ins, st.sz);
+ } else {
+ bpf->jit.func = (void *)st.ins;
+ bpf->jit.sz = st.sz;
+ }
+
+ free(st.off);
+ return rc;
+}
--
2.13.6
* [dpdk-dev] [RFC PATCH 3/5] bpf: introduce basic RX/TX BPF filters
2018-03-08 1:29 [dpdk-dev] [RFC PATCH 0/5] add framework to load and execute BPF code Konstantin Ananyev
2018-03-08 1:29 ` [dpdk-dev] [RFC PATCH 1/5] bpf: add BPF loading and execution framework Konstantin Ananyev
2018-03-08 1:29 ` [dpdk-dev] [RFC PATCH 2/5] bpf: add JIT compilation for x86_64 ISA Konstantin Ananyev
@ 2018-03-08 1:30 ` Konstantin Ananyev
2018-03-08 1:30 ` [dpdk-dev] [RFC PATCH 4/5] testpmd: new commands to load/unload " Konstantin Ananyev
2018-03-08 1:30 ` [dpdk-dev] [RFC PATCH 5/5] test: add few eBPF samples Konstantin Ananyev
4 siblings, 0 replies; 10+ messages in thread
From: Konstantin Ananyev @ 2018-03-08 1:30 UTC (permalink / raw)
To: dev; +Cc: Konstantin Ananyev
Introduce an API to install BPF-based filters on the ethdev RX/TX path.
The current implementation is a pure SW one, based on the ethdev RX/TX callback mechanism.
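A minimal usage sketch (illustrative only - the port/queue numbers and the
ELF file name "flt.o" below are placeholders, and error handling is trimmed):

	struct rte_bpf_prm prm;
	int32_t rc;

	memset(&prm, 0, sizeof(prm));
	prm.prog_type = RTE_BPF_PROG_TYPE_UNSPEC;

	/* load BPF program from ELF file and attach it to port 0, RX queue 0 */
	rc = rte_bpf_eth_rx_elf_load(0, 0, &prm, "flt.o", ".text",
		RTE_BPF_ETH_F_JIT);
	if (rc != 0)
		rte_exit(EXIT_FAILURE, "failed to load BPF filter: %s\n",
			strerror(-rc));

	/* ... RX traffic on that queue is now filtered by the BPF program ... */

	/* detach the callback and destroy the BPF program */
	rte_bpf_eth_rx_unload(0, 0);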
Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
lib/librte_bpf/Makefile | 2 +
lib/librte_bpf/bpf_pkt.c | 524 +++++++++++++++++++++++++++++++++++++
lib/librte_bpf/rte_bpf_ethdev.h | 50 ++++
lib/librte_bpf/rte_bpf_version.map | 4 +
4 files changed, 580 insertions(+)
create mode 100644 lib/librte_bpf/bpf_pkt.c
create mode 100644 lib/librte_bpf/rte_bpf_ethdev.h
diff --git a/lib/librte_bpf/Makefile b/lib/librte_bpf/Makefile
index 44b12c439..501c49c60 100644
--- a/lib/librte_bpf/Makefile
+++ b/lib/librte_bpf/Makefile
@@ -22,6 +22,7 @@ LIBABIVER := 1
SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf.c
SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_exec.c
SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_load.c
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_pkt.c
SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_validate.c
ifeq ($(CONFIG_RTE_ARCH_X86_64),y)
SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_jit_x86.c
@@ -29,5 +30,6 @@ endif
# install header files
SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += rte_bpf.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += rte_bpf_ethdev.h
include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_bpf/bpf_pkt.c b/lib/librte_bpf/bpf_pkt.c
new file mode 100644
index 000000000..b0177ad82
--- /dev/null
+++ b/lib/librte_bpf/bpf_pkt.c
@@ -0,0 +1,524 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <sys/queue.h>
+#include <sys/stat.h>
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_cycles.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_atomic.h>
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+
+#include <rte_bpf_ethdev.h>
+
+/*
+ * information about all installed BPF rx/tx callbacks
+ */
+
+struct bpf_eth_cbi {
+ uint32_t use; /* usage counter */
+ void *cb; /* callback handle */
+ struct rte_bpf *bpf;
+ struct rte_bpf_jit jit;
+} __rte_cache_aligned;
+
+/*
+ * Odd value means that the callback is currently used by the datapath
+ * (an RX/TX burst is executing inside it).
+ * Even value means that the callback is not used by the datapath.
+ * The counter is incremented on entry and again on exit, so a control
+ * thread can wait for the value to change (become even) to know that
+ * the datapath has left the callback.
+ */
+#define BPF_ETH_CBI_INUSE 1
+
+static struct bpf_eth_cbi rx_cbi[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
+static struct bpf_eth_cbi tx_cbi[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
+
+/*
+ * Marks the given callback as used by the datapath.
+ */
+static __rte_always_inline void
+bpf_eth_cbi_inuse(struct bpf_eth_cbi *cbi)
+{
+ cbi->use++;
+ /* make sure no store/load reordering could happen */
+ rte_smp_mb();
+}
+
+/*
+ * Marks the given callback as not used by the datapath.
+ */
+static __rte_always_inline void
+bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
+{
+ /* make sure all previous loads are completed */
+ rte_smp_rmb();
+ cbi->use++;
+}
+
+/*
+ * Waits till the datapath has finished using the given callback.
+ */
+static void
+bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
+{
+ uint32_t nuse, puse;
+
+ /* make sure all previous loads and stores are completed */
+ rte_smp_mb();
+
+ puse = cbi->use;
+
+ /* in use, busy wait till current RX/TX iteration is finished */
+ if ((puse & BPF_ETH_CBI_INUSE) != 0) {
+ do {
+ rte_pause();
+ rte_compiler_barrier();
+ nuse = cbi->use;
+ } while (nuse == puse);
+ }
+}
+
+static void
+bpf_eth_cbi_cleanup(struct bpf_eth_cbi *bc)
+{
+ bc->bpf = NULL;
+ memset(&bc->jit, 0, sizeof(bc->jit));
+}
+
+/*
+ * BPF packet processing routines.
+ */
+
+static inline uint32_t
+apply_filter(struct rte_mbuf *mb[], const uint64_t rc[], uint32_t num,
+ uint32_t drop)
+{
+ uint32_t i, j, k;
+ struct rte_mbuf *dr[num];
+
+ for (i = 0, j = 0, k = 0; i != num; i++) {
+
+ /* filter matches */
+ if (rc[i] != 0)
+ mb[j++] = mb[i];
+ /* no match */
+ else
+ dr[k++] = mb[i];
+ }
+
+ if (drop != 0) {
+ /* free filtered out mbufs */
+ for (i = 0; i != k; i++)
+ rte_pktmbuf_free(dr[i]);
+ } else {
+ /* copy filtered out mbufs beyond good ones */
+ for (i = 0; i != k; i++)
+ mb[j + i] = dr[i];
+ }
+
+ return j;
+}
+
+static inline uint32_t
+pkt_filter_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
+ uint32_t drop)
+{
+ uint32_t i;
+ void *dp[num];
+ uint64_t rc[num];
+
+ for (i = 0; i != num; i++)
+ dp[i] = rte_pktmbuf_mtod(mb[i], void *);
+
+ rte_bpf_exec_burst(bpf, dp, rc, num);
+ return apply_filter(mb, rc, num, drop);
+}
+
+static inline uint32_t
+pkt_filter_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
+ uint32_t num, uint32_t drop)
+{
+ uint32_t i;
+ void *dp;
+ uint64_t rc[num];
+
+ for (i = 0; i != num; i++) {
+ dp = rte_pktmbuf_mtod(mb[i], void *);
+ rc[i] = (jit->func(dp) != 0);
+ }
+
+ return apply_filter(mb, rc, num, drop);
+}
+
+static inline uint32_t
+pkt_filter_mb_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
+ uint32_t drop)
+{
+ uint64_t rc[num];
+
+ rte_bpf_exec_burst(bpf, (void **)mb, rc, num);
+ return apply_filter(mb, rc, num, drop);
+}
+
+static inline uint32_t
+pkt_filter_mb_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
+ uint32_t num, uint32_t drop)
+{
+ uint32_t i;
+ uint64_t rc[num];
+
+ for (i = 0; i != num; i++)
+ rc[i] = (jit->func(mb[i]) != 0);
+
+ return apply_filter(mb, rc, num, drop);
+}
+
+/*
+ * RX/TX callbacks for raw data bpf.
+ */
+
+static uint16_t
+bpf_rx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 1) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static uint16_t
+bpf_rx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 1) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static uint16_t
+bpf_tx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 0) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static uint16_t
+bpf_tx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 0) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+/*
+ * RX/TX callbacks for mbuf.
+ */
+
+static uint16_t
+bpf_rx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 1) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static uint16_t
+bpf_rx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 1) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static uint16_t
+bpf_tx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 0) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static uint16_t
+bpf_tx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
+{
+ struct bpf_eth_cbi *cbi;
+ uint16_t rc;
+
+ cbi = user_param;
+ bpf_eth_cbi_inuse(cbi);
+ rc = (cbi->cb != NULL) ?
+ pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 0) :
+ nb_pkts;
+ bpf_eth_cbi_unuse(cbi);
+ return rc;
+}
+
+static rte_rx_callback_fn
+select_rx_callback(enum rte_bpf_prog_type ptype, uint32_t flags)
+{
+ if (flags & RTE_BPF_ETH_F_JIT) {
+ if (ptype == RTE_BPF_PROG_TYPE_UNSPEC)
+ return bpf_rx_callback_jit;
+ else if (ptype == RTE_BPF_PROG_TYPE_MBUF)
+ return bpf_rx_callback_mb_jit;
+ } else if (ptype == RTE_BPF_PROG_TYPE_UNSPEC)
+ return bpf_rx_callback_vm;
+ else if (ptype == RTE_BPF_PROG_TYPE_MBUF)
+ return bpf_rx_callback_mb_vm;
+
+ return NULL;
+}
+
+static rte_tx_callback_fn
+select_tx_callback(enum rte_bpf_prog_type ptype, uint32_t flags)
+{
+ if (flags & RTE_BPF_ETH_F_JIT) {
+ if (ptype == RTE_BPF_PROG_TYPE_UNSPEC)
+ return bpf_tx_callback_jit;
+ else if (ptype == RTE_BPF_PROG_TYPE_MBUF)
+ return bpf_tx_callback_mb_jit;
+ } else if (ptype == RTE_BPF_PROG_TYPE_UNSPEC)
+ return bpf_tx_callback_vm;
+ else if (ptype == RTE_BPF_PROG_TYPE_MBUF)
+ return bpf_tx_callback_mb_vm;
+
+ return NULL;
+}
+
+/*
+ * Helper function to perform BPF unload for a given port/queue.
+ * We have to introduce extra complexity (and a slowdown) here,
+ * as right now there is no safe generic way to remove an RX/TX callback
+ * while IO is active.
+ * Note that we still don't free the memory allocated for the callback
+ * handle itself; again, right now there is no safe way to do that without
+ * first stopping RX/TX on the given port/queue.
+ */
+static void
+bpf_eth_unload(struct bpf_eth_cbi *bc)
+{
+ /* mark this cbi as empty */
+ bc->cb = NULL;
+ rte_smp_mb();
+
+ /* make sure datapath doesn't use bpf anymore, then destroy bpf */
+ bpf_eth_cbi_wait(bc);
+ rte_bpf_destroy(bc->bpf);
+ bpf_eth_cbi_cleanup(bc);
+}
+
+__rte_experimental void
+rte_bpf_eth_rx_unload(uint16_t port, uint16_t queue)
+{
+ struct bpf_eth_cbi *bc;
+ void *cb;
+
+ bc = &rx_cbi[port][queue];
+ cb = bc->cb;
+
+ if (cb == NULL)
+ return;
+
+ rte_eth_remove_rx_callback(port, queue, cb);
+ bpf_eth_unload(bc);
+}
+
+__rte_experimental void
+rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue)
+{
+ struct bpf_eth_cbi *bc;
+ void *cb;
+
+ bc = &tx_cbi[port][queue];
+ cb = bc->cb;
+
+ if (cb == NULL)
+ return;
+
+ rte_eth_remove_tx_callback(port, queue, cb);
+ bpf_eth_unload(bc);
+}
+
+__rte_experimental int
+rte_bpf_eth_rx_elf_load(uint16_t port, uint16_t queue,
+ const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+ uint32_t flags)
+{
+ int32_t rc;
+ struct bpf_eth_cbi *bc;
+ struct rte_bpf *bpf;
+ rte_rx_callback_fn fn;
+
+ if (prm == NULL)
+ return -EINVAL;
+
+ /* remove old one, if any */
+ rte_bpf_eth_rx_unload(port, queue);
+
+ fn = select_rx_callback(prm->prog_type, flags);
+ if (fn == NULL) {
+ RTE_LOG(ERR, USER1, "%s(%u, %u): no callback selected;\n",
+ __func__, port, queue);
+ return -EINVAL;
+ }
+
+ bpf = rte_bpf_elf_load(prm, fname, sname);
+ if (bpf == NULL)
+ return -rte_errno;
+
+ /* update global callback info */
+ bc = &rx_cbi[port][queue];
+ bc->bpf = bpf;
+ rte_bpf_get_jit(bpf, &bc->jit);
+
+ rc = 0;
+
+ if ((flags & RTE_BPF_ETH_F_JIT) != 0 && bc->jit.func == NULL) {
+ RTE_LOG(ERR, USER1, "%s(%u, %u): no JIT generated;\n",
+ __func__, port, queue);
+ rc = -EINVAL;
+ } else {
+ bc->cb = rte_eth_add_rx_callback(port, queue, fn, bc);
+ if (bc->cb == NULL)
+ rc = -rte_errno;
+ }
+
+ if (rc != 0) {
+ rte_bpf_destroy(bpf);
+ bpf_eth_cbi_cleanup(bc);
+ }
+
+ return rc;
+}
+
+__rte_experimental int
+rte_bpf_eth_tx_elf_load(uint16_t port, uint16_t queue,
+ const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+ uint32_t flags)
+{
+ int32_t rc;
+ struct bpf_eth_cbi *bc;
+ struct rte_bpf *bpf;
+ rte_tx_callback_fn fn;
+
+ if (prm == NULL)
+ return -EINVAL;
+
+ /* remove old one, if any */
+ rte_bpf_eth_tx_unload(port, queue);
+
+ fn = select_tx_callback(prm->prog_type, flags);
+ if (fn == NULL) {
+ RTE_LOG(ERR, USER1, "%s(%u, %u): no callback selected;\n",
+ __func__, port, queue);
+ return -EINVAL;
+ }
+
+ bpf = rte_bpf_elf_load(prm, fname, sname);
+ if (bpf == NULL)
+ return -rte_errno;
+
+ /* update global callback info */
+ bc = &tx_cbi[port][queue];
+ bc->bpf = bpf;
+ rte_bpf_get_jit(bpf, &bc->jit);
+
+ rc = 0;
+
+ if ((flags & RTE_BPF_ETH_F_JIT) != 0 && bc->jit.func == NULL) {
+ RTE_LOG(ERR, USER1, "%s(%u, %u): no JIT generated;\n",
+ __func__, port, queue);
+ rc = -EINVAL;
+ } else {
+ bc->cb = rte_eth_add_tx_callback(port, queue, fn, bc);
+ if (bc->cb == NULL)
+ rc = -rte_errno;
+ }
+
+ if (rc != 0) {
+ rte_bpf_destroy(bpf);
+ bpf_eth_cbi_cleanup(bc);
+ }
+
+ return rc;
+}
diff --git a/lib/librte_bpf/rte_bpf_ethdev.h b/lib/librte_bpf/rte_bpf_ethdev.h
new file mode 100644
index 000000000..abc3b8e5f
--- /dev/null
+++ b/lib/librte_bpf/rte_bpf_ethdev.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_BPF_ETHDEV_H_
+#define _RTE_BPF_ETHDEV_H_
+
+#include <rte_bpf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+ RTE_BPF_ETH_F_NONE = 0,
+ RTE_BPF_ETH_F_JIT = 0x1, /**< compile BPF into native ISA */
+};
+
+/*
+ * API to install a BPF filter as RX/TX callbacks for eth devices.
+ * Note that right now:
+ * - it is not MT safe, i.e. it is not allowed to do load/unload for the
+ * same port/queue from different threads in parallel.
+ * - though it does allow load/unload at runtime
+ * (while RX/TX is ongoing on the given port/queue).
+ * - it allows only one BPF program per port/queue,
+ * i.e. a new load will replace the BPF program previously loaded for
+ * that port/queue.
+ * Filter behaviour - if the BPF program returns a zero value for a given
+ * packet, then:
+ * on RX - the packet will be dropped inside the callback and no further
+ * processing for that packet will happen;
+ * on TX - the packet will remain unsent, and it is the responsibility of
+ * the user to handle such a situation (drop, try to send again, etc.).
+ */
+
+void rte_bpf_eth_rx_unload(uint16_t port, uint16_t queue);
+void rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue);
+
+int rte_bpf_eth_rx_elf_load(uint16_t port, uint16_t queue,
+ const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+ uint32_t flags);
+int rte_bpf_eth_tx_elf_load(uint16_t port, uint16_t queue,
+ const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+ uint32_t flags);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BPF_ETHDEV_H_ */
diff --git a/lib/librte_bpf/rte_bpf_version.map b/lib/librte_bpf/rte_bpf_version.map
index ff65144df..a203e088e 100644
--- a/lib/librte_bpf/rte_bpf_version.map
+++ b/lib/librte_bpf/rte_bpf_version.map
@@ -3,6 +3,10 @@ EXPERIMENTAL {
rte_bpf_destroy;
rte_bpf_elf_load;
+ rte_bpf_eth_rx_elf_load;
+ rte_bpf_eth_rx_unload;
+ rte_bpf_eth_tx_elf_load;
+ rte_bpf_eth_tx_unload;
rte_bpf_exec;
rte_bpf_exec_burst;
rte_bpf_get_jit;
--
2.13.6
* [dpdk-dev] [RFC PATCH 4/5] testpmd: new commands to load/unload BPF filters
2018-03-08 1:29 [dpdk-dev] [RFC PATCH 0/5] add framework to load and execute BPF code Konstantin Ananyev
` (2 preceding siblings ...)
2018-03-08 1:30 ` [dpdk-dev] [RFC PATCH 3/5] bpf: introduce basic RX/TX BPF filters Konstantin Ananyev
@ 2018-03-08 1:30 ` Konstantin Ananyev
2018-03-08 1:30 ` [dpdk-dev] [RFC PATCH 5/5] test: add few eBPF samples Konstantin Ananyev
4 siblings, 0 replies; 10+ messages in thread
From: Konstantin Ananyev @ 2018-03-08 1:30 UTC (permalink / raw)
To: dev; +Cc: Konstantin Ananyev
Introduce new testpmd commands to load/unload RX/TX BPF-based filters.
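For example, to attach a JIT-ed filter from an ELF object to RX queue 0 of
port 0 and later remove it (the file name "flt.o" is illustrative; 'J'
requests JIT, 'M' marks an mbuf-type program, '-' selects the interpreter
with no extra flags):

testpmd> bpf-load rx 0 0 J ./flt.o
testpmd> bpf-unload rx 0 0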
Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
app/test-pmd/cmdline.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 144 insertions(+)
diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index d1dc1de6c..ee6dc94b8 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -47,6 +47,7 @@
#include <rte_eth_ctrl.h>
#include <rte_flow.h>
#include <rte_gro.h>
+#include <rte_bpf_ethdev.h>
#include <cmdline_rdline.h>
#include <cmdline_parse.h>
@@ -16030,6 +16031,147 @@ cmdline_parse_inst_t cmd_load_from_file = {
},
};
+/* *** load BPF program *** */
+struct cmd_bpf_ld_result {
+ cmdline_fixed_string_t bpf;
+ cmdline_fixed_string_t dir;
+ uint8_t port;
+ uint16_t queue;
+ cmdline_fixed_string_t op;
+ cmdline_fixed_string_t flags;
+ cmdline_fixed_string_t prm;
+};
+
+static void
+bpf_parse_flags(const char *str, enum rte_bpf_prog_type *ptype, uint32_t *flags)
+{
+ uint32_t i, v;
+
+ *flags = RTE_BPF_ETH_F_NONE;
+ *ptype = RTE_BPF_PROG_TYPE_UNSPEC;
+
+ for (i = 0; str[i] != 0; i++) {
+ v = toupper(str[i]);
+ if (v == 'J')
+ *flags |= RTE_BPF_ETH_F_JIT;
+ else if (v == 'M')
+ *ptype = RTE_BPF_PROG_TYPE_MBUF;
+ else if (v == '-')
+ continue;
+ else
+ printf("unknown flag: \'%c\'", v);
+ }
+}
+
+static void cmd_operate_bpf_ld_parsed(void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ int32_t rc;
+ uint32_t flags;
+ struct cmd_bpf_ld_result *res;
+ struct rte_bpf_prm prm;
+ const char *fname, *sname;
+
+ res = parsed_result;
+ memset(&prm, 0, sizeof(prm));
+
+ bpf_parse_flags(res->flags, &prm.prog_type, &flags);
+ fname = res->prm;
+ sname = ".text";
+
+ if (strcmp(res->dir, "rx") == 0) {
+ rc = rte_bpf_eth_rx_elf_load(res->port, res->queue, &prm,
+ fname, sname, flags);
+ printf("%d:%s\n", rc, strerror(-rc));
+ } else if (strcmp(res->dir, "tx") == 0) {
+ rc = rte_bpf_eth_tx_elf_load(res->port, res->queue, &prm,
+ fname, sname, flags);
+ printf("%d:%s\n", rc, strerror(-rc));
+ } else
+ printf("invalid value: %s\n", res->dir);
+}
+
+cmdline_parse_token_string_t cmd_load_bpf_start =
+ TOKEN_STRING_INITIALIZER(struct cmd_bpf_ld_result,
+ bpf, "bpf-load");
+cmdline_parse_token_string_t cmd_load_bpf_dir =
+ TOKEN_STRING_INITIALIZER(struct cmd_bpf_ld_result,
+ dir, "rx#tx");
+cmdline_parse_token_num_t cmd_load_bpf_port =
+ TOKEN_NUM_INITIALIZER(struct cmd_bpf_ld_result, port, UINT8);
+cmdline_parse_token_num_t cmd_load_bpf_queue =
+ TOKEN_NUM_INITIALIZER(struct cmd_bpf_ld_result, queue, UINT16);
+cmdline_parse_token_string_t cmd_load_bpf_flags =
+ TOKEN_STRING_INITIALIZER(struct cmd_bpf_ld_result,
+ flags, NULL);
+cmdline_parse_token_string_t cmd_load_bpf_prm =
+ TOKEN_STRING_INITIALIZER(struct cmd_bpf_ld_result,
+ prm, NULL);
+
+cmdline_parse_inst_t cmd_operate_bpf_ld_parse = {
+ .f = cmd_operate_bpf_ld_parsed,
+ .data = NULL,
+ .help_str = "bpf-load rx|tx <port> <queue> <J|M|B> <file_name>",
+ .tokens = {
+ (void *)&cmd_load_bpf_start,
+ (void *)&cmd_load_bpf_dir,
+ (void *)&cmd_load_bpf_port,
+ (void *)&cmd_load_bpf_queue,
+ (void *)&cmd_load_bpf_flags,
+ (void *)&cmd_load_bpf_prm,
+ NULL,
+ },
+};
+
+/* *** unload BPF program *** */
+struct cmd_bpf_unld_result {
+ cmdline_fixed_string_t bpf;
+ cmdline_fixed_string_t dir;
+ uint8_t port;
+ uint16_t queue;
+};
+
+static void cmd_operate_bpf_unld_parsed(void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_bpf_unld_result *res;
+
+ res = parsed_result;
+
+ if (strcmp(res->dir, "rx") == 0)
+ rte_bpf_eth_rx_unload(res->port, res->queue);
+ else if (strcmp(res->dir, "tx") == 0)
+ rte_bpf_eth_tx_unload(res->port, res->queue);
+ else
+ printf("invalid value: %s\n", res->dir);
+}
+
+cmdline_parse_token_string_t cmd_unload_bpf_start =
+ TOKEN_STRING_INITIALIZER(struct cmd_bpf_unld_result,
+ bpf, "bpf-unload");
+cmdline_parse_token_string_t cmd_unload_bpf_dir =
+ TOKEN_STRING_INITIALIZER(struct cmd_bpf_unld_result,
+ dir, "rx#tx");
+cmdline_parse_token_num_t cmd_unload_bpf_port =
+ TOKEN_NUM_INITIALIZER(struct cmd_bpf_unld_result, port, UINT8);
+cmdline_parse_token_num_t cmd_unload_bpf_queue =
+ TOKEN_NUM_INITIALIZER(struct cmd_bpf_unld_result, queue, UINT16);
+
+cmdline_parse_inst_t cmd_operate_bpf_unld_parse = {
+ .f = cmd_operate_bpf_unld_parsed,
+ .data = NULL,
+ .help_str = "bpf-unload rx|tx <port> <queue>",
+ .tokens = {
+ (void *)&cmd_unload_bpf_start,
+ (void *)&cmd_unload_bpf_dir,
+ (void *)&cmd_unload_bpf_port,
+ (void *)&cmd_unload_bpf_queue,
+ NULL,
+ },
+};
+
/* ******************************************************************************** */
/* list of instructions */
@@ -16272,6 +16414,8 @@ cmdline_parse_ctx_t main_ctx[] = {
(cmdline_parse_inst_t *)&cmd_del_port_tm_node,
(cmdline_parse_inst_t *)&cmd_set_port_tm_node_parent,
(cmdline_parse_inst_t *)&cmd_port_tm_hierarchy_commit,
+ (cmdline_parse_inst_t *)&cmd_operate_bpf_ld_parse,
+ (cmdline_parse_inst_t *)&cmd_operate_bpf_unld_parse,
NULL,
};
--
2.13.6
* [dpdk-dev] [RFC PATCH 5/5] test: add few eBPF samples
2018-03-08 1:29 [dpdk-dev] [RFC PATCH 0/5] add framework to load and execute BPF code Konstantin Ananyev
` (3 preceding siblings ...)
2018-03-08 1:30 ` [dpdk-dev] [RFC PATCH 4/5] testpmd: new commands to load/unload " Konstantin Ananyev
@ 2018-03-08 1:30 ` Konstantin Ananyev
2018-03-13 14:01 ` Jerin Jacob
4 siblings, 1 reply; 10+ messages in thread
From: Konstantin Ananyev @ 2018-03-08 1:30 UTC (permalink / raw)
To: dev; +Cc: Konstantin Ananyev
Add a few simple eBPF programs as examples.
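For instance, t1.c below can be compiled and attached to an RX queue with
the testpmd commands from the previous patch (compile flags as given in
each file's header comment; the port/queue ids are illustrative):

	clang -O2 -DRTE_CACHE_LINE_SIZE=64 -I${RTE_SDK}/${RTE_TARGET}/include \
		-target bpf -c t1.c

	testpmd> bpf-load rx 0 0 - ./t1.o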
Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
test/bpf/dummy.c | 20 ++
test/bpf/mbuf.h | 556 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
test/bpf/t1.c | 54 ++++++
test/bpf/t2.c | 31 ++++
4 files changed, 661 insertions(+)
create mode 100644 test/bpf/dummy.c
create mode 100644 test/bpf/mbuf.h
create mode 100644 test/bpf/t1.c
create mode 100644 test/bpf/t2.c
diff --git a/test/bpf/dummy.c b/test/bpf/dummy.c
new file mode 100644
index 000000000..5851469e7
--- /dev/null
+++ b/test/bpf/dummy.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+/*
+ * eBPF program sample.
+ * Does nothing, always returns success.
+ * Used to measure BPF infrastructure overhead.
+ * To compile:
+ * clang -O2 -target bpf -c dummy.c
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+
+uint64_t
+entry(void *arg)
+{
+ return 1;
+}
diff --git a/test/bpf/mbuf.h b/test/bpf/mbuf.h
new file mode 100644
index 000000000..aeef6339d
--- /dev/null
+++ b/test/bpf/mbuf.h
@@ -0,0 +1,556 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation.
+ * Copyright 2014 6WIND S.A.
+ */
+
+/*
+ * Snippet from dpdk.org rte_mbuf.h,
+ * used to provide BPF programs with information about the rte_mbuf layout.
+ */
+
+#ifndef _MBUF_H_
+#define _MBUF_H_
+
+#include <stdint.h>
+#include <rte_common.h>
+#include <rte_memory.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Packet Offload Features Flags. It also carry packet type information.
+ * Critical resources. Both rx/tx shared these bits. Be cautious on any change
+ *
+ * - RX flags start at bit position zero, and get added to the left of previous
+ * flags.
+ * - The most-significant 3 bits are reserved for generic mbuf flags
+ * - TX flags therefore start at bit position 60 (i.e. 63-3), and new flags get
+ * added to the right of the previously defined flags i.e. they should count
+ * downwards, not upwards.
+ *
+ * Keep these flags synchronized with rte_get_rx_ol_flag_name() and
+ * rte_get_tx_ol_flag_name().
+ */
+
+/**
+ * RX packet is a 802.1q VLAN packet. This flag was set by PMDs when
+ * the packet is recognized as a VLAN, but the behavior between PMDs
+ * was not the same. This flag is kept for some time to avoid breaking
+ * applications and should be replaced by PKT_RX_VLAN_STRIPPED.
+ */
+#define PKT_RX_VLAN_PKT (1ULL << 0)
+
+#define PKT_RX_RSS_HASH (1ULL << 1) /**< RX packet with RSS hash result. */
+#define PKT_RX_FDIR (1ULL << 2) /**< RX packet with FDIR match indicate. */
+
+/**
+ * Deprecated.
+ * Checking this flag alone is deprecated: check the 2 bits of
+ * PKT_RX_L4_CKSUM_MASK.
+ * This flag was set when the L4 checksum of a packet was detected as
+ * wrong by the hardware.
+ */
+#define PKT_RX_L4_CKSUM_BAD (1ULL << 3)
+
+/**
+ * Deprecated.
+ * Checking this flag alone is deprecated: check the 2 bits of
+ * PKT_RX_IP_CKSUM_MASK.
+ * This flag was set when the IP checksum of a packet was detected as
+ * wrong by the hardware.
+ */
+#define PKT_RX_IP_CKSUM_BAD (1ULL << 4)
+
+#define PKT_RX_EIP_CKSUM_BAD (1ULL << 5) /**< External IP header checksum error. */
+
+/**
+ * A vlan has been stripped by the hardware and its tci is saved in
+ * mbuf->vlan_tci. This can only happen if vlan stripping is enabled
+ * in the RX configuration of the PMD.
+ */
+#define PKT_RX_VLAN_STRIPPED (1ULL << 6)
+
+/**
+ * Mask of bits used to determine the status of RX IP checksum.
+ * - PKT_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum
+ * - PKT_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong
+ * - PKT_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid
+ * - PKT_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet
+ * data, but the integrity of the IP header is verified.
+ */
+#define PKT_RX_IP_CKSUM_MASK ((1ULL << 4) | (1ULL << 7))
+
+#define PKT_RX_IP_CKSUM_UNKNOWN 0
+#define PKT_RX_IP_CKSUM_BAD (1ULL << 4)
+#define PKT_RX_IP_CKSUM_GOOD (1ULL << 7)
+#define PKT_RX_IP_CKSUM_NONE ((1ULL << 4) | (1ULL << 7))
+
+/**
+ * Mask of bits used to determine the status of RX L4 checksum.
+ * - PKT_RX_L4_CKSUM_UNKNOWN: no information about the RX L4 checksum
+ * - PKT_RX_L4_CKSUM_BAD: the L4 checksum in the packet is wrong
+ * - PKT_RX_L4_CKSUM_GOOD: the L4 checksum in the packet is valid
+ * - PKT_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet
+ * data, but the integrity of the L4 data is verified.
+ */
+#define PKT_RX_L4_CKSUM_MASK ((1ULL << 3) | (1ULL << 8))
+
+#define PKT_RX_L4_CKSUM_UNKNOWN 0
+#define PKT_RX_L4_CKSUM_BAD (1ULL << 3)
+#define PKT_RX_L4_CKSUM_GOOD (1ULL << 8)
+#define PKT_RX_L4_CKSUM_NONE ((1ULL << 3) | (1ULL << 8))
+
+#define PKT_RX_IEEE1588_PTP (1ULL << 9) /**< RX IEEE1588 L2 Ethernet PT Packet. */
+#define PKT_RX_IEEE1588_TMST (1ULL << 10) /**< RX IEEE1588 L2/L4 timestamped packet.*/
+#define PKT_RX_FDIR_ID (1ULL << 13) /**< FD id reported if FDIR match. */
+#define PKT_RX_FDIR_FLX (1ULL << 14) /**< Flexible bytes reported if FDIR match. */
+
+/**
+ * The 2 vlans have been stripped by the hardware and their tci are
+ * saved in mbuf->vlan_tci (inner) and mbuf->vlan_tci_outer (outer).
+ * This can only happen if vlan stripping is enabled in the RX
+ * configuration of the PMD. If this flag is set, PKT_RX_VLAN_STRIPPED
+ * must also be set.
+ */
+#define PKT_RX_QINQ_STRIPPED (1ULL << 15)
+
+/**
+ * Deprecated.
+ * RX packet with double VLAN stripped.
+ * This flag is replaced by PKT_RX_QINQ_STRIPPED.
+ */
+#define PKT_RX_QINQ_PKT PKT_RX_QINQ_STRIPPED
+
+/**
+ * When packets are coalesced by a hardware or virtual driver, this flag
+ * can be set in the RX mbuf, meaning that the m->tso_segsz field is
+ * valid and is set to the segment size of original packets.
+ */
+#define PKT_RX_LRO (1ULL << 16)
+
+/**
+ * Indicate that the timestamp field in the mbuf is valid.
+ */
+#define PKT_RX_TIMESTAMP (1ULL << 17)
+
+/* add new RX flags here */
+
+/* add new TX flags here */
+
+/**
+ * Offload the MACsec. This flag must be set by the application to enable
+ * this offload feature for a packet to be transmitted.
+ */
+#define PKT_TX_MACSEC (1ULL << 44)
+
+/**
+ * Bits 45:48 used for the tunnel type.
+ * When doing Tx offload like TSO or checksum, the HW needs to configure the
+ * tunnel type into the HW descriptors.
+ */
+#define PKT_TX_TUNNEL_VXLAN (0x1ULL << 45)
+#define PKT_TX_TUNNEL_GRE (0x2ULL << 45)
+#define PKT_TX_TUNNEL_IPIP (0x3ULL << 45)
+#define PKT_TX_TUNNEL_GENEVE (0x4ULL << 45)
+/**< TX packet with MPLS-in-UDP RFC 7510 header. */
+#define PKT_TX_TUNNEL_MPLSINUDP (0x5ULL << 45)
+/* add new TX TUNNEL type here */
+#define PKT_TX_TUNNEL_MASK (0xFULL << 45)
+
+/**
+ * Second VLAN insertion (QinQ) flag.
+ */
+#define PKT_TX_QINQ_PKT (1ULL << 49) /**< TX packet with double VLAN inserted. */
+
+/**
+ * TCP segmentation offload. To enable this offload feature for a
+ * packet to be transmitted on hardware supporting TSO:
+ * - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
+ * PKT_TX_TCP_CKSUM)
+ * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6
+ * - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum
+ * to 0 in the packet
+ * - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
+ * - calculate the pseudo header checksum without taking ip_len in account,
+ * and set it in the TCP header. Refer to rte_ipv4_phdr_cksum() and
+ * rte_ipv6_phdr_cksum() that can be used as helpers.
+ */
+#define PKT_TX_TCP_SEG (1ULL << 50)
+
+#define PKT_TX_IEEE1588_TMST (1ULL << 51) /**< TX IEEE1588 packet to timestamp. */
+
+/**
+ * Bits 52+53 used for L4 packet type with checksum enabled: 00: Reserved,
+ * 01: TCP checksum, 10: SCTP checksum, 11: UDP checksum. To use hardware
+ * L4 checksum offload, the user needs to:
+ * - fill l2_len and l3_len in mbuf
+ * - set the flags PKT_TX_TCP_CKSUM, PKT_TX_SCTP_CKSUM or PKT_TX_UDP_CKSUM
+ * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6
+ * - calculate the pseudo header checksum and set it in the L4 header (only
+ * for TCP or UDP). See rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum().
+ * For SCTP, set the crc field to 0.
+ */
+#define PKT_TX_L4_NO_CKSUM (0ULL << 52) /**< Disable L4 cksum of TX pkt. */
+#define PKT_TX_TCP_CKSUM (1ULL << 52) /**< TCP cksum of TX pkt. computed by NIC. */
+#define PKT_TX_SCTP_CKSUM (2ULL << 52) /**< SCTP cksum of TX pkt. computed by NIC. */
+#define PKT_TX_UDP_CKSUM (3ULL << 52) /**< UDP cksum of TX pkt. computed by NIC. */
+#define PKT_TX_L4_MASK (3ULL << 52) /**< Mask for L4 cksum offload request. */
+
+/**
+ * Offload the IP checksum in the hardware. The flag PKT_TX_IPV4 should
+ * also be set by the application, although a PMD will only check
+ * PKT_TX_IP_CKSUM.
+ * - set the IP checksum field in the packet to 0
+ * - fill the mbuf offload information: l2_len, l3_len
+ */
+#define PKT_TX_IP_CKSUM (1ULL << 54)
+
+/**
+ * Packet is IPv4. This flag must be set when using any offload feature
+ * (TSO, L3 or L4 checksum) to tell the NIC that the packet is an IPv4
+ * packet. If the packet is a tunneled packet, this flag is related to
+ * the inner headers.
+ */
+#define PKT_TX_IPV4 (1ULL << 55)
+
+/**
+ * Packet is IPv6. This flag must be set when using an offload feature
+ * (TSO or L4 checksum) to tell the NIC that the packet is an IPv6
+ * packet. If the packet is a tunneled packet, this flag is related to
+ * the inner headers.
+ */
+#define PKT_TX_IPV6 (1ULL << 56)
+
+#define PKT_TX_VLAN_PKT (1ULL << 57) /**< TX packet is a 802.1q VLAN packet. */
+
+/**
+ * Offload the IP checksum of an external header in the hardware. The
+ * flag PKT_TX_OUTER_IPV4 should also be set by the application, although
+ * a PMD will only check PKT_TX_IP_CKSUM. The IP checksum field in the
+ * packet must be set to 0.
+ * - set the outer IP checksum field in the packet to 0
+ * - fill the mbuf offload information: outer_l2_len, outer_l3_len
+ */
+#define PKT_TX_OUTER_IP_CKSUM (1ULL << 58)
+
+/**
+ * Packet outer header is IPv4. This flag must be set when using any
+ * outer offload feature (L3 or L4 checksum) to tell the NIC that the
+ * outer header of the tunneled packet is an IPv4 packet.
+ */
+#define PKT_TX_OUTER_IPV4 (1ULL << 59)
+
+/**
+ * Packet outer header is IPv6. This flag must be set when using any
+ * outer offload feature (L4 checksum) to tell the NIC that the outer
+ * header of the tunneled packet is an IPv6 packet.
+ */
+#define PKT_TX_OUTER_IPV6 (1ULL << 60)
+
+/**
+ * Bitmask of all supported packet Tx offload features flags,
+ * which can be set for packet.
+ */
+#define PKT_TX_OFFLOAD_MASK ( \
+ PKT_TX_IP_CKSUM | \
+ PKT_TX_L4_MASK | \
+ PKT_TX_OUTER_IP_CKSUM | \
+ PKT_TX_TCP_SEG | \
+ PKT_TX_IEEE1588_TMST | \
+ PKT_TX_QINQ_PKT | \
+ PKT_TX_VLAN_PKT | \
+ PKT_TX_TUNNEL_MASK | \
+ PKT_TX_MACSEC)
+
+#define __RESERVED (1ULL << 61) /**< reserved for future mbuf use */
+
+#define IND_ATTACHED_MBUF (1ULL << 62) /**< Indirect attached mbuf */
+
+/* Use final bit of flags to indicate a control mbuf */
+#define CTRL_MBUF_FLAG (1ULL << 63) /**< Mbuf contains control data */
+
+/** Alignment constraint of mbuf private area. */
+#define RTE_MBUF_PRIV_ALIGN 8
+
+/**
+ * Get the name of a RX offload flag
+ *
+ * @param mask
+ * The mask describing the flag.
+ * @return
+ * The name of this flag, or NULL if it's not a valid RX flag.
+ */
+const char *rte_get_rx_ol_flag_name(uint64_t mask);
+
+/**
+ * Dump the list of RX offload flags in a buffer
+ *
+ * @param mask
+ * The mask describing the RX flags.
+ * @param buf
+ * The output buffer.
+ * @param buflen
+ * The length of the buffer.
+ * @return
+ * 0 on success, (-1) on error.
+ */
+int rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen);
+
+/**
+ * Get the name of a TX offload flag
+ *
+ * @param mask
+ * The mask describing the flag. Usually only one bit must be set.
+ * Several bits can be given if they belong to the same mask.
+ * Ex: PKT_TX_L4_MASK.
+ * @return
+ * The name of this flag, or NULL if it's not a valid TX flag.
+ */
+const char *rte_get_tx_ol_flag_name(uint64_t mask);
+
+/**
+ * Dump the list of TX offload flags in a buffer
+ *
+ * @param mask
+ * The mask describing the TX flags.
+ * @param buf
+ * The output buffer.
+ * @param buflen
+ * The length of the buffer.
+ * @return
+ * 0 on success, (-1) on error.
+ */
+int rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen);
+
+/**
+ * Some NICs need at least 2KB buffer to RX standard Ethernet frame without
+ * splitting it into multiple segments.
+ * So, for mbufs that planned to be involved into RX/TX, the recommended
+ * minimal buffer length is 2KB + RTE_PKTMBUF_HEADROOM.
+ */
+#define RTE_MBUF_DEFAULT_DATAROOM 2048
+#define RTE_MBUF_DEFAULT_BUF_SIZE \
+ (RTE_MBUF_DEFAULT_DATAROOM + RTE_PKTMBUF_HEADROOM)
+
+/* define a set of marker types that can be used to refer to set points in the
+ * mbuf */
+__extension__
+typedef void *MARKER[0]; /**< generic marker for a point in a structure */
+__extension__
+typedef uint8_t MARKER8[0]; /**< generic marker with 1B alignment */
+__extension__
+typedef uint64_t MARKER64[0]; /**< marker that allows us to overwrite 8 bytes
+ * with a single assignment */
+
+typedef struct {
+ volatile int16_t cnt; /**< An internal counter value. */
+} rte_atomic16_t;
+
+/**
+ * The generic rte_mbuf, containing a packet mbuf.
+ */
+struct rte_mbuf {
+ MARKER cacheline0;
+
+ void *buf_addr; /**< Virtual address of segment buffer. */
+ /**
+ * Physical address of segment buffer.
+ * Force alignment to 8-bytes, so as to ensure we have the exact
+ * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
+ * working on vector drivers easier.
+ */
+ phys_addr_t buf_physaddr __rte_aligned(sizeof(phys_addr_t));
+
+ /* next 8 bytes are initialised on RX descriptor rearm */
+ MARKER64 rearm_data;
+ uint16_t data_off;
+
+ /**
+ * Reference counter. Its size should at least equal to the size
+ * of port field (16 bits), to support zero-copy broadcast.
+ * It should only be accessed using the following functions:
+ * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and
+ * rte_mbuf_refcnt_set(). The functionality of these functions (atomic,
+ * or non-atomic) is controlled by the CONFIG_RTE_MBUF_REFCNT_ATOMIC
+ * config option.
+ */
+ RTE_STD_C11
+ union {
+ rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */
+ uint16_t refcnt; /**< Non-atomically accessed refcnt */
+ };
+ uint16_t nb_segs; /**< Number of segments. */
+
+ /** Input port (16 bits to support more than 256 virtual ports). */
+ uint16_t port;
+
+ uint64_t ol_flags; /**< Offload features. */
+
+ /* remaining bytes are set on RX when pulling packet from descriptor */
+ MARKER rx_descriptor_fields1;
+
+ /*
+ * The packet type, which is the combination of outer/inner L2, L3, L4
+ * and tunnel types. The packet_type is about data really present in the
+ * mbuf. Example: if vlan stripping is enabled, a received vlan packet
+ * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the
+ * vlan is stripped from the data.
+ */
+ RTE_STD_C11
+ union {
+ uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
+ struct {
+ uint32_t l2_type:4; /**< (Outer) L2 type. */
+ uint32_t l3_type:4; /**< (Outer) L3 type. */
+ uint32_t l4_type:4; /**< (Outer) L4 type. */
+ uint32_t tun_type:4; /**< Tunnel type. */
+ uint32_t inner_l2_type:4; /**< Inner L2 type. */
+ uint32_t inner_l3_type:4; /**< Inner L3 type. */
+ uint32_t inner_l4_type:4; /**< Inner L4 type. */
+ };
+ };
+
+ uint32_t pkt_len; /**< Total pkt len: sum of all segments. */
+ uint16_t data_len; /**< Amount of data in segment buffer. */
+ /** VLAN TCI (CPU order), valid if PKT_RX_VLAN_STRIPPED is set. */
+ uint16_t vlan_tci;
+
+ union {
+ uint32_t rss; /**< RSS hash result if RSS enabled */
+ struct {
+ RTE_STD_C11
+ union {
+ struct {
+ uint16_t hash;
+ uint16_t id;
+ };
+ uint32_t lo;
+ /**< Second 4 flexible bytes */
+ };
+ uint32_t hi;
+ /**< First 4 flexible bytes or FD ID, dependent on
+ PKT_RX_FDIR_* flag in ol_flags. */
+ } fdir; /**< Filter identifier if FDIR enabled */
+ struct {
+ uint32_t lo;
+ uint32_t hi;
+ } sched; /**< Hierarchical scheduler */
+ uint32_t usr; /**< User defined tags. See rte_distributor_process() */
+ } hash; /**< hash information */
+
+ /** Outer VLAN TCI (CPU order), valid if PKT_RX_QINQ_STRIPPED is set. */
+ uint16_t vlan_tci_outer;
+
+ uint16_t buf_len; /**< Length of segment buffer. */
+
+ /** Valid if PKT_RX_TIMESTAMP is set. The unit and time reference
+ * are not normalized but are always the same for a given port.
+ */
+ uint64_t timestamp;
+
+ /* second cache line - fields only used in slow path or on TX */
+ MARKER cacheline1 __rte_cache_min_aligned;
+
+ RTE_STD_C11
+ union {
+ void *userdata; /**< Can be used for external metadata */
+ uint64_t udata64; /**< Allow 8-byte userdata on 32-bit */
+ };
+
+ struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */
+ struct rte_mbuf *next; /**< Next segment of scattered packet. */
+
+ /* fields to support TX offloads */
+ RTE_STD_C11
+ union {
+ uint64_t tx_offload; /**< combined for easy fetch */
+ __extension__
+ struct {
+ uint64_t l2_len:7;
+ /**< L2 (MAC) Header Length for non-tunneling pkt.
+ * Outer_L4_len + ... + Inner_L2_len for tunneling pkt.
+ */
+ uint64_t l3_len:9; /**< L3 (IP) Header Length. */
+ uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
+ uint64_t tso_segsz:16; /**< TCP TSO segment size */
+
+ /* fields for TX offloading of tunnels */
+ uint64_t outer_l3_len:9; /**< Outer L3 (IP) Hdr Length. */
+ uint64_t outer_l2_len:7; /**< Outer L2 (MAC) Hdr Length. */
+
+ /* uint64_t unused:8; */
+ };
+ };
+
+ /** Size of the application private data. In case of an indirect
+ * mbuf, it stores the direct mbuf private data size. */
+ uint16_t priv_size;
+
+ /** Timesync flags for use with IEEE1588. */
+ uint16_t timesync;
+
+ /** Sequence number. See also rte_reorder_insert(). */
+ uint32_t seqn;
+
+} __rte_cache_aligned;
+
+
+/**
+ * Returns TRUE if given mbuf is indirect, or FALSE otherwise.
+ */
+#define RTE_MBUF_INDIRECT(mb) ((mb)->ol_flags & IND_ATTACHED_MBUF)
+
+/**
+ * Returns TRUE if given mbuf is direct, or FALSE otherwise.
+ */
+#define RTE_MBUF_DIRECT(mb) (!RTE_MBUF_INDIRECT(mb))
+
+/**
+ * Private data in case of pktmbuf pool.
+ *
+ * A structure that contains some pktmbuf_pool-specific data that are
+ * appended after the mempool structure (in private data).
+ */
+struct rte_pktmbuf_pool_private {
+ uint16_t mbuf_data_room_size; /**< Size of data space in each mbuf. */
+ uint16_t mbuf_priv_size; /**< Size of private area in each mbuf. */
+};
+
+/**
+ * A macro that points to an offset into the data in the mbuf.
+ *
+ * The returned pointer is cast to type t. Before using this
+ * function, the user must ensure that the first segment is large
+ * enough to accommodate its data.
+ *
+ * @param m
+ * The packet mbuf.
+ * @param o
+ * The offset into the mbuf data.
+ * @param t
+ * The type to cast the result into.
+ */
+#define rte_pktmbuf_mtod_offset(m, t, o) \
+ ((t)((char *)(m)->buf_addr + (m)->data_off + (o)))
+
+/**
+ * A macro that points to the start of the data in the mbuf.
+ *
+ * The returned pointer is cast to type t. Before using this
+ * function, the user must ensure that the first segment is large
+ * enough to accommodate its data.
+ *
+ * @param m
+ * The packet mbuf.
+ * @param t
+ * The type to cast the result into.
+ */
+#define rte_pktmbuf_mtod(m, t) rte_pktmbuf_mtod_offset(m, t, 0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MBUF_H_ */
diff --git a/test/bpf/t1.c b/test/bpf/t1.c
new file mode 100644
index 000000000..e587d5e5b
--- /dev/null
+++ b/test/bpf/t1.c
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+/*
+ * eBPF program sample.
+ * Accepts pointer to first segment packet data as an input parameter.
+ * analog of tcpdump -s 1 -d 'dst 1.2.3.4 && udp && dst port 5000'
+ * (000) ldh [12]
+ * (001) jeq #0x800 jt 2 jf 12
+ * (002) ld [30]
+ * (003) jeq #0x1020304 jt 4 jf 12
+ * (004) ldb [23]
+ * (005) jeq #0x11 jt 6 jf 12
+ * (006) ldh [20]
+ * (007) jset #0x1fff jt 12 jf 8
+ * (008) ldxb 4*([14]&0xf)
+ * (009) ldh [x + 16]
+ * (010) jeq #0x1388 jt 11 jf 12
+ * (011) ret #1
+ * (012) ret #0
+ *
+ * To compile:
+ * clang -O2 -DRTE_CACHE_LINE_SIZE=64 -I${RTE_SDK}/${RTE_TARGET}/include \
+ * -target bpf -c t1.c
+ */
+
+#include <stdint.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+
+uint64_t
+entry(void *pkt)
+{
+ struct ether_header *ether_header = (void *)pkt;
+
+ if (ether_header->ether_type != __builtin_bswap16(0x0800))
+ return 0;
+
+ struct iphdr *iphdr = (void *)(ether_header + 1);
+ if (iphdr->protocol != 17 || (iphdr->frag_off & __builtin_bswap16(0x1fff)) != 0 ||
+ iphdr->daddr != __builtin_bswap32(0x1020304))
+ return 0;
+
+ int hlen = iphdr->ihl * 4;
+ struct udphdr *udphdr = (void *)iphdr + hlen;
+
+ if (udphdr->dest != __builtin_bswap16(5000))
+ return 0;
+
+ return 1;
+}
+
diff --git a/test/bpf/t2.c b/test/bpf/t2.c
new file mode 100644
index 000000000..6228609c5
--- /dev/null
+++ b/test/bpf/t2.c
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+/*
+ * eBPF program sample.
+ * Accepts pointer to struct rte_mbuf as an input parameter.
+ * Cleans up the mbuf's vlan_tci and all related RX flags
+ * (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED).
+ * Doesn't touch contents of packet data.
+ * To compile:
+ * clang -O2 -DRTE_CACHE_LINE_SIZE=... -I${RTE_SDK}/${RTE_TARGET}/include \
+ * -target bpf -Wno-int-to-void-pointer-cast -c t2.c
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include "mbuf.h"
+
+uint64_t
+entry(void *pkt)
+{
+ struct rte_mbuf *mb;
+
+ mb = pkt;
+ mb->vlan_tci = 0;
+ mb->ol_flags &= ~(PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
+
+ return 1;
+}
+
--
2.13.6
* Re: [dpdk-dev] [RFC PATCH 5/5] test: add few eBPF samples
2018-03-08 1:30 ` [dpdk-dev] [RFC PATCH 5/5] test: add few eBPF samples Konstantin Ananyev
@ 2018-03-13 14:01 ` Jerin Jacob
2018-03-13 18:14 ` Ananyev, Konstantin
0 siblings, 1 reply; 10+ messages in thread
From: Jerin Jacob @ 2018-03-13 14:01 UTC (permalink / raw)
To: Konstantin Ananyev; +Cc: dev
-----Original Message-----
> Date: Thu, 8 Mar 2018 01:30:02 +0000
> From: Konstantin Ananyev <konstantin.ananyev@intel.com>
> To: dev@dpdk.org
> CC: Konstantin Ananyev <konstantin.ananyev@intel.com>
> Subject: [dpdk-dev] [RFC PATCH 5/5] test: add few eBPF samples
> X-Mailer: git-send-email 1.7.0.7
>
> Add a few simple eBPF programs as examples.
>
> Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> diff --git a/test/bpf/mbuf.h b/test/bpf/mbuf.h
> new file mode 100644
> index 000000000..aeef6339d
> --- /dev/null
> +++ b/test/bpf/mbuf.h
> @@ -0,0 +1,556 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2010-2014 Intel Corporation.
> + * Copyright 2014 6WIND S.A.
> + */
> +
> +/*
> + * Snippet from dpdk.org rte_mbuf.h,
> + * used to provide BPF programs with information about the rte_mbuf layout.
> + */
> +
> +#ifndef _MBUF_H_
> +#define _MBUF_H_
> +
> +#include <stdint.h>
> +#include <rte_common.h>
> +#include <rte_memory.h>
Is it worth keeping a copy of mbuf for standalone purposes?
Since clang is already supported, I think if someone needs mbuf then
they can include the DPDK headers. Just thinking from a maintainability
perspective.
> diff --git a/test/bpf/t1.c b/test/bpf/t1.c
> new file mode 100644
> index 000000000..e587d5e5b
> --- /dev/null
> +++ b/test/bpf/t1.c
> @@ -0,0 +1,54 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2018 Intel Corporation
> + */
> +
> +/*
> + * eBPF program sample.
> + * Accepts pointer to first segment packet data as an input parameter.
> + * analog of tcpdump -s 1 -d 'dst 1.2.3.4 && udp && dst port 5000'
> + * (000) ldh [12]
> + * (001) jeq #0x800 jt 2 jf 12
> + * (002) ld [30]
> + * (003) jeq #0x1020304 jt 4 jf 12
> + * (004) ldb [23]
> + * (005) jeq #0x11 jt 6 jf 12
> + * (006) ldh [20]
> + * (007) jset #0x1fff jt 12 jf 8
> + * (008) ldxb 4*([14]&0xf)
> + * (009) ldh [x + 16]
> + * (010) jeq #0x1388 jt 11 jf 12
> + * (011) ret #1
> + * (012) ret #0
> + *
> + * To compile:
> + * clang -O2 -DRTE_CACHE_LINE_SIZE=64 -I${RTE_SDK}/${RTE_TARGET}/include \
It does not look like this application is accessing any DPDK stuff. If so,
should we remove -DRTE_CACHE_LINE_SIZE=64 -I${RTE_SDK}/${RTE_TARGET}/include?
> + * -target bpf -c t1.c
> + */
> +
> +#include <stdint.h>
> +#include <net/ethernet.h>
> +#include <netinet/ip.h>
> +#include <netinet/udp.h>
> +
> +uint64_t
> +entry(void *pkt)
> +{
> + struct ether_header *ether_header = (void *)pkt;
> +
> + if (ether_header->ether_type != __builtin_bswap16(0x0800))
> + return 0;
> +
> + struct iphdr *iphdr = (void *)(ether_header + 1);
> > + if (iphdr->protocol != 17 || (iphdr->frag_off & __builtin_bswap16(0x1fff)) != 0 ||
> + iphdr->daddr != __builtin_bswap32(0x1020304))
> + return 0;
> +
> + int hlen = iphdr->ihl * 4;
> + struct udphdr *udphdr = (void *)iphdr + hlen;
> +
> + if (udphdr->dest != __builtin_bswap16(5000))
> + return 0;
> +
> + return 1;
> +}
> +
> diff --git a/test/bpf/t2.c b/test/bpf/t2.c
> new file mode 100644
> index 000000000..6228609c5
> --- /dev/null
> +++ b/test/bpf/t2.c
> @@ -0,0 +1,31 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2018 Intel Corporation
> + */
> +
> +/*
> + * eBPF program sample.
> + * Accepts pointer to struct rte_mbuf as an input parameter.
> > + * Cleans up the mbuf's vlan_tci and all related RX flags
> + * (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED).
> + * Doesn't touch contents of packet data.
> + * To compile:
> + * clang -O2 -DRTE_CACHE_LINE_SIZE=... -I${RTE_SDK}/${RTE_TARGET}/include \
> + * -target bpf -Wno-int-to-void-pointer-cast -c t2.c
> + */
> +
> +#include <stdint.h>
> +#include <stddef.h>
> +#include "mbuf.h"
Can we take the cache line size from rte_config.h, which is anyway included in
mbuf.h through rte_memory.h?
> +
> +uint64_t
> +entry(void *pkt)
> +{
> + struct rte_mbuf *mb;
> +
> + mb = pkt;
> + mb->vlan_tci = 0;
> + mb->ol_flags &= ~(PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
> +
> + return 1;
> +}
> +
> --
> 2.13.6
>
* Re: [dpdk-dev] [RFC PATCH 5/5] test: add few eBPF samples
2018-03-13 14:01 ` Jerin Jacob
@ 2018-03-13 18:14 ` Ananyev, Konstantin
2018-03-30 17:42 ` Ananyev, Konstantin
0 siblings, 1 reply; 10+ messages in thread
From: Ananyev, Konstantin @ 2018-03-13 18:14 UTC (permalink / raw)
To: Jerin Jacob; +Cc: dev
> >
> > Add a few simple eBPF programs as examples.
> >
> > Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> > diff --git a/test/bpf/mbuf.h b/test/bpf/mbuf.h
> > new file mode 100644
> > index 000000000..aeef6339d
> > --- /dev/null
> > +++ b/test/bpf/mbuf.h
> > @@ -0,0 +1,556 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2010-2014 Intel Corporation.
> > + * Copyright 2014 6WIND S.A.
> > + */
> > +
> > +/*
> > + * Snippet from dpdk.org rte_mbuf.h,
> > + * used to provide BPF programs with information about the rte_mbuf layout.
> > + */
> > +
> > +#ifndef _MBUF_H_
> > +#define _MBUF_H_
> > +
> > +#include <stdint.h>
> > +#include <rte_common.h>
> > +#include <rte_memory.h>
>
> Is it worth keeping a copy of mbuf for standalone purposes?
> Since clang is already supported, I think if someone needs mbuf then
> they can include the DPDK headers. Just thinking from a maintainability
> perspective.
That would be ideal.
I made the snippet just to avoid compiler errors for the bpf target.
Will try to address it in the next version.
>
> > diff --git a/test/bpf/t1.c b/test/bpf/t1.c
> > new file mode 100644
> > index 000000000..e587d5e5b
> > --- /dev/null
> > +++ b/test/bpf/t1.c
> > @@ -0,0 +1,54 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2018 Intel Corporation
> > + */
> > +
> > +/*
> > + * eBPF program sample.
> > + * Accepts pointer to first segment packet data as an input parameter.
> > + * analog of tcpdump -s 1 -d 'dst 1.2.3.4 && udp && dst port 5000'
> > + * (000) ldh [12]
> > + * (001) jeq #0x800 jt 2 jf 12
> > + * (002) ld [30]
> > + * (003) jeq #0x1020304 jt 4 jf 12
> > + * (004) ldb [23]
> > + * (005) jeq #0x11 jt 6 jf 12
> > + * (006) ldh [20]
> > + * (007) jset #0x1fff jt 12 jf 8
> > + * (008) ldxb 4*([14]&0xf)
> > + * (009) ldh [x + 16]
> > + * (010) jeq #0x1388 jt 11 jf 12
> > + * (011) ret #1
> > + * (012) ret #0
> > + *
> > + * To compile:
> > + * clang -O2 -DRTE_CACHE_LINE_SIZE=64 -I${RTE_SDK}/${RTE_TARGET}/include \
>
> It does not look like this application is accessing any DPDK stuff. If so,
> should we remove -DRTE_CACHE_LINE_SIZE=64 -I${RTE_SDK}/${RTE_TARGET}/include?
Yes, will do.
>
> > + * -target bpf -c t1.c
> > + */
> > +
> > +#include <stdint.h>
> > +#include <net/ethernet.h>
> > +#include <netinet/ip.h>
> > +#include <netinet/udp.h>
> > +
> > +uint64_t
> > +entry(void *pkt)
> > +{
> > + struct ether_header *ether_header = (void *)pkt;
> > +
> > + if (ether_header->ether_type != __builtin_bswap16(0x0800))
> > + return 0;
> > +
> > + struct iphdr *iphdr = (void *)(ether_header + 1);
> > > + if (iphdr->protocol != 17 || (iphdr->frag_off & __builtin_bswap16(0x1fff)) != 0 ||
> > + iphdr->daddr != __builtin_bswap32(0x1020304))
> > + return 0;
> > +
> > + int hlen = iphdr->ihl * 4;
> > + struct udphdr *udphdr = (void *)iphdr + hlen;
> > +
> > + if (udphdr->dest != __builtin_bswap16(5000))
> > + return 0;
> > +
> > + return 1;
> > +}
> > +
> > diff --git a/test/bpf/t2.c b/test/bpf/t2.c
> > new file mode 100644
> > index 000000000..6228609c5
> > --- /dev/null
> > +++ b/test/bpf/t2.c
> > @@ -0,0 +1,31 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2018 Intel Corporation
> > + */
> > +
> > +/*
> > + * eBPF program sample.
> > + * Accepts a pointer to struct rte_mbuf as an input parameter.
> > + * Cleans up the mbuf's vlan_tci and all related RX flags
> > + * (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED).
> > + * Does not touch the packet data contents.
> > + * To compile:
> > + * clang -O2 -DRTE_CACHE_LINE_SIZE=... -I${RTE_SDK}/${RTE_TARGET}/include \
> > + * -target bpf -Wno-int-to-void-pointer-cast -c t2.c
> > + */
> > +
> > +#include <stdint.h>
> > +#include <stddef.h>
> > +#include "mbuf.h"
>
> Can we take the cache line size from rte_config.h, which is anyway included in
> mbuf.h through rte_memory.h?
Makes sense, will update in v2.
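For completeness, since the hunk above was trimmed before the function, a sketch
of what the t2.c body would look like, assuming the mbuf.h snippet provides
struct rte_mbuf and the PKT_RX_* flags (the return value here is assumed):

#include <stdint.h>
#include <stddef.h>
#include "mbuf.h"

uint64_t
entry(void *pkt)
{
	struct rte_mbuf *mb = pkt;

	/* clear the stripped VLAN tag and the RX flags advertising it */
	mb->vlan_tci = 0;
	mb->ol_flags &= ~(PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);

	return 1;
}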
Konstantin
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 5/5] test: add few eBPF samples
2018-03-13 18:14 ` Ananyev, Konstantin
@ 2018-03-30 17:42 ` Ananyev, Konstantin
2018-04-02 22:26 ` Jerin Jacob
0 siblings, 1 reply; 10+ messages in thread
From: Ananyev, Konstantin @ 2018-03-30 17:42 UTC (permalink / raw)
To: 'Jerin Jacob'; +Cc: 'dev@dpdk.org'
Hi Jerin,
> > > Add a few simple eBPF programs as examples.
> > >
> > > Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> > > diff --git a/test/bpf/mbuf.h b/test/bpf/mbuf.h
> > > new file mode 100644
> > > index 000000000..aeef6339d
> > > --- /dev/null
> > > +++ b/test/bpf/mbuf.h
> > > @@ -0,0 +1,556 @@
> > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > + * Copyright(c) 2010-2014 Intel Corporation.
> > > + * Copyright 2014 6WIND S.A.
> > > + */
> > > +
> > > +/*
> > > + * Snippet from dpdk.org rte_mbuf.h,
> > > + * used to provide BPF programs with information about the rte_mbuf layout.
> > > + */
> > > +
> > > +#ifndef _MBUF_H_
> > > +#define _MBUF_H_
> > > +
> > > +#include <stdint.h>
> > > +#include <rte_common.h>
> > > +#include <rte_memory.h>
> >
> > Is it worth keeping a copy of mbuf for standalone purposes?
> > Since clang is already supported, I think if someone needs mbuf then
> > they can include the DPDK headers. Just thinking from a maintainability
> > perspective.
>
> That would be ideal.
> I made the snippet just to avoid compiler errors for the bpf target.
> Will try to address it in next version.
>
I looked at it a bit more, and it wouldn't be as straightforward as I thought.
There are things in the include chain that the bpf target does not support
(thread-local storage and SIMD-related definitions).
So fixing it would need some changes in our core include files.
The simplest way would probably be to move struct rte_mbuf and the related macro
definitions into a separate file (rte_mbuf_common.h or so).
Though that is quite a controversial change, and I think it is better to postpone
it to a separate patch, probably in the next release.
So for now I left the snippet test/bpf/mbuf.h in place.
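To illustrate the kind of breakage with a reduced, hypothetical example (not
taken from the actual headers): the per-lcore machinery expands to thread-local
storage, which clang rejects when targeting bpf:

/* tls.c - "clang -target bpf -c tls.c" should fail with an error like
 * "thread-local storage is not supported for the current target".
 */
__thread unsigned int per_lcore__lcore_id;

unsigned int
get_lcore_id(void)
{
	return per_lcore__lcore_id;
}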
Konstantin
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [RFC PATCH 5/5] test: add few eBPF samples
2018-03-30 17:42 ` Ananyev, Konstantin
@ 2018-04-02 22:26 ` Jerin Jacob
0 siblings, 0 replies; 10+ messages in thread
From: Jerin Jacob @ 2018-04-02 22:26 UTC (permalink / raw)
To: Ananyev, Konstantin; +Cc: 'dev@dpdk.org'
-----Original Message-----
> Date: Fri, 30 Mar 2018 17:42:22 +0000
> From: "Ananyev, Konstantin" <konstantin.ananyev@intel.com>
> To: 'Jerin Jacob' <jerin.jacob@caviumnetworks.com>
> CC: "'dev@dpdk.org'" <dev@dpdk.org>
> Subject: RE: [dpdk-dev] [RFC PATCH 5/5] test: add few eBPF samples
>
> Hi Jerin,
> > > > Add a few simple eBPF programs as examples.
> > > >
> > > > Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> > > > diff --git a/test/bpf/mbuf.h b/test/bpf/mbuf.h
> > > > new file mode 100644
> > > > index 000000000..aeef6339d
> > > > --- /dev/null
> > > > +++ b/test/bpf/mbuf.h
> > > > @@ -0,0 +1,556 @@
> > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > + * Copyright(c) 2010-2014 Intel Corporation.
> > > > + * Copyright 2014 6WIND S.A.
> > > > + */
> > > > +
> > > > +/*
> > > > + * Snippet from dpdk.org rte_mbuf.h,
> > > > + * used to provide BPF programs with information about the rte_mbuf layout.
> > > > + */
> > > > +
> > > > +#ifndef _MBUF_H_
> > > > +#define _MBUF_H_
> > > > +
> > > > +#include <stdint.h>
> > > > +#include <rte_common.h>
> > > > +#include <rte_memory.h>
> > >
> > > Is it worth keeping a copy of mbuf for standalone purposes?
> > > Since clang is already supported, I think if someone needs mbuf then
> > > they can include the DPDK headers. Just thinking from a maintainability
> > > perspective.
> >
> > That would be ideal.
> > I made the snippet just to avoid compiler errors for the bpf target.
> > Will try to address it in next version.
> >
>
> I looked at it a bit more, and it wouldn't be as straightforward as I thought.
> There are things in the include chain that the bpf target does not support
> (thread-local storage and SIMD-related definitions).
> So fixing it would need some changes in our core include files.
> The simplest way would probably be to move struct rte_mbuf and the related macro
> definitions into a separate file (rte_mbuf_common.h or so).
I think rte_mbuf_common.h is the way to go. IMO, KNI would also benefit from that.
I guess there is no ABI change if we move the generic stuff to rte_mbuf_common.h.
But if you think it is too controversial a change, then we could postpone it to
the next release. (My only worry is that, once it is postponed, it may not
happen.) I am fine with either way.
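As a rough sketch of the split (file name from the discussion; contents and flag
values assumed for illustration, not a definitive layout):

/* rte_mbuf_common.h (hypothetical): layout-only definitions that are safe
 * for the bpf target, i.e. struct rte_mbuf and the PKT_* flag macros, with
 * none of the inline helpers that drag in TLS or SIMD headers.
 */
#ifndef _RTE_MBUF_COMMON_H_
#define _RTE_MBUF_COMMON_H_

#include <stdint.h>

#define PKT_RX_VLAN_PKT      (1ULL << 0)	/* values assumed; check rte_mbuf.h */
#define PKT_RX_VLAN_STRIPPED (1ULL << 6)
/* ... remaining flags and struct rte_mbuf itself ... */

#endif /* _RTE_MBUF_COMMON_H_ */

rte_mbuf.h would then simply include it and keep only the EAL-dependent inline
functions.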
> Though that is quite a controversial change, and I think it is better to postpone
> it to a separate patch, probably in the next release.
> So for now I left the snippet test/bpf/mbuf.h in place.
> Konstantin
^ permalink raw reply [flat|nested] 10+ messages in thread