From: Konstantin Ananyev <konstantin.ananyev@intel.com>
To: dev@dpdk.org
Cc: Konstantin Ananyev <konstantin.ananyev@intel.com>
Date: Thu, 8 Mar 2018 01:30:00 +0000
Message-Id: <1520472602-1483-4-git-send-email-konstantin.ananyev@intel.com>
X-Mailer: git-send-email 1.7.0.7
In-Reply-To: <1520472602-1483-1-git-send-email-konstantin.ananyev@intel.com>
References: <1520472602-1483-1-git-send-email-konstantin.ananyev@intel.com>
Subject: [dpdk-dev] [RFC PATCH 3/5] bpf: introduce basic RX/TX BPF filters

Introduce an API to install BPF-based filters on the ethdev RX/TX path.
The current implementation is a pure software one, based on the ethdev
RX/TX callback mechanism.

Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
---
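For reference, a minimal usage sketch of the new API (untested; the ELF
file name "t1_filter.o", the section name "filter" and the port/queue
numbers below are made up purely for illustration, and the setup of
struct rte_bpf_prm is as defined by rte_bpf.h from the first patch of
this series):

	#include <rte_bpf_ethdev.h>

	/*
	 * Load a BPF program from a (hypothetical) ELF file and install
	 * it as an RX filter on port 0, queue 0, then remove it again.
	 * prm->prog_type selects the raw-data vs mbuf flavour of the
	 * callback; RTE_BPF_ETH_F_JIT requests the JIT-ed version.
	 */
	static int
	example_rx_filter(const struct rte_bpf_prm *prm)
	{
		int rc;

		rc = rte_bpf_eth_rx_elf_load(0, 0, prm, "t1_filter.o",
			"filter", RTE_BPF_ETH_F_JIT);
		if (rc != 0)
			return rc;	/* negative errno value */

		/*
		 * From now on, packets for which the BPF program
		 * returns zero are dropped inside the RX callback.
		 */

		rte_bpf_eth_rx_unload(0, 0);
		return 0;
	}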
 lib/librte_bpf/Makefile            |   2 +
 lib/librte_bpf/bpf_pkt.c           | 524 +++++++++++++++++++++++++++++++++++++
 lib/librte_bpf/rte_bpf_ethdev.h    |  50 ++++
 lib/librte_bpf/rte_bpf_version.map |   4 +
 4 files changed, 580 insertions(+)
 create mode 100644 lib/librte_bpf/bpf_pkt.c
 create mode 100644 lib/librte_bpf/rte_bpf_ethdev.h

diff --git a/lib/librte_bpf/Makefile b/lib/librte_bpf/Makefile
index 44b12c439..501c49c60 100644
--- a/lib/librte_bpf/Makefile
+++ b/lib/librte_bpf/Makefile
@@ -22,6 +22,7 @@ LIBABIVER := 1
 SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf.c
 SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_exec.c
 SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_load.c
+SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_pkt.c
 SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_validate.c
 ifeq ($(CONFIG_RTE_ARCH_X86_64),y)
 SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_jit_x86.c
@@ -29,5 +30,6 @@ endif

 # install header files
 SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += rte_bpf.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_BPF)-include += rte_bpf_ethdev.h

 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_bpf/bpf_pkt.c b/lib/librte_bpf/bpf_pkt.c
new file mode 100644
index 000000000..b0177ad82
--- /dev/null
+++ b/lib/librte_bpf/bpf_pkt.c
@@ -0,0 +1,524 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_atomic.h>
+#include <rte_pause.h>
+#include <rte_errno.h>
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+
+#include <rte_bpf_ethdev.h>
+
+/*
+ * Information about all installed BPF RX/TX callbacks.
+ */
+
+struct bpf_eth_cbi {
+	uint32_t use;    /* usage counter */
+	void *cb;        /* callback handle */
+	struct rte_bpf *bpf;
+	struct rte_bpf_jit jit;
+} __rte_cache_aligned;
+
+/*
+ * An odd number means that the callback is in use by the datapath.
+ * An even number means that the callback is not in use by the datapath.
+ */
+#define BPF_ETH_CBI_INUSE	1
+
+static struct bpf_eth_cbi rx_cbi[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
+static struct bpf_eth_cbi tx_cbi[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
+
+/*
+ * Marks the given callback as used by the datapath.
+ */
+static __rte_always_inline void
+bpf_eth_cbi_inuse(struct bpf_eth_cbi *cbi)
+{
+	cbi->use++;
+	/* make sure no store/load reordering could happen */
+	rte_smp_mb();
+}
+
+/*
+ * Marks the given callback as not used by the datapath.
+ */
+static __rte_always_inline void
+bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi)
+{
+	/* make sure all previous loads are completed */
+	rte_smp_rmb();
+	cbi->use++;
+}
+
+/*
+ * Waits till the datapath has finished using the given callback.
+ */
+static void
+bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi)
+{
+	uint32_t nuse, puse;
+
+	/* make sure all previous loads and stores are completed */
+	rte_smp_mb();
+
+	puse = cbi->use;
+
+	/* in use, busy wait till the current RX/TX iteration is finished */
+	if ((puse & BPF_ETH_CBI_INUSE) != 0) {
+		do {
+			rte_pause();
+			rte_compiler_barrier();
+			nuse = cbi->use;
+		} while (nuse == puse);
+	}
+}
+
+static void
+bpf_eth_cbi_cleanup(struct bpf_eth_cbi *bc)
+{
+	bc->bpf = NULL;
+	memset(&bc->jit, 0, sizeof(bc->jit));
+}
+
+/*
+ * BPF packet processing routines.
+ */
+
+static inline uint32_t
+apply_filter(struct rte_mbuf *mb[], const uint64_t rc[], uint32_t num,
+	uint32_t drop)
+{
+	uint32_t i, j, k;
+	struct rte_mbuf *dr[num];
+
+	for (i = 0, j = 0, k = 0; i != num; i++) {
+
+		/* filter matches */
+		if (rc[i] != 0)
+			mb[j++] = mb[i];
+		/* no match */
+		else
+			dr[k++] = mb[i];
+	}
+
+	if (drop != 0) {
+		/* free filtered out mbufs */
+		for (i = 0; i != k; i++)
+			rte_pktmbuf_free(dr[i]);
+	} else {
+		/* copy filtered out mbufs beyond good ones */
+		for (i = 0; i != k; i++)
+			mb[j + i] = dr[i];
+	}
+
+	return j;
+}
+
+static inline uint32_t
+pkt_filter_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[], uint32_t num,
+	uint32_t drop)
+{
+	uint32_t i;
+	void *dp[num];
+	uint64_t rc[num];
+
+	for (i = 0; i != num; i++)
+		dp[i] = rte_pktmbuf_mtod(mb[i], void *);
+
+	rte_bpf_exec_burst(bpf, dp, rc, num);
+	return apply_filter(mb, rc, num, drop);
+}
+
+static inline uint32_t
+pkt_filter_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
+	uint32_t num, uint32_t drop)
+{
+	uint32_t i;
+	void *dp;
+	uint64_t rc[num];
+
+	for (i = 0; i != num; i++) {
+		dp = rte_pktmbuf_mtod(mb[i], void *);
+		rc[i] = (jit->func(dp) != 0);
+	}
+
+	return apply_filter(mb, rc, num, drop);
+}
+
+static inline uint32_t
+pkt_filter_mb_vm(const struct rte_bpf *bpf, struct rte_mbuf *mb[],
+	uint32_t num, uint32_t drop)
+{
+	uint64_t rc[num];
+
+	rte_bpf_exec_burst(bpf, (void **)mb, rc, num);
+	return apply_filter(mb, rc, num, drop);
+}
+
+static inline uint32_t
+pkt_filter_mb_jit(const struct rte_bpf_jit *jit, struct rte_mbuf *mb[],
+	uint32_t num, uint32_t drop)
+{
+	uint32_t i;
+	uint64_t rc[num];
+
+	for (i = 0; i != num; i++)
+		rc[i] = (jit->func(mb[i]) != 0);
+
+	return apply_filter(mb, rc, num, drop);
+}
+
+/*
+ * RX/TX callbacks for raw data bpf.
+ */
+
+static uint16_t
+bpf_rx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+	struct rte_mbuf *pkt[], uint16_t nb_pkts,
+	__rte_unused uint16_t max_pkts, void *user_param)
+{
+	struct bpf_eth_cbi *cbi;
+	uint16_t rc;
+
+	cbi = user_param;
+
+	bpf_eth_cbi_inuse(cbi);
+	rc = (cbi->cb != NULL) ?
+		pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 1) :
+		nb_pkts;
+	bpf_eth_cbi_unuse(cbi);
+	return rc;
+}
+
+static uint16_t
+bpf_rx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+	struct rte_mbuf *pkt[], uint16_t nb_pkts,
+	__rte_unused uint16_t max_pkts, void *user_param)
+{
+	struct bpf_eth_cbi *cbi;
+	uint16_t rc;
+
+	cbi = user_param;
+	bpf_eth_cbi_inuse(cbi);
+	rc = (cbi->cb != NULL) ?
+		pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 1) :
+		nb_pkts;
+	bpf_eth_cbi_unuse(cbi);
+	return rc;
+}
+
+static uint16_t
+bpf_tx_callback_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
+{
+	struct bpf_eth_cbi *cbi;
+	uint16_t rc;
+
+	cbi = user_param;
+	bpf_eth_cbi_inuse(cbi);
+	rc = (cbi->cb != NULL) ?
+		pkt_filter_vm(cbi->bpf, pkt, nb_pkts, 0) :
+		nb_pkts;
+	bpf_eth_cbi_unuse(cbi);
+	return rc;
+}
+
+static uint16_t
+bpf_tx_callback_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
+{
+	struct bpf_eth_cbi *cbi;
+	uint16_t rc;
+
+	cbi = user_param;
+	bpf_eth_cbi_inuse(cbi);
+	rc = (cbi->cb != NULL) ?
+		pkt_filter_jit(&cbi->jit, pkt, nb_pkts, 0) :
+		nb_pkts;
+	bpf_eth_cbi_unuse(cbi);
+	return rc;
+}
+
+/*
+ * RX/TX callbacks for mbuf.
+ */
+
+static uint16_t
+bpf_rx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+	struct rte_mbuf *pkt[], uint16_t nb_pkts,
+	__rte_unused uint16_t max_pkts, void *user_param)
+{
+	struct bpf_eth_cbi *cbi;
+	uint16_t rc;
+
+	cbi = user_param;
+	bpf_eth_cbi_inuse(cbi);
+	rc = (cbi->cb != NULL) ?
+		pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 1) :
+		nb_pkts;
+	bpf_eth_cbi_unuse(cbi);
+	return rc;
+}
+
+static uint16_t
+bpf_rx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+	struct rte_mbuf *pkt[], uint16_t nb_pkts,
+	__rte_unused uint16_t max_pkts, void *user_param)
+{
+	struct bpf_eth_cbi *cbi;
+	uint16_t rc;
+
+	cbi = user_param;
+	bpf_eth_cbi_inuse(cbi);
+	rc = (cbi->cb != NULL) ?
+		pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 1) :
+		nb_pkts;
+	bpf_eth_cbi_unuse(cbi);
+	return rc;
+}
+
+static uint16_t
+bpf_tx_callback_mb_vm(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
+{
+	struct bpf_eth_cbi *cbi;
+	uint16_t rc;
+
+	cbi = user_param;
+	bpf_eth_cbi_inuse(cbi);
+	rc = (cbi->cb != NULL) ?
+		pkt_filter_mb_vm(cbi->bpf, pkt, nb_pkts, 0) :
+		nb_pkts;
+	bpf_eth_cbi_unuse(cbi);
+	return rc;
+}
+
+static uint16_t
+bpf_tx_callback_mb_jit(__rte_unused uint16_t port, __rte_unused uint16_t queue,
+	struct rte_mbuf *pkt[], uint16_t nb_pkts, void *user_param)
+{
+	struct bpf_eth_cbi *cbi;
+	uint16_t rc;
+
+	cbi = user_param;
+	bpf_eth_cbi_inuse(cbi);
+	rc = (cbi->cb != NULL) ?
+		pkt_filter_mb_jit(&cbi->jit, pkt, nb_pkts, 0) :
+		nb_pkts;
+	bpf_eth_cbi_unuse(cbi);
+	return rc;
+}
+
+static rte_rx_callback_fn
+select_rx_callback(enum rte_bpf_prog_type ptype, uint32_t flags)
+{
+	if (flags & RTE_BPF_ETH_F_JIT) {
+		if (ptype == RTE_BPF_PROG_TYPE_UNSPEC)
+			return bpf_rx_callback_jit;
+		else if (ptype == RTE_BPF_PROG_TYPE_MBUF)
+			return bpf_rx_callback_mb_jit;
+	} else if (ptype == RTE_BPF_PROG_TYPE_UNSPEC)
+		return bpf_rx_callback_vm;
+	else if (ptype == RTE_BPF_PROG_TYPE_MBUF)
+		return bpf_rx_callback_mb_vm;
+
+	return NULL;
+}
+
+static rte_tx_callback_fn
+select_tx_callback(enum rte_bpf_prog_type ptype, uint32_t flags)
+{
+	if (flags & RTE_BPF_ETH_F_JIT) {
+		if (ptype == RTE_BPF_PROG_TYPE_UNSPEC)
+			return bpf_tx_callback_jit;
+		else if (ptype == RTE_BPF_PROG_TYPE_MBUF)
+			return bpf_tx_callback_mb_jit;
+	} else if (ptype == RTE_BPF_PROG_TYPE_UNSPEC)
+		return bpf_tx_callback_vm;
+	else if (ptype == RTE_BPF_PROG_TYPE_MBUF)
+		return bpf_tx_callback_mb_vm;
+
+	return NULL;
+}
+
+/*
+ * Helper function to perform BPF unload for a given port/queue.
+ * We have to introduce extra complexity (and a slowdown) here,
+ * as right now there is no safe generic way to remove an RX/TX callback
+ * while IO is active.
+ * Note that we still don't free the memory allocated for the callback
+ * handle itself; again, right now there is no safe way to do that
+ * without first stopping RX/TX on the given port/queue.
+ */
+static void
+bpf_eth_unload(struct bpf_eth_cbi *bc)
+{
+	/* mark this cbi as empty */
+	bc->cb = NULL;
+	rte_smp_mb();
+
+	/* make sure the datapath doesn't use bpf anymore, then destroy bpf */
+	bpf_eth_cbi_wait(bc);
+	rte_bpf_destroy(bc->bpf);
+	bpf_eth_cbi_cleanup(bc);
+}
+
+__rte_experimental void
+rte_bpf_eth_rx_unload(uint16_t port, uint16_t queue)
+{
+	struct bpf_eth_cbi *bc;
+	void *cb;
+
+	bc = &rx_cbi[port][queue];
+	cb = bc->cb;
+
+	if (cb == NULL)
+		return;
+
+	rte_eth_remove_rx_callback(port, queue, cb);
+	bpf_eth_unload(bc);
+}
+
+__rte_experimental void
+rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue)
+{
+	struct bpf_eth_cbi *bc;
+	void *cb;
+
+	bc = &tx_cbi[port][queue];
+	cb = bc->cb;
+
+	if (cb == NULL)
+		return;
+
+	rte_eth_remove_tx_callback(port, queue, cb);
+	bpf_eth_unload(bc);
+}
+
+__rte_experimental int
+rte_bpf_eth_rx_elf_load(uint16_t port, uint16_t queue,
+	const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+	uint32_t flags)
+{
+	int32_t rc;
+	struct bpf_eth_cbi *bc;
+	struct rte_bpf *bpf;
+	rte_rx_callback_fn fn;
+
+	if (prm == NULL)
+		return -EINVAL;
+
+	/* remove the old one, if any */
+	rte_bpf_eth_rx_unload(port, queue);
+
+	fn = select_rx_callback(prm->prog_type, flags);
+	if (fn == NULL) {
+		RTE_LOG(ERR, USER1, "%s(%u, %u): no callback selected;\n",
+			__func__, port, queue);
+		return -EINVAL;
+	}
+
+	bpf = rte_bpf_elf_load(prm, fname, sname);
+	if (bpf == NULL)
+		return -rte_errno;
+
+	/* update global callback info */
+	bc = &rx_cbi[port][queue];
+	bc->bpf = bpf;
+	rte_bpf_get_jit(bpf, &bc->jit);
+
+	rc = 0;
+
+	if ((flags & RTE_BPF_ETH_F_JIT) != 0 && bc->jit.func == NULL) {
+		RTE_LOG(ERR, USER1, "%s(%u, %u): no JIT generated;\n",
+			__func__, port, queue);
+		rc = -EINVAL;
+	} else {
+		bc->cb = rte_eth_add_rx_callback(port, queue, fn, bc);
+		if (bc->cb == NULL)
+			rc = -rte_errno;
+	}
+
+	if (rc != 0) {
+		rte_bpf_destroy(bpf);
+		bpf_eth_cbi_cleanup(bc);
+	}
+
+	return rc;
+}
+
+__rte_experimental int
+rte_bpf_eth_tx_elf_load(uint16_t port, uint16_t queue,
+	const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+	uint32_t flags)
+{
+	int32_t rc;
+	struct bpf_eth_cbi *bc;
+	struct rte_bpf *bpf;
+	rte_tx_callback_fn fn;
+
+	if (prm == NULL)
+		return -EINVAL;
+
+	/* remove the old one, if any */
+	rte_bpf_eth_tx_unload(port, queue);
+
+	fn = select_tx_callback(prm->prog_type, flags);
+	if (fn == NULL) {
+		RTE_LOG(ERR, USER1, "%s(%u, %u): no callback selected;\n",
+			__func__, port, queue);
+		return -EINVAL;
+	}
+
+	bpf = rte_bpf_elf_load(prm, fname, sname);
+	if (bpf == NULL)
+		return -rte_errno;
+
+	/* update global callback info */
+	bc = &tx_cbi[port][queue];
+	bc->bpf = bpf;
+	rte_bpf_get_jit(bpf, &bc->jit);
+
+	rc = 0;
+
+	if ((flags & RTE_BPF_ETH_F_JIT) != 0 && bc->jit.func == NULL) {
+		RTE_LOG(ERR, USER1, "%s(%u, %u): no JIT generated;\n",
+			__func__, port, queue);
+		rc = -EINVAL;
+	} else {
+		bc->cb = rte_eth_add_tx_callback(port, queue, fn, bc);
+		if (bc->cb == NULL)
+			rc = -rte_errno;
+	}
+
+	if (rc != 0) {
+		rte_bpf_destroy(bpf);
+		bpf_eth_cbi_cleanup(bc);
+	}
+
+	return rc;
+}
diff --git a/lib/librte_bpf/rte_bpf_ethdev.h b/lib/librte_bpf/rte_bpf_ethdev.h
new file mode 100644
index 000000000..abc3b8e5f
--- /dev/null
+++ b/lib/librte_bpf/rte_bpf_ethdev.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_BPF_ETHDEV_H_
+#define _RTE_BPF_ETHDEV_H_
+
+#include <rte_bpf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+	RTE_BPF_ETH_F_NONE = 0,
+	RTE_BPF_ETH_F_JIT = 0x1, /**< compile BPF into native ISA */
+};
+
+/*
+ * API to install a BPF filter as RX/TX callbacks for eth devices.
+ * Note that right now:
+ * - it is not MT safe, i.e. it is not allowed to do load/unload for the
+ *   same port/queue from different threads in parallel.
+ * - though it does allow load/unload at runtime
+ *   (while RX/TX is ongoing on the given port/queue).
+ * - it allows only one BPF program per port/queue,
+ *   i.e. a new load will replace the BPF program previously loaded for
+ *   that port/queue.
+ * Filter behaviour - if the BPF program returns a zero value for a given
+ * packet:
+ * on RX - the packet will be dropped inside the callback and no further
+ *   processing for that packet will happen;
+ * on TX - the packet will remain unsent, and it is the responsibility of
+ *   the user to handle such a situation (drop, try to send again, etc.).
+ */
+
+void rte_bpf_eth_rx_unload(uint16_t port, uint16_t queue);
+void rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue);
+
+int rte_bpf_eth_rx_elf_load(uint16_t port, uint16_t queue,
+	const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+	uint32_t flags);
+int rte_bpf_eth_tx_elf_load(uint16_t port, uint16_t queue,
+	const struct rte_bpf_prm *prm, const char *fname, const char *sname,
+	uint32_t flags);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_BPF_ETHDEV_H_ */
diff --git a/lib/librte_bpf/rte_bpf_version.map b/lib/librte_bpf/rte_bpf_version.map
index ff65144df..a203e088e 100644
--- a/lib/librte_bpf/rte_bpf_version.map
+++ b/lib/librte_bpf/rte_bpf_version.map
@@ -3,6 +3,10 @@ EXPERIMENTAL {
 	rte_bpf_destroy;
 	rte_bpf_elf_load;
+	rte_bpf_eth_rx_elf_load;
+	rte_bpf_eth_rx_unload;
+	rte_bpf_eth_tx_elf_load;
+	rte_bpf_eth_tx_unload;
 	rte_bpf_exec;
 	rte_bpf_exec_burst;
 	rte_bpf_get_jit;
-- 
2.13.6