DPDK patches and discussions
 help / color / mirror / Atom feed
From: Stephen Hemminger <stephen@networkplumber.org>
To: dev@dpdk.org
Cc: Stephen Hemminger <stephen@networkplumber.org>
Subject: [dpdk-dev] [RFC 5/8] pdump: add classic BPF filtering
Date: Mon,  7 Oct 2019 09:52:29 -0700	[thread overview]
Message-ID: <20191007165232.14535-6-stephen@networkplumber.org> (raw)
In-Reply-To: <20191007165232.14535-1-stephen@networkplumber.org>

Simple classic BPF interpreter based off of libpcap.

This is a copy of the BPF interpreter from libpcap which is
modified to handle mbuf meta data. The existing pcap_offline_filter
does not expose a way to match VLAN tags. Copying the BPF interpreter
also means that rte_pdump still does not have a hard dependency
on libpcap.

The API for pdump is versioned because the filter needs to
know both byte code and length of program to validate it.

This patch does cause a small checkpatch warning because
it keeps the original variable names from the pcap code.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 app/pdump/main.c                       |  16 +-
 app/test/test_pdump.c                  |   4 +-
 lib/librte_pdump/Makefile              |   2 +-
 lib/librte_pdump/pdump_bpf.h           | 168 +++++++++
 lib/librte_pdump/rte_pcap_filter.c     | 462 +++++++++++++++++++++++++
 lib/librte_pdump/rte_pdump.c           | 145 ++++++--
 lib/librte_pdump/rte_pdump.h           |  54 ++-
 lib/librte_pdump/rte_pdump_version.map |   7 +
 8 files changed, 806 insertions(+), 52 deletions(-)
 create mode 100644 lib/librte_pdump/pdump_bpf.h
 create mode 100644 lib/librte_pdump/rte_pcap_filter.c

diff --git a/app/pdump/main.c b/app/pdump/main.c
index c1b901279f4b..c3eb554ef28b 100644
--- a/app/pdump/main.c
+++ b/app/pdump/main.c
@@ -828,20 +828,20 @@ enable_pdump(void)
 						pt->queue,
 						RTE_PDUMP_FLAG_RX,
 						pt->rx_ring,
-						pt->mp, NULL);
+						pt->mp, NULL, 0);
 				ret1 = rte_pdump_enable_by_deviceid(
 						pt->device_id,
 						pt->queue,
 						RTE_PDUMP_FLAG_TX,
 						pt->tx_ring,
-						pt->mp, NULL);
+						pt->mp, NULL, 0);
 			} else if (pt->dump_by_type == PORT_ID) {
 				ret = rte_pdump_enable(pt->port, pt->queue,
 						RTE_PDUMP_FLAG_RX,
-						pt->rx_ring, pt->mp, NULL);
+						pt->rx_ring, pt->mp, NULL, 0);
 				ret1 = rte_pdump_enable(pt->port, pt->queue,
 						RTE_PDUMP_FLAG_TX,
-						pt->tx_ring, pt->mp, NULL);
+						pt->tx_ring, pt->mp, NULL, 0);
 			}
 		} else if (pt->dir == RTE_PDUMP_FLAG_RX) {
 			if (pt->dump_by_type == DEVICE_ID)
@@ -849,22 +849,22 @@ enable_pdump(void)
 						pt->device_id,
 						pt->queue,
 						pt->dir, pt->rx_ring,
-						pt->mp, NULL);
+						pt->mp, NULL, 0);
 			else if (pt->dump_by_type == PORT_ID)
 				ret = rte_pdump_enable(pt->port, pt->queue,
 						pt->dir,
-						pt->rx_ring, pt->mp, NULL);
+						pt->rx_ring, pt->mp, NULL, 0);
 		} else if (pt->dir == RTE_PDUMP_FLAG_TX) {
 			if (pt->dump_by_type == DEVICE_ID)
 				ret = rte_pdump_enable_by_deviceid(
 						pt->device_id,
 						pt->queue,
 						pt->dir,
-						pt->tx_ring, pt->mp, NULL);
+						pt->tx_ring, pt->mp, NULL, 0);
 			else if (pt->dump_by_type == PORT_ID)
 				ret = rte_pdump_enable(pt->port, pt->queue,
 						pt->dir,
-						pt->tx_ring, pt->mp, NULL);
+						pt->tx_ring, pt->mp, NULL, 0);
 		}
 		if (ret < 0 || ret1 < 0) {
 			cleanup_pdump_resources();
diff --git a/app/test/test_pdump.c b/app/test/test_pdump.c
index af206968b38d..f0187a4cd279 100644
--- a/app/test/test_pdump.c
+++ b/app/test/test_pdump.c
@@ -79,7 +79,7 @@ run_pdump_client_tests(void)
 
 	for (itr = 0; itr < NUM_ITR; itr++) {
 		ret = rte_pdump_enable(portid, QUEUE_ID, flags, ring_client,
-				       mp, NULL);
+				       mp, NULL, 0);
 		if (ret < 0) {
 			printf("rte_pdump_enable failed\n");
 			return -1;
@@ -94,7 +94,7 @@ run_pdump_client_tests(void)
 		printf("pdump_disable success\n");
 
 		ret = rte_pdump_enable_by_deviceid(deviceid, QUEUE_ID, flags,
-						   ring_client, mp, NULL);
+						   ring_client, mp, NULL, 0);
 		if (ret < 0) {
 			printf("rte_pdump_enable_by_deviceid failed\n");
 			return -1;
diff --git a/lib/librte_pdump/Makefile b/lib/librte_pdump/Makefile
index 89593689a7d5..4a631c06a0ec 100644
--- a/lib/librte_pdump/Makefile
+++ b/lib/librte_pdump/Makefile
@@ -15,7 +15,7 @@ EXPORT_MAP := rte_pdump_version.map
 LIBABIVER := 3
 
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_PDUMP) := rte_pdump.c
+SRCS-$(CONFIG_RTE_LIBRTE_PDUMP) := rte_pdump.c rte_pcap_filter.c
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_PDUMP)-include := rte_pdump.h
diff --git a/lib/librte_pdump/pdump_bpf.h b/lib/librte_pdump/pdump_bpf.h
new file mode 100644
index 000000000000..8f6d00f3cee2
--- /dev/null
+++ b/lib/librte_pdump/pdump_bpf.h
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This is code is derived from the libpcap bpf_filter which
+ * in turn is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ */
+
+#ifndef _PDUMP_BPF_H_
+#define _PDUMP_BPF_H__
+
+/*
+ * This is based off of libpcap's cut-down version of bpf.h;
+ * it includes only  the stuff needed for the BPF interpreter.
+ *
+ * Note: this is the original classic BPF generated by libpcap
+ *  not the new eBPF used elsewhere.
+ */
+
+typedef	int bpf_int32;
+typedef	unsigned int bpf_u_int32;
+
+/*
+ * Alignment macros.  BPF_WORDALIGN rounds up to the next
+ * even multiple of BPF_ALIGNMENT.
+ */
+#define BPF_ALIGNMENT sizeof(bpf_int32)
+#define BPF_WORDALIGN(x) (((x)+(BPF_ALIGNMENT-1))&~(BPF_ALIGNMENT-1))
+
+/*
+ * Number of scratch memory words (for BPF_LD|BPF_MEM and BPF_ST).
+ */
+#define BPF_MEMWORDS 16
+
+/*
+ * The instruction encodings.
+ *
+ * Please inform tcpdump-workers@lists.tcpdump.org if you use any
+ * of the reserved values, so that we can note that they're used
+ * (and perhaps implement it in the reference BPF implementation
+ * and encourage its implementation elsewhere).
+ */
+
+/*
+ * The upper 8 bits of the opcode aren't used. BSD/OS used 0x8000.
+ */
+
+/* instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define		BPF_LD		0x00
+#define		BPF_LDX		0x01
+#define		BPF_ST		0x02
+#define		BPF_STX		0x03
+#define		BPF_ALU		0x04
+#define		BPF_JMP		0x05
+#define		BPF_RET		0x06
+#define		BPF_MISC	0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code)	((code) & 0x18)
+#define		BPF_W		0x00
+#define		BPF_H		0x08
+#define		BPF_B		0x10
+/*				0x18	reserved; used by BSD/OS */
+#define BPF_MODE(code)	((code) & 0xe0)
+#define		BPF_IMM		0x00
+#define		BPF_ABS		0x20
+#define		BPF_IND		0x40
+#define		BPF_MEM		0x60
+#define		BPF_LEN		0x80
+#define		BPF_MSH		0xa0
+/*				0xc0	reserved; used by BSD/OS */
+/*				0xe0	reserved; used by BSD/OS */
+
+/* alu/jmp fields */
+#define BPF_OP(code)	((code) & 0xf0)
+#define		BPF_ADD		0x00
+#define		BPF_SUB		0x10
+#define		BPF_MUL		0x20
+#define		BPF_DIV		0x30
+#define		BPF_OR		0x40
+#define		BPF_AND		0x50
+#define		BPF_LSH		0x60
+#define		BPF_RSH		0x70
+#define		BPF_NEG		0x80
+#define		BPF_MOD		0x90
+#define		BPF_XOR		0xa0
+/*				0xb0	reserved */
+/*				0xc0	reserved */
+/*				0xd0	reserved */
+/*				0xe0	reserved */
+/*				0xf0	reserved */
+
+#define		BPF_JA		0x00
+#define		BPF_JEQ		0x10
+#define		BPF_JGT		0x20
+#define		BPF_JGE		0x30
+#define		BPF_JSET	0x40
+/*				0x50	reserved; used on BSD/OS */
+/*				0x60	reserved */
+/*				0x70	reserved */
+/*				0x80	reserved */
+/*				0x90	reserved */
+/*				0xa0	reserved */
+/*				0xb0	reserved */
+/*				0xc0	reserved */
+/*				0xd0	reserved */
+/*				0xe0	reserved */
+/*				0xf0	reserved */
+#define BPF_SRC(code)	((code) & 0x08)
+#define		BPF_K		0x00
+#define		BPF_X		0x08
+
+/* ret - BPF_K and BPF_X also apply */
+#define BPF_RVAL(code)	((code) & 0x18)
+#define		BPF_A		0x10
+/*				0x18	reserved */
+
+/* misc */
+#define BPF_MISCOP(code) ((code) & 0xf8)
+#define		BPF_TAX		0x00
+/*				0x08	reserved */
+/*				0x10	reserved */
+/*				0x18	reserved */
+/* #define	BPF_COP		0x20	NetBSD "coprocessor" extensions */
+/*				0x28	reserved */
+/*				0x30	reserved */
+/*				0x38	reserved */
+/* #define	BPF_COPX	0x40	NetBSD "coprocessor" extensions */
+/*					also used on BSD/OS */
+/*				0x48	reserved */
+/*				0x50	reserved */
+/*				0x58	reserved */
+/*				0x60	reserved */
+/*				0x68	reserved */
+/*				0x70	reserved */
+/*				0x78	reserved */
+#define		BPF_TXA		0x80
+/*				0x88	reserved */
+/*				0x90	reserved */
+/*				0x98	reserved */
+/*				0xa0	reserved */
+/*				0xa8	reserved */
+/*				0xb0	reserved */
+/*				0xb8	reserved */
+/*				0xc0	reserved; used on BSD/OS */
+/*				0xc8	reserved */
+/*				0xd0	reserved */
+/*				0xd8	reserved */
+/*				0xe0	reserved */
+/*				0xe8	reserved */
+/*				0xf0	reserved */
+/*				0xf8	reserved */
+
+/*
+ * The instruction data structure.
+ */
+struct bpf_insn {
+	u_short	code;
+	u_char	jt;
+	u_char	jf;
+	bpf_u_int32 k;
+};
+
+#endif /* _PDUMP_BPF_H_ */
diff --git a/lib/librte_pdump/rte_pcap_filter.c b/lib/librte_pdump/rte_pcap_filter.c
new file mode 100644
index 000000000000..1d8caeee6628
--- /dev/null
+++ b/lib/librte_pdump/rte_pcap_filter.c
@@ -0,0 +1,462 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This is code is derived from the libpcap bpf_filter which
+ * in turn is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_pdump.h>
+
+#include "pdump_bpf.h"
+
+/* These magic values are used to do negative offset to find vlan */
+#define SKF_AD_OFF    (-0x1000)
+#define SKF_AD_VLAN_TAG	44
+#define SKF_AD_VLAN_TAG_PRESENT 48
+
+#define EXTRACT32(p) rte_be_to_cpu_32(*(const unaligned_uint32_t *)(p))
+#define EXTRACT16(p) rte_be_to_cpu_16(*(const unaligned_uint16_t *)(p))
+
+static inline u_short vlan_present(const struct rte_mbuf *m)
+{
+	return	(m->ol_flags & (PKT_TX_VLAN|PKT_RX_VLAN_STRIPPED)) != 0;
+}
+
+/*
+ * Execute the filter program starting at pc on the packet p
+ * wirelen is the length of the original packet
+ * buflen is the amount of data present
+ * aux_data is auxiliary data, currently used only when interpreting
+ * filters intended for the Linux kernel in cases where the kernel
+ * rejects the filter; it contains VLAN tag information
+ * For the kernel, p is assumed to be a pointer to an mbuf if buflen is 0,
+ * in all other cases, p is a pointer to a buffer and buflen is its size.
+ *
+ * Thanks to Ani Sinha <ani@arista.com> for providing initial implementation
+ */
+int
+rte_pcap_filter(const void *filter, const struct rte_mbuf *m)
+{
+	const struct bpf_insn *pc = filter;
+	uint32_t buflen = rte_pktmbuf_data_len(m);
+	uint32_t wirelen = rte_pktmbuf_pkt_len(m);
+	const uint8_t *p = rte_pktmbuf_mtod(m, const uint8_t *);
+	uint32_t A, X;
+	bpf_u_int32 k;
+	uint32_t mem[BPF_MEMWORDS];
+
+	/* No filter means accept all. */
+	if (pc == NULL)
+		return -1;
+
+	A = 0;
+	X = 0;
+	--pc;
+	for (;;) {
+		++pc;
+
+		switch (pc->code) {
+		default:
+			/* this must be caught by validation */
+			rte_panic("invalid BPF opcode\n");
+			return 0;
+
+		case BPF_RET|BPF_K:
+			return pc->k;
+
+		case BPF_RET|BPF_A:
+			return A;
+
+		case BPF_LD|BPF_W|BPF_ABS:
+			k = pc->k;
+			if (k > buflen || sizeof(int32_t) > buflen - k)
+				return 0;
+
+			A = EXTRACT32(&p[k]);
+			continue;
+
+		case BPF_LD|BPF_H|BPF_ABS:
+			k = pc->k;
+			if (k > buflen || sizeof(int16_t) > buflen - k)
+				return 0;
+
+			A = EXTRACT16(&p[k]);
+			continue;
+
+		case BPF_LD|BPF_B|BPF_ABS:
+			switch (pc->k) {
+			case SKF_AD_OFF + SKF_AD_VLAN_TAG:
+				A = m->vlan_tci;
+				break;
+			case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
+				A = vlan_present(m);
+				break;
+			default:
+				k = pc->k;
+				if (k >= buflen)
+					return 0;
+
+				A = p[k];
+				break;
+			}
+			continue;
+
+		case BPF_LD|BPF_W|BPF_LEN:
+			A = wirelen;
+			continue;
+
+		case BPF_LDX|BPF_W|BPF_LEN:
+			X = wirelen;
+			continue;
+
+		case BPF_LD|BPF_W|BPF_IND:
+			k = X + pc->k;
+			if (pc->k > buflen || X > buflen - pc->k ||
+			    sizeof(int32_t) > buflen - k) {
+				return 0;
+			}
+			A = EXTRACT32(&p[k]);
+			continue;
+
+		case BPF_LD|BPF_H|BPF_IND:
+			k = X + pc->k;
+			if (X > buflen ||
+			    pc->k > buflen - X ||
+			    sizeof(int16_t) > buflen - k)
+				return 0;
+
+			A = EXTRACT16(&p[k]);
+			continue;
+
+		case BPF_LD|BPF_B|BPF_IND:
+			k = X + pc->k;
+			if (pc->k >= buflen || X >= buflen - pc->k)
+				return 0;
+
+			A = p[k];
+			continue;
+
+		case BPF_LDX|BPF_MSH|BPF_B:
+			k = pc->k;
+			if (k >= buflen)
+				return 0;
+
+			X = (p[pc->k] & 0xf) << 2;
+			continue;
+
+		case BPF_LD|BPF_IMM:
+			A = pc->k;
+			continue;
+
+		case BPF_LDX|BPF_IMM:
+			X = pc->k;
+			continue;
+
+		case BPF_LD|BPF_MEM:
+			A = mem[pc->k];
+			continue;
+
+		case BPF_LDX|BPF_MEM:
+			X = mem[pc->k];
+			continue;
+
+		case BPF_ST:
+			mem[pc->k] = A;
+			continue;
+
+		case BPF_STX:
+			mem[pc->k] = X;
+			continue;
+
+		case BPF_JMP|BPF_JA:
+			/*
+			 * XXX - we currently implement "ip6 protochain"
+			 * with backward jumps, so sign-extend pc->k.
+			 */
+			pc += (bpf_int32)pc->k;
+			continue;
+
+		case BPF_JMP|BPF_JGT|BPF_K:
+			pc += (pc->k < A) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JGE|BPF_K:
+			pc += (pc->k <= A) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JEQ|BPF_K:
+			pc += (pc->k == A) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JSET|BPF_K:
+			pc += (A & pc->k) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JGT|BPF_X:
+			pc += (A > X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JGE|BPF_X:
+			pc += (A >= X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JEQ|BPF_X:
+			pc += (A == X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JSET|BPF_X:
+			pc += (A & X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_ALU|BPF_ADD|BPF_X:
+			A += X;
+			continue;
+
+		case BPF_ALU|BPF_SUB|BPF_X:
+			A -= X;
+			continue;
+
+		case BPF_ALU|BPF_MUL|BPF_X:
+			A *= X;
+			continue;
+
+		case BPF_ALU|BPF_DIV|BPF_X:
+			if (X == 0)
+				return 0;
+			A /= X;
+			continue;
+
+		case BPF_ALU|BPF_MOD|BPF_X:
+			if (X == 0)
+				return 0;
+			A %= X;
+			continue;
+
+		case BPF_ALU|BPF_AND|BPF_X:
+			A &= X;
+			continue;
+
+		case BPF_ALU|BPF_OR|BPF_X:
+			A |= X;
+			continue;
+
+		case BPF_ALU|BPF_XOR|BPF_X:
+			A ^= X;
+			continue;
+
+		case BPF_ALU|BPF_LSH|BPF_X:
+			if (X < 32)
+				A <<= X;
+			else
+				A = 0;
+			continue;
+
+		case BPF_ALU|BPF_RSH|BPF_X:
+			if (X < 32)
+				A >>= X;
+			else
+				A = 0;
+			continue;
+
+		case BPF_ALU|BPF_ADD|BPF_K:
+			A += pc->k;
+			continue;
+
+		case BPF_ALU|BPF_SUB|BPF_K:
+			A -= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_MUL|BPF_K:
+			A *= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_DIV|BPF_K:
+			A /= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_MOD|BPF_K:
+			A %= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_AND|BPF_K:
+			A &= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_OR|BPF_K:
+			A |= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_XOR|BPF_K:
+			A ^= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_LSH|BPF_K:
+			A <<= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_RSH|BPF_K:
+			A >>= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_NEG:
+			/*
+			 * Most BPF arithmetic is unsigned, but negation
+			 * can't be unsigned; respecify it as subtracting
+			 * the accumulator from 0U, so that 1) we don't
+			 * get compiler warnings about negating an unsigned
+			 * value and 2) don't get UBSan warnings about
+			 * the result of negating 0x80000000 being undefined.
+			 */
+			A = (0U - A);
+			continue;
+
+		case BPF_MISC|BPF_TAX:
+			X = A;
+			continue;
+
+		case BPF_MISC|BPF_TXA:
+			A = X;
+			continue;
+		}
+	}
+}
+
+/*
+ * Return true if the 'fcode' is a valid filter program.
+ * The constraints are that each jump be forward and to a valid
+ * code, that memory accesses are within valid ranges (to the
+ * extent that this can be checked statically; loads of packet
+ * data have to be, and are, also checked at run time), and that
+ * the code terminates with either an accept or reject.
+ */
+int
+rte_pcap_validate_filter(const void *filter, uint32_t len)
+{
+	const struct bpf_insn *f = filter;
+	unsigned int i, from;
+
+	if (len < 1)
+		return 0;
+
+	for (i = 0; i < len; ++i) {
+		const struct bpf_insn *p = &f[i];
+
+		switch (BPF_CLASS(p->code)) {
+		/*
+		 * Check that memory operations use valid addresses.
+		 */
+		case BPF_LD:
+		case BPF_LDX:
+			switch (BPF_MODE(p->code)) {
+			case BPF_IMM:
+				break;
+			case BPF_ABS:
+			case BPF_IND:
+			case BPF_MSH:
+				/*
+				 * There's no maximum packet data size
+				 * in userland.  The runtime packet length
+				 * check suffices.
+				 */
+				break;
+			case BPF_MEM:
+				if (p->k >= BPF_MEMWORDS)
+					return 0;
+				break;
+			case BPF_LEN:
+				break;
+			default:
+				return 0;
+			}
+			break;
+		case BPF_ST:
+		case BPF_STX:
+			if (p->k >= BPF_MEMWORDS)
+				return 0;
+			break;
+		case BPF_ALU:
+			switch (BPF_OP(p->code)) {
+			case BPF_ADD:
+			case BPF_SUB:
+			case BPF_MUL:
+			case BPF_OR:
+			case BPF_AND:
+			case BPF_XOR:
+			case BPF_LSH:
+			case BPF_RSH:
+			case BPF_NEG:
+				break;
+			case BPF_DIV:
+			case BPF_MOD:
+				/*
+				 * Check for constant division or modulus
+				 * by 0.
+				 */
+				if (BPF_SRC(p->code) == BPF_K && p->k == 0)
+					return 0;
+				break;
+			default:
+				return 0;
+			}
+			break;
+		case BPF_JMP:
+			/*
+			 * Check that jumps are within the code block,
+			 * and that unconditional branches don't go
+			 * backwards as a result of an overflow.
+			 * Unconditional branches have a 32-bit offset,
+			 * so they could overflow; we check to make
+			 * sure they don't.  Conditional branches have
+			 * an 8-bit offset, and the from address is <=
+			 * BPF_MAXINSNS, and we assume that BPF_MAXINSNS
+			 * is sufficiently small that adding 255 to it
+			 * won't overflow.
+			 *
+			 * We know that len is <= BPF_MAXINSNS, and we
+			 * assume that BPF_MAXINSNS is < the maximum size
+			 * of a unsigned int, so that i + 1 doesn't overflow.
+			 *
+			 * For userland, we don't know that the from
+			 * or len are <= BPF_MAXINSNS, but we know that
+			 * from <= len, and, except on a 64-bit system,
+			 * it's unlikely that len, if it truly reflects
+			 * the size of the program we've been handed,
+			 * will be anywhere near the maximum size of
+			 * a unsigned int.  We also don't check for backward
+			 * branches, as we currently support them in
+			 * userland for the protochain operation.
+			 */
+			from = i + 1;
+			switch (BPF_OP(p->code)) {
+			case BPF_JA:
+				if (from + p->k >= (unsigned int)len)
+					return 0;
+				break;
+			case BPF_JEQ:
+			case BPF_JGT:
+			case BPF_JGE:
+			case BPF_JSET:
+				if (from + p->jt >= (unsigned int)len ||
+				    from + p->jf >= (unsigned int)len)
+					return 0;
+				break;
+			default:
+				return 0;
+			}
+			break;
+		case BPF_RET:
+			break;
+		case BPF_MISC:
+			break;
+		default:
+			return 0;
+		}
+	}
+
+	return BPF_CLASS(f[len - 1].code) == BPF_RET;
+}
diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c
index 41f2ec17a26b..1206671c6f60 100644
--- a/lib/librte_pdump/rte_pdump.c
+++ b/lib/librte_pdump/rte_pdump.c
@@ -8,11 +8,13 @@
 #include <rte_lcore.h>
 #include <rte_log.h>
 #include <rte_errno.h>
+#include <rte_malloc.h>
 #include <rte_string_fns.h>
 
 #include "rte_pdump.h"
 
 #define DEVICE_ID_SIZE 64
+#define BPF_INS_SIZE sizeof(uint64_t)
 
 /* Macro for printing using RTE_LOG */
 static int pdump_logtype;
@@ -32,6 +34,8 @@ enum pdump_version {
 	V1 = 1
 };
 
+#define PDUMP_FILTER_V1	0x7064756d7066696c
+
 struct pdump_request {
 	uint16_t ver;
 	uint16_t op;
@@ -42,14 +46,14 @@ struct pdump_request {
 			uint16_t queue;
 			struct rte_ring *ring;
 			struct rte_mempool *mp;
-			void *filter;
+			const void *filter;
 		} en_v1;
 		struct disable_v1 {
 			char device[DEVICE_ID_SIZE];
 			uint16_t queue;
 			struct rte_ring *ring;
 			struct rte_mempool *mp;
-			void *filter;
+			const void *filter;
 		} dis_v1;
 	} data;
 };
@@ -64,7 +68,7 @@ static struct pdump_rxtx_cbs {
 	struct rte_ring *ring;
 	struct rte_mempool *mp;
 	const struct rte_eth_rxtx_callback *cb;
-	void *filter;
+	const void *filter;
 } rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT],
 tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
 
@@ -86,6 +90,9 @@ pdump_copy(uint16_t port, struct rte_mbuf **pkts,
 	ring = cbs->ring;
 	mp = cbs->mp;
 	for (i = 0; i < nb_pkts; i++) {
+		if (rte_pcap_filter(cbs->filter, pkts[i]) == 0)
+			continue;
+
 		p = rte_pktmbuf_copy(pkts[i], mp, 0, UINT32_MAX);
 		if (p) {
 			p->port = port;
@@ -124,8 +131,8 @@ pdump_tx(uint16_t port, uint16_t qidx __rte_unused,
 
 static int
 pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
-				struct rte_ring *ring, struct rte_mempool *mp,
-				uint16_t operation)
+			    struct rte_ring *ring, struct rte_mempool *mp,
+			    uint16_t operation, const void *filter)
 {
 	uint16_t qid;
 	struct pdump_rxtx_cbs *cbs = NULL;
@@ -143,6 +150,7 @@ pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
 			}
 			cbs->ring = ring;
 			cbs->mp = mp;
+			cbs->filter = filter;
 			cbs->cb = rte_eth_add_first_rx_callback(port, qid,
 								pdump_rx, cbs);
 			if (cbs->cb == NULL) {
@@ -178,8 +186,8 @@ pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
 
 static int
 pdump_register_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
-				struct rte_ring *ring, struct rte_mempool *mp,
-				uint16_t operation)
+			    struct rte_ring *ring, struct rte_mempool *mp,
+			    uint16_t operation, const void *filter)
 {
 
 	uint16_t qid;
@@ -198,6 +206,7 @@ pdump_register_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
 			}
 			cbs->ring = ring;
 			cbs->mp = mp;
+			cbs->filter = filter;
 			cbs->cb = rte_eth_add_tx_callback(port, qid, pdump_tx,
 								cbs);
 			if (cbs->cb == NULL) {
@@ -241,6 +250,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p)
 	uint16_t operation;
 	struct rte_ring *ring;
 	struct rte_mempool *mp;
+	const void *filter = NULL;
 
 	flags = p->flags;
 	operation = p->op;
@@ -256,6 +266,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p)
 		queue = p->data.en_v1.queue;
 		ring = p->data.en_v1.ring;
 		mp = p->data.en_v1.mp;
+		filter = p->data.en_v1.filter;
 	} else {
 		ret = rte_eth_dev_get_port_by_name(p->data.dis_v1.device,
 				&port);
@@ -299,7 +310,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p)
 	if (flags & RTE_PDUMP_FLAG_RX) {
 		end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_rx_q : queue + 1;
 		ret = pdump_register_rx_callbacks(end_q, port, queue, ring, mp,
-							operation);
+						  operation, filter);
 		if (ret < 0)
 			return ret;
 	}
@@ -308,7 +319,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p)
 	if (flags & RTE_PDUMP_FLAG_TX) {
 		end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_tx_q : queue + 1;
 		ret = pdump_register_tx_callbacks(end_q, port, queue, ring, mp,
-							operation);
+						  operation, filter);
 		if (ret < 0)
 			return ret;
 	}
@@ -424,12 +435,41 @@ pdump_validate_port(uint16_t port, char *name)
 }
 
 static int
-pdump_prepare_client_request(char *device, uint16_t queue,
-				uint32_t flags,
-				uint16_t operation,
-				struct rte_ring *ring,
-				struct rte_mempool *mp,
-				void *filter)
+pdump_validate_filter(const void *filter, unsigned int len)
+{
+	size_t alloc_len;
+
+	if (filter == NULL)
+		return 0;
+
+	/* must be in malloc memory to be accesible in primary */
+	if (rte_malloc_validate(filter, &alloc_len) != 0) {
+		PDUMP_LOG(ERR, "filter is not in rte_malloc memory\n");
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	if (len * BPF_INS_SIZE > alloc_len) {
+		PDUMP_LOG(ERR, "filter length error\n");
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	if (!rte_pcap_validate_filter(filter, len)) {
+		PDUMP_LOG(ERR, "filter is not valid BPF code\n");
+		rte_errno = EINVAL;
+		return -1;
+	}
+	return 0;
+}
+
+static int
+pdump_prepare_client_request(const char *device, uint16_t queue,
+			     uint32_t flags,
+			     uint16_t operation,
+			     struct rte_ring *ring,
+			     struct rte_mempool *mp,
+			     const void *filter)
 {
 	int ret = -1;
 	struct rte_mp_msg mp_req, *mp_rep;
@@ -476,14 +516,13 @@ pdump_prepare_client_request(char *device, uint16_t queue,
 }
 
 int
-rte_pdump_enable(uint16_t port, uint16_t queue, uint32_t flags,
-			struct rte_ring *ring,
-			struct rte_mempool *mp,
-			void *filter)
+rte_pdump_enable_v1911(uint16_t port, uint16_t queue, uint32_t flags,
+		       struct rte_ring *ring, struct rte_mempool *mp,
+		       const void *filter, uint32_t filter_len)
 {
 
-	int ret = 0;
 	char name[DEVICE_ID_SIZE];
+	int ret;
 
 	ret = pdump_validate_port(port, name);
 	if (ret < 0)
@@ -492,36 +531,86 @@ rte_pdump_enable(uint16_t port, uint16_t queue, uint32_t flags,
 	if (ret < 0)
 		return ret;
 	ret = pdump_validate_flags(flags);
+	if (ret < 0)
+		return ret;
+	ret = pdump_validate_filter(filter, filter_len);
 	if (ret < 0)
 		return ret;
 
 	ret = pdump_prepare_client_request(name, queue, flags,
-						ENABLE, ring, mp, filter);
+					   ENABLE, ring, mp, filter);
 
 	return ret;
 }
+BIND_DEFAULT_SYMBOL(rte_pdump_enable, _v1911, 19.11);
+MAP_STATIC_SYMBOL(int rte_pdump_enable(uint16_t port, uint16_t queue,
+				       uint32_t flags, struct rte_ring *ring,
+				       struct rte_mempool *mp,
+				       const void *filter, uint32_t len),
+		  rte_pdump_enable_v1911);
 
 int
-rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue,
-				uint32_t flags,
-				struct rte_ring *ring,
-				struct rte_mempool *mp,
-				void *filter)
+rte_pdump_enable_v1607(uint16_t port, uint16_t queue, uint32_t flags,
+		       struct rte_ring *ring,
+		       struct rte_mempool *mp,
+		       void *filter)
 {
-	int ret = 0;
+	if (filter != NULL)
+		PDUMP_LOG(WARNING, "filter not supported in this version\n");
+
+	return rte_pdump_enable_v1911(port, queue, flags, ring, mp,
+				      NULL, 0);
+}
+VERSION_SYMBOL(rte_pdump_enable, _v1607, 16.07);
+
+int
+rte_pdump_enable_by_deviceid_v1911(const char *device_id, uint16_t queue,
+				   uint32_t flags,
+				   struct rte_ring *ring,
+				   struct rte_mempool *mp,
+				   const void *filter, uint32_t filter_len)
+{
+	int ret;
 
 	ret = pdump_validate_ring_mp(ring, mp);
 	if (ret < 0)
 		return ret;
 	ret = pdump_validate_flags(flags);
+	if (ret < 0)
+		return ret;
+	ret = pdump_validate_filter(filter, filter_len);
 	if (ret < 0)
 		return ret;
 
 	ret = pdump_prepare_client_request(device_id, queue, flags,
-						ENABLE, ring, mp, filter);
+					   ENABLE, ring, mp, filter);
 
 	return ret;
 }
+BIND_DEFAULT_SYMBOL(rte_pdump_enable_by_deviceid, _v1911, 19.11);
+MAP_STATIC_SYMBOL(int rte_pdump_enable_by_deviceid(const char *device_id,
+						   uint16_t queue,
+						   uint32_t flags,
+						   struct rte_ring *ring,
+						   struct rte_mempool *mp,
+						   const void *filter,
+						   uint32_t len),
+		  rte_pdump_enable_by_deviceid_v1911);
+
+int
+rte_pdump_enable_by_deviceid_v1607(char *device_id, uint16_t queue,
+				   uint32_t flags,
+				   struct rte_ring *ring,
+				   struct rte_mempool *mp,
+				   void *filter)
+{
+	if (filter != NULL)
+		PDUMP_LOG(WARNING, "filter not supported in this version\n");
+
+	return rte_pdump_enable_by_deviceid_v1911(device_id, queue, flags,
+						  ring, mp, NULL, 0);
+}
+VERSION_SYMBOL(rte_pdump_enable_by_deviceid, _v1607, 16.07);
 
 int
 rte_pdump_disable(uint16_t port, uint16_t queue, uint32_t flags)
diff --git a/lib/librte_pdump/rte_pdump.h b/lib/librte_pdump/rte_pdump.h
index 6b00fc17aeb2..12cb46f8b0e9 100644
--- a/lib/librte_pdump/rte_pdump.h
+++ b/lib/librte_pdump/rte_pdump.h
@@ -68,17 +68,25 @@ rte_pdump_uninit(void);
  * @param mp
  *  mempool on to which original packets will be mirrored or duplicated.
  * @param filter
- *  place holder for packet filtering.
+ *  filter to apply to incoming packet (classic BPF)
+ * @param len
+ *  length of filter (in BPF instructions)
  *
  * @return
  *    0 on success, -1 on error, rte_errno is set accordingly.
  */
-
 int
 rte_pdump_enable(uint16_t port, uint16_t queue, uint32_t flags,
-		struct rte_ring *ring,
-		struct rte_mempool *mp,
-		void *filter);
+		 struct rte_ring *ring, struct rte_mempool *mp,
+		 const void *filter, uint32_t len);
+int
+rte_pdump_enable_v1607(uint16_t port, uint16_t queue, uint32_t flags,
+		       struct rte_ring *ring, struct rte_mempool *mp,
+		       void *filter);
+int
+rte_pdump_enable_v1911(uint16_t port, uint16_t queue, uint32_t flags,
+		       struct rte_ring *ring, struct rte_mempool *mp,
+		       const void *filter, uint32_t len);
 
 /**
  * Disables packet capturing on given port and queue.
@@ -118,18 +126,29 @@ rte_pdump_disable(uint16_t port, uint16_t queue, uint32_t flags);
  * @param mp
  *  mempool on to which original packets will be mirrored or duplicated.
  * @param filter
- *  place holder for packet filtering.
+ *  filter to apply to incoming packet (classic BPF)
+ * @param len
+ *  length of filter (in BPF instructions)
  *
  * @return
  *    0 on success, -1 on error, rte_errno is set accordingly.
  */
-
 int
-rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue,
-				uint32_t flags,
-				struct rte_ring *ring,
-				struct rte_mempool *mp,
-				void *filter);
+rte_pdump_enable_by_deviceid(const char *device_id, uint16_t queue,
+			     uint32_t flags,
+			     struct rte_ring *ring,
+			     struct rte_mempool *mp,
+			     const void *filter, uint32_t len);
+int
+rte_pdump_enable_by_deviceid_v1607(char *device_id, uint16_t queue,
+				   uint32_t flags, struct rte_ring *ring,
+				   struct rte_mempool *mp,
+				   void *filter);
+int
+rte_pdump_enable_by_deviceid_v1911(const char *device_id, uint16_t queue,
+				   uint32_t flags, struct rte_ring *ring,
+				   struct rte_mempool *mp,
+				   const void *filter, uint32_t len);
 
 /**
  * Disables packet capturing on given device_id and queue.
@@ -151,7 +170,16 @@ rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue,
  */
 int
 rte_pdump_disable_by_deviceid(char *device_id, uint16_t queue,
-				uint32_t flags);
+			      uint32_t flags);
+
+
+/* internal */
+int
+rte_pcap_filter(const void *filter, const struct rte_mbuf *m);
+
+/* internal */
+int
+rte_pcap_validate_filter(const void *filter, uint32_t len);
 
 #ifdef __cplusplus
 }
diff --git a/lib/librte_pdump/rte_pdump_version.map b/lib/librte_pdump/rte_pdump_version.map
index 3e744f30123c..e78ba5a8350a 100644
--- a/lib/librte_pdump/rte_pdump_version.map
+++ b/lib/librte_pdump/rte_pdump_version.map
@@ -10,3 +10,10 @@ DPDK_16.07 {
 
 	local: *;
 };
+
+DPDK_19.11 {
+	global:
+
+	rte_pdump_enable;
+	rte_pdump_enable_by_deviceid;
+} DPDK_16.07;
-- 
2.20.1


  parent reply	other threads:[~2019-10-07 16:53 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-07 16:52 [dpdk-dev] [RFC 0/8] Packet Capture enhancements Stephen Hemminger
2019-10-07 16:52 ` [dpdk-dev] [RFC 1/8] pdump: use new pktmbuf copy function Stephen Hemminger
2019-10-07 16:52 ` [dpdk-dev] [RFC 2/8] pdump: use dynamic logtype Stephen Hemminger
2019-10-07 16:52 ` [dpdk-dev] [RFC 3/8] pdump: tag copied mbuf with port Stephen Hemminger
2019-10-07 16:52 ` [dpdk-dev] [RFC 4/8] pdump: stamp packets with current timestamp Stephen Hemminger
2019-10-07 16:52 ` Stephen Hemminger [this message]
2019-10-07 17:07   ` [dpdk-dev] [RFC 5/8] pdump: add classic BPF filtering Jerin Jacob
2019-10-07 17:33     ` Stephen Hemminger
2019-10-07 19:33       ` Jerin Jacob
2019-10-07 21:45         ` Stephen Hemminger
2019-10-08  3:47           ` Jerin Jacob
2019-10-08  4:01             ` Stephen Hemminger
2019-10-08  4:15               ` Jerin Jacob
2019-10-08  4:22                 ` Stephen Hemminger
2019-10-08 21:08                   ` Morten Brørup
2019-10-09  8:21                     ` Ananyev, Konstantin
2019-10-09 14:59                       ` Stephen Hemminger
2019-10-07 16:52 ` [dpdk-dev] [RFC 6/8] pdump: add packet header truncation Stephen Hemminger
2019-10-07 16:52 ` [dpdk-dev] [RFC 7/8] pcapng: add new library for writing pcapng files Stephen Hemminger
2019-10-07 16:52 ` [dpdk-dev] [RFC 8/8] app/capture: add packet capture using pcapng Stephen Hemminger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191007165232.14535-6-stephen@networkplumber.org \
    --to=stephen@networkplumber.org \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).