DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH v02] net/af_packet: add rollover and defrag options
@ 2024-10-31 13:50 Gur Stavi
  0 siblings, 0 replies; only message in thread
From: Gur Stavi @ 2024-10-31 13:50 UTC (permalink / raw)
  To: Gur Stavi; +Cc: dev, John W. Linville, Stephen Hemminger, Ferruh Yigit

net_af_packet PMD multi "queue" support relies on Linux FANOUT capability.
Linux FANOUT is a SW based load balancer that is similar to HW RSS which
is more common for DPDK PMDs. Instead of multiple HW descriptor queues,
AF PACKET uses multiple sockets.
HW RSS will typically drop a packet if its selected RX queue is empty.
However, Linux FANOUT, as a SW load balancer, can be configured to avoid
this packet drop by rolling over to the next socket.
This rollover functionality was ALWAYS enabled in net_af_packet. It is
surrounded by ifdef, but only to allow compilation on ancient Linux
versions that did not have it.

Since DPDK applications are usually designed for HW based PMDs, this
rollover functionality, which the developers are likely unaware of, could
be confusing.

Another option that is part of Linux FANOUT is DEFRAG that instructs Linux
to compose complete IP packet out of fragments before delivering it to the
PACKET socket. Again, this behavior typically does not exist for HW based
PMDs and may confuse users.

This patch adds 2 options to control these features:
rollover=[0|1],defrag=[0|1]
For backward compatibility both features are enabled by default even
though most users will probably want both of them disabled.

Signed-off-by: Gur Stavi <gur.stavi@huawei.com>
---
v2:
* Improve error handling for parsing of arg strings. Add a wrapper around
  strtoul to replace atoi.
* Fix checkpatch indentation error due to copy-paste.

v1: https://mails.dpdk.org/archives/dev/2024-October/307451.html
---
 doc/guides/nics/af_packet.rst             |  4 ++
 drivers/net/af_packet/rte_eth_af_packet.c | 77 ++++++++++++++++++-----
 2 files changed, 65 insertions(+), 16 deletions(-)

diff --git a/doc/guides/nics/af_packet.rst b/doc/guides/nics/af_packet.rst
index 66b977e1a2..4bc748b50b 100644
--- a/doc/guides/nics/af_packet.rst
+++ b/doc/guides/nics/af_packet.rst
@@ -29,6 +29,8 @@ Some of these, in turn, will be used to configure the PACKET_MMAP settings.
 *   ``framesz`` - PACKET_MMAP frame size (optional, default 2048B; Note: multiple
     of 16B);
 *   ``framecnt`` - PACKET_MMAP frame count (optional, default 512).
+*   ``rollover`` - disable(0)/enable(1) PACKET_FANOUT_ROLLOVER (optional, default 1).
+*   ``defrag`` - disable(0)/enable(1) PACKET_FANOUT_FLAG_DEFRAG (optional, default 1).

 Because this implementation is based on PACKET_MMAP, and PACKET_MMAP has its
 own pre-requisites, it should be noted that the inner workings of PACKET_MMAP
@@ -47,6 +49,8 @@ For the full details behind PACKET_MMAP's structures and settings, consider
 reading the `PACKET_MMAP documentation in the Kernel
 <https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt>`_.

+For details of ``rollover`` anb ``defrag`` refer to the *packet(7)* man page.
+
 Prerequisites
 -------------

diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index 68870dd3e2..1a043e8398 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -36,6 +36,8 @@
 #define ETH_AF_PACKET_FRAMESIZE_ARG	"framesz"
 #define ETH_AF_PACKET_FRAMECOUNT_ARG	"framecnt"
 #define ETH_AF_PACKET_QDISC_BYPASS_ARG	"qdisc_bypass"
+#define ETH_AF_PACKET_ROLLOVER		"rollover"
+#define ETH_AF_PACKET_DEFRAG		"defrag"

 #define DFLT_FRAME_SIZE		(1 << 11)
 #define DFLT_FRAME_COUNT	(1 << 9)
@@ -91,6 +93,8 @@ static const char *valid_arguments[] = {
 	ETH_AF_PACKET_FRAMESIZE_ARG,
 	ETH_AF_PACKET_FRAMECOUNT_ARG,
 	ETH_AF_PACKET_QDISC_BYPASS_ARG,
+	ETH_AF_PACKET_ROLLOVER,
+	ETH_AF_PACKET_DEFRAG,
 	NULL
 };

@@ -659,6 +663,20 @@ open_packet_iface(const char *key __rte_unused,
 	return 0;
 }

+static int
+uint_arg_parse(const char *arg_str, unsigned int *arg_val)
+{
+	unsigned long val;
+	char *endptr;
+
+	val = strtoul(arg_str, &endptr, 10);
+	if (*arg_str == '\0' || *endptr != '\0')
+		return -EINVAL;
+
+	*arg_val = val;
+	return 0;
+}
+
 static int
 rte_pmd_init_internals(struct rte_vdev_device *dev,
                        const int sockfd,
@@ -676,6 +694,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
 	const unsigned int numa_node = dev->device.numa_node;
 	struct rte_eth_dev_data *data = NULL;
 	struct rte_kvargs_pair *pair = NULL;
+	const char *ifname = NULL;
 	struct ifreq ifr;
 	size_t ifnamelen;
 	unsigned k_idx;
@@ -686,6 +705,8 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
 	int rc, tpver, discard;
 	int qsockfd = -1;
 	unsigned int i, q, rdsize;
+	unsigned int rollover = 1;
+	unsigned int defrag = 1;
 #if defined(PACKET_FANOUT)
 	int fanout_arg;
 #endif
@@ -693,9 +714,30 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
 	for (k_idx = 0; k_idx < kvlist->count; k_idx++) {
 		pair = &kvlist->pairs[k_idx];
 		if (strstr(pair->key, ETH_AF_PACKET_IFACE_ARG) != NULL)
-			break;
+			ifname = pair->value;
+		if (strcmp(pair->key, ETH_AF_PACKET_ROLLOVER) == 0) {
+			rc = uint_arg_parse(pair->value, &rollover);
+			if (rc || rollover > 1) {
+				PMD_LOG(ERR,
+					"%s: invalid rollover value: %s",
+					name, pair->value);
+				return -1;
+			}
+			continue;
+		}
+		if (strcmp(pair->key, ETH_AF_PACKET_DEFRAG) == 0) {
+			rc = uint_arg_parse(pair->value, &defrag);
+			if (rc || defrag > 1) {
+				PMD_LOG(ERR,
+					"%s: invalid defrag value: %s",
+					name, pair->value);
+				return -1;
+			}
+			continue;
+		}
 	}
-	if (pair == NULL) {
+
+	if (ifname == NULL) {
 		PMD_LOG(ERR,
 			"%s: no interface specified for AF_PACKET ethdev",
 		        name);
@@ -738,21 +780,21 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
 	req->tp_frame_size = framesize;
 	req->tp_frame_nr = framecnt;

-	ifnamelen = strlen(pair->value);
+	ifnamelen = strlen(ifname);
 	if (ifnamelen < sizeof(ifr.ifr_name)) {
-		memcpy(ifr.ifr_name, pair->value, ifnamelen);
+		memcpy(ifr.ifr_name, ifname, ifnamelen);
 		ifr.ifr_name[ifnamelen] = '\0';
 	} else {
 		PMD_LOG(ERR,
 			"%s: I/F name too long (%s)",
-			name, pair->value);
+			name, ifname);
 		goto free_internals;
 	}
 	if (ioctl(sockfd, SIOCGIFINDEX, &ifr) == -1) {
 		PMD_LOG_ERRNO(ERR, "%s: ioctl failed (SIOCGIFINDEX)", name);
 		goto free_internals;
 	}
-	(*internals)->if_name = strdup(pair->value);
+	(*internals)->if_name = strdup(ifname);
 	if ((*internals)->if_name == NULL)
 		goto free_internals;
 	(*internals)->if_index = ifr.ifr_ifindex;
@@ -770,9 +812,12 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,

 #if defined(PACKET_FANOUT)
 	fanout_arg = (getpid() ^ (*internals)->if_index) & 0xffff;
-	fanout_arg |= (PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG) << 16;
+	fanout_arg |= PACKET_FANOUT_HASH << 16;
+	if (defrag)
+		fanout_arg |= PACKET_FANOUT_FLAG_DEFRAG << 16;
 #if defined(PACKET_FANOUT_FLAG_ROLLOVER)
-	fanout_arg |= PACKET_FANOUT_FLAG_ROLLOVER << 16;
+	if (rollover)
+		fanout_arg |= PACKET_FANOUT_FLAG_ROLLOVER << 16;
 #endif
 #endif

@@ -792,7 +837,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
 		if (rc == -1) {
 			PMD_LOG_ERRNO(ERR,
 				"%s: could not set PACKET_VERSION on AF_PACKET socket for %s",
-				name, pair->value);
+				name, ifname);
 			goto error;
 		}

@@ -802,7 +847,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
 		if (rc == -1) {
 			PMD_LOG_ERRNO(ERR,
 				"%s: could not set PACKET_LOSS on AF_PACKET socket for %s",
-				name, pair->value);
+				name, ifname);
 			goto error;
 		}

@@ -813,7 +858,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
 			if (rc == -1) {
 				PMD_LOG_ERRNO(ERR,
 					"%s: could not set PACKET_QDISC_BYPASS on AF_PACKET socket for %s",
-					name, pair->value);
+					name, ifname);
 				goto error;
 			}
 #endif
@@ -823,7 +868,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
 		if (rc == -1) {
 			PMD_LOG_ERRNO(ERR,
 				"%s: could not set PACKET_RX_RING on AF_PACKET socket for %s",
-				name, pair->value);
+				name, ifname);
 			goto error;
 		}

@@ -831,7 +876,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
 		if (rc == -1) {
 			PMD_LOG_ERRNO(ERR,
 				"%s: could not set PACKET_TX_RING on AF_PACKET "
-				"socket for %s", name, pair->value);
+				"socket for %s", name, ifname);
 			goto error;
 		}

@@ -844,7 +889,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
 		if (rx_queue->map == MAP_FAILED) {
 			PMD_LOG_ERRNO(ERR,
 				"%s: call to mmap failed on AF_PACKET socket for %s",
-				name, pair->value);
+				name, ifname);
 			goto error;
 		}

@@ -881,7 +926,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
 		if (rc == -1) {
 			PMD_LOG_ERRNO(ERR,
 				"%s: could not bind AF_PACKET socket to %s",
-				name, pair->value);
+				name, ifname);
 			goto error;
 		}

@@ -891,7 +936,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
 		if (rc == -1) {
 			PMD_LOG_ERRNO(ERR,
 				"%s: could not set PACKET_FANOUT on AF_PACKET socket for %s",
-				name, pair->value);
+				name, ifname);
 			goto error;
 		}
 #endif

base-commit: 98613d32e3dac58d685f4f236cf8cc9733abaaf3
--
2.40.1


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2024-10-31 11:33 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-10-31 13:50 [PATCH v02] net/af_packet: add rollover and defrag options Gur Stavi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).