From: Tudor Cornea <tudor.cornea@gmail.com>
To: stephen@networkplumber.org
Cc: linville@tuxdriver.com, ferruh.yigit@amd.com,
andrew.rybchenko@oktetlabs.ru, dev@dpdk.org,
Tudor Cornea <tudor.cornea@gmail.com>
Subject: [PATCH v2] net/af_packet: allow changing fanout mode
Date: Mon, 6 Jan 2025 17:35:08 +0200 [thread overview]
Message-ID: <20250106153508.11262-1-tudor.cornea@gmail.com> (raw)
In-Reply-To: <20241212080442.1628366-1-tudor.cornea@gmail.com>
This allows us to control the algorithm used to spread traffic between
sockets, adding more fine grained control. If the user does not
specify a fanout mode, the PMD driver will default to
PACKET_FANOUT_HASH.
Signed-off-by: Tudor Cornea <tudor.cornea@gmail.com>
---
v2:
* Renamed the patch
* Replaced packet_fanout argument with fanout_mode, which allows
more fine grained control
---
doc/guides/nics/af_packet.rst | 4 +-
drivers/net/af_packet/rte_eth_af_packet.c | 92 ++++++++++++++++++++---
2 files changed, 83 insertions(+), 13 deletions(-)
diff --git a/doc/guides/nics/af_packet.rst b/doc/guides/nics/af_packet.rst
index a343d3a961..3443f95004 100644
--- a/doc/guides/nics/af_packet.rst
+++ b/doc/guides/nics/af_packet.rst
@@ -25,6 +25,8 @@ Some of these, in turn, will be used to configure the PACKET_MMAP settings.
* ``qpairs`` - number of Rx and Tx queues (optional, default 1);
* ``qdisc_bypass`` - set PACKET_QDISC_BYPASS option in AF_PACKET (optional,
disabled by default);
+* ``fanout_mode`` - set fanout algorithm. Possible choices: hash,lb,cpu,rollover,rnd,qm (optional,
+ default hash);
* ``blocksz`` - PACKET_MMAP block size (optional, default 4096);
* ``framesz`` - PACKET_MMAP frame size (optional, default 2048B; Note: multiple
of 16B);
@@ -64,7 +66,7 @@ framecnt=512):
.. code-block:: console
- --vdev=eth_af_packet0,iface=tap0,blocksz=4096,framesz=2048,framecnt=512,qpairs=1,qdisc_bypass=0
+ --vdev=eth_af_packet0,iface=tap0,blocksz=4096,framesz=2048,framecnt=512,qpairs=1,qdisc_bypass=0,fanout_mode=hash
Features and Limitations
------------------------
diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
index ceb8d9356a..8449975384 100644
--- a/drivers/net/af_packet/rte_eth_af_packet.c
+++ b/drivers/net/af_packet/rte_eth_af_packet.c
@@ -36,6 +36,7 @@
#define ETH_AF_PACKET_FRAMESIZE_ARG "framesz"
#define ETH_AF_PACKET_FRAMECOUNT_ARG "framecnt"
#define ETH_AF_PACKET_QDISC_BYPASS_ARG "qdisc_bypass"
+#define ETH_AF_PACKET_FANOUT_MODE_ARG "fanout_mode"
#define DFLT_FRAME_SIZE (1 << 11)
#define DFLT_FRAME_COUNT (1 << 9)
@@ -96,6 +97,7 @@ static const char *valid_arguments[] = {
ETH_AF_PACKET_FRAMESIZE_ARG,
ETH_AF_PACKET_FRAMECOUNT_ARG,
ETH_AF_PACKET_QDISC_BYPASS_ARG,
+ ETH_AF_PACKET_FANOUT_MODE_ARG,
NULL
};
@@ -700,6 +702,61 @@ open_packet_iface(const char *key __rte_unused,
return 0;
}
+#if defined(PACKET_FANOUT)
+#define PACKET_FANOUT_INVALID -1
+
+static int
+get_fanout_group_id(int if_index)
+{
+ return (getpid() ^ if_index) & 0xffff;
+}
+
+static int
+get_fanout_mode(const char *fanout_mode)
+{
+ int mode = PACKET_FANOUT_FLAG_DEFRAG;
+
+#if defined(PACKET_FANOUT_FLAG_ROLLOVER)
+ mode |= PACKET_FANOUT_FLAG_ROLLOVER;
+#endif
+
+ if (!fanout_mode) {
+ /* Default */
+ mode |= PACKET_FANOUT_HASH;
+ } else if (!strcmp(fanout_mode, "hash")) {
+ mode |= PACKET_FANOUT_HASH;
+ } else if (!strcmp(fanout_mode, "lb")) {
+ mode |= PACKET_FANOUT_LB;
+ } else if (!strcmp(fanout_mode, "cpu")) {
+ mode |= PACKET_FANOUT_CPU;
+ } else if (!strcmp(fanout_mode, "rollover")) {
+ mode |= PACKET_FANOUT_ROLLOVER;
+ } else if (!strcmp(fanout_mode, "rnd")) {
+ mode |= PACKET_FANOUT_RND;
+ } else if (!strcmp(fanout_mode, "qm")) {
+ mode |= PACKET_FANOUT_QM;
+ } else {
+ /* Invalid Fanout Mode */
+ mode = PACKET_FANOUT_INVALID;
+ }
+
+ return mode;
+}
+
+static int
+get_fanout(const char *fanout_mode, int if_index)
+{
+ int group_id = get_fanout_group_id(if_index);
+ int mode = get_fanout_mode(fanout_mode);
+ int fanout = PACKET_FANOUT_INVALID;
+
+ if (mode != PACKET_FANOUT_INVALID)
+ fanout = group_id | (mode << 16);
+
+ return fanout;
+}
+#endif
+
static int
rte_pmd_init_internals(struct rte_vdev_device *dev,
const int sockfd,
@@ -709,6 +766,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
unsigned int framesize,
unsigned int framecnt,
unsigned int qdisc_bypass,
+ const char *fanout_mode,
struct pmd_internals **internals,
struct rte_eth_dev **eth_dev,
struct rte_kvargs *kvlist)
@@ -810,11 +868,12 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
sockaddr.sll_ifindex = (*internals)->if_index;
#if defined(PACKET_FANOUT)
- fanout_arg = (getpid() ^ (*internals)->if_index) & 0xffff;
- fanout_arg |= (PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG) << 16;
-#if defined(PACKET_FANOUT_FLAG_ROLLOVER)
- fanout_arg |= PACKET_FANOUT_FLAG_ROLLOVER << 16;
-#endif
+ fanout_arg = get_fanout(fanout_mode, (*internals)->if_index);
+
+ if (fanout_arg == PACKET_FANOUT_INVALID) {
+ PMD_LOG(ERR, "Invalid fanout mode: %s", fanout_mode);
+ goto error;
+ }
#endif
for (q = 0; q < nb_queues; q++) {
@@ -927,13 +986,16 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
}
#if defined(PACKET_FANOUT)
- rc = setsockopt(qsockfd, SOL_PACKET, PACKET_FANOUT,
- &fanout_arg, sizeof(fanout_arg));
- if (rc == -1) {
- PMD_LOG_ERRNO(ERR,
- "%s: could not set PACKET_FANOUT on AF_PACKET socket for %s",
- name, pair->value);
- goto error;
+ if (nb_queues > 1) {
+ rc = setsockopt(qsockfd, SOL_PACKET, PACKET_FANOUT,
+ &fanout_arg, sizeof(fanout_arg));
+ if (rc == -1) {
+ PMD_LOG_ERRNO(ERR,
+ "%s: could not set PACKET_FANOUT "
+ "on AF_PACKET socket for %s",
+ name, pair->value);
+ goto error;
+ }
}
#endif
}
@@ -1003,6 +1065,7 @@ rte_eth_from_packet(struct rte_vdev_device *dev,
unsigned int framecount = DFLT_FRAME_COUNT;
unsigned int qpairs = 1;
unsigned int qdisc_bypass = 1;
+ const char *fanout_mode = NULL;
/* do some parameter checking */
if (*sockfd < 0)
@@ -1065,6 +1128,10 @@ rte_eth_from_packet(struct rte_vdev_device *dev,
}
continue;
}
+ if (strstr(pair->key, ETH_AF_PACKET_FANOUT_MODE_ARG) != NULL) {
+ fanout_mode = pair->value;
+ continue;
+ }
}
if (framesize > blocksize) {
@@ -1091,6 +1158,7 @@ rte_eth_from_packet(struct rte_vdev_device *dev,
blocksize, blockcount,
framesize, framecount,
qdisc_bypass,
+ fanout_mode,
&internals, ð_dev,
kvlist) < 0)
return -1;
--
2.34.1
prev parent reply other threads:[~2025-01-06 15:35 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-12 8:04 [PATCH] net/af_packet: allow disabling packet fanout Tudor Cornea
2024-12-12 17:11 ` Stephen Hemminger
2024-12-16 9:58 ` Tudor Cornea
2025-01-06 15:29 ` Tudor Cornea
2025-01-06 15:35 ` Tudor Cornea [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250106153508.11262-1-tudor.cornea@gmail.com \
--to=tudor.cornea@gmail.com \
--cc=andrew.rybchenko@oktetlabs.ru \
--cc=dev@dpdk.org \
--cc=ferruh.yigit@amd.com \
--cc=linville@tuxdriver.com \
--cc=stephen@networkplumber.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).