From: Stephen Hemminger <stephen@networkplumber.org>
To: Tudor Cornea <tudor.cornea@gmail.com>
Cc: linville@tuxdriver.com, ferruh.yigit@amd.com,
andrew.rybchenko@oktetlabs.ru, dev@dpdk.org
Subject: Re: [PATCH v2] net/af_packet: allow changing fanout mode
Date: Tue, 14 Jan 2025 10:38:53 -0800 [thread overview]
Message-ID: <20250114103853.330cd3b6@hermes.local> (raw)
In-Reply-To: <20250106153508.11262-1-tudor.cornea@gmail.com>
On Mon, 6 Jan 2025 17:35:08 +0200
Tudor Cornea <tudor.cornea@gmail.com> wrote:
> This allows us to control the algorithm used to spread traffic between
> sockets, adding more fine grained control. If the user does not
> specify a fanout mode, the PMD driver will default to
> PACKET_FANOUT_HASH.
>
> Signed-off-by: Tudor Cornea <tudor.cornea@gmail.com>
>
> ---
> v2:
> * Renamed the patch
> * Replaced packet_fanout argument with fanout_mode, which allows
> more fine grained control
> ---
> doc/guides/nics/af_packet.rst | 4 +-
> drivers/net/af_packet/rte_eth_af_packet.c | 92 ++++++++++++++++++++---
> 2 files changed, 83 insertions(+), 13 deletions(-)
>
> diff --git a/doc/guides/nics/af_packet.rst b/doc/guides/nics/af_packet.rst
> index a343d3a961..3443f95004 100644
> --- a/doc/guides/nics/af_packet.rst
> +++ b/doc/guides/nics/af_packet.rst
> @@ -25,6 +25,8 @@ Some of these, in turn, will be used to configure the PACKET_MMAP settings.
> * ``qpairs`` - number of Rx and Tx queues (optional, default 1);
> * ``qdisc_bypass`` - set PACKET_QDISC_BYPASS option in AF_PACKET (optional,
> disabled by default);
> +* ``fanout_mode`` - set fanout algorithm. Possible choices: hash,lb,cpu,rollover,rnd,qm (optional,
> + default hash);
Use proper punctuation here, need space after comma.
A description of what the differences are or reference to af-packet man page would help.
https://www.man7.org/linux/man-pages/man7/packet.7.html
The user should not have to huting to find what "qm" means for example.
Should also address the earlier part in this document that is:
The PACKET_FANOUT_HASH behavior of AF_PACKET is used for frame reception.
> * ``blocksz`` - PACKET_MMAP block size (optional, default 4096);
> * ``framesz`` - PACKET_MMAP frame size (optional, default 2048B; Note: multiple
> of 16B);
> @@ -64,7 +66,7 @@ framecnt=512):
>
> .. code-block:: console
>
> - --vdev=eth_af_packet0,iface=tap0,blocksz=4096,framesz=2048,framecnt=512,qpairs=1,qdisc_bypass=0
> + --vdev=eth_af_packet0,iface=tap0,blocksz=4096,framesz=2048,framecnt=512,qpairs=1,qdisc_bypass=0,fanout_mode=hash
>
> Features and Limitations
> ------------------------
> diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c
> index ceb8d9356a..8449975384 100644
> --- a/drivers/net/af_packet/rte_eth_af_packet.c
> +++ b/drivers/net/af_packet/rte_eth_af_packet.c
> @@ -36,6 +36,7 @@
> #define ETH_AF_PACKET_FRAMESIZE_ARG "framesz"
> #define ETH_AF_PACKET_FRAMECOUNT_ARG "framecnt"
> #define ETH_AF_PACKET_QDISC_BYPASS_ARG "qdisc_bypass"
> +#define ETH_AF_PACKET_FANOUT_MODE_ARG "fanout_mode"
>
> #define DFLT_FRAME_SIZE (1 << 11)
> #define DFLT_FRAME_COUNT (1 << 9)
> @@ -96,6 +97,7 @@ static const char *valid_arguments[] = {
> ETH_AF_PACKET_FRAMESIZE_ARG,
> ETH_AF_PACKET_FRAMECOUNT_ARG,
> ETH_AF_PACKET_QDISC_BYPASS_ARG,
> + ETH_AF_PACKET_FANOUT_MODE_ARG,
> NULL
> };
>
> @@ -700,6 +702,61 @@ open_packet_iface(const char *key __rte_unused,
> return 0;
> }
>
> +#if defined(PACKET_FANOUT)
Since PACKET_FANOUT has existed since Linux 3.1, the #ifdef
for this can be removed now.
> +#define PACKET_FANOUT_INVALID -1
> +
> +static int
> +get_fanout_group_id(int if_index)
> +{
> + return (getpid() ^ if_index) & 0xffff;
> +}
> +
> +static int
> +get_fanout_mode(const char *fanout_mode)
> +{
> + int mode = PACKET_FANOUT_FLAG_DEFRAG;
> +
> +#if defined(PACKET_FANOUT_FLAG_ROLLOVER)
No more #ifdefs please
> + mode |= PACKET_FANOUT_FLAG_ROLLOVER;
> +#endif
> +
> + if (!fanout_mode) {
> + /* Default */
> + mode |= PACKET_FANOUT_HASH;
> + } else if (!strcmp(fanout_mode, "hash")) {
> + mode |= PACKET_FANOUT_HASH;
> + } else if (!strcmp(fanout_mode, "lb")) {
> + mode |= PACKET_FANOUT_LB;
> + } else if (!strcmp(fanout_mode, "cpu")) {
> + mode |= PACKET_FANOUT_CPU;
> + } else if (!strcmp(fanout_mode, "rollover")) {
> + mode |= PACKET_FANOUT_ROLLOVER;
> + } else if (!strcmp(fanout_mode, "rnd")) {
> + mode |= PACKET_FANOUT_RND;
> + } else if (!strcmp(fanout_mode, "qm")) {
> + mode |= PACKET_FANOUT_QM;
> + } else {
> + /* Invalid Fanout Mode */
> + mode = PACKET_FANOUT_INVALID;
> + }
Maybe allow longer names like "load_balance" or "queue_map"
> +
> + return mode;
> +}
> +
> +static int
> +get_fanout(const char *fanout_mode, int if_index)
> +{
> + int group_id = get_fanout_group_id(if_index);
> + int mode = get_fanout_mode(fanout_mode);
> + int fanout = PACKET_FANOUT_INVALID;
> +
> + if (mode != PACKET_FANOUT_INVALID)
> + fanout = group_id | (mode << 16);
> +
> + return fanout;
> +}
This could be simplified as:
static int
get_fanout(const char *fanout_mode, int if_index)
{
int mode = get_fanout_mode(fanout_mode);
if (mode != PACKET_FANOUT_INVALID) {
return get_fanout_group_id(if_index) | (mode << 16);
else
return PACKOUT_FANOUT_INVALID;
}
> +#endif
> +
> static int
> rte_pmd_init_internals(struct rte_vdev_device *dev,
> const int sockfd,
> @@ -709,6 +766,7 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
> unsigned int framesize,
> unsigned int framecnt,
> unsigned int qdisc_bypass,
> + const char *fanout_mode,
> struct pmd_internals **internals,
> struct rte_eth_dev **eth_dev,
> struct rte_kvargs *kvlist)
> @@ -810,11 +868,12 @@ rte_pmd_init_internals(struct rte_vdev_device *dev,
> sockaddr.sll_ifindex = (*internals)->if_index;
>
> #if defined(PACKET_FANOUT)
> - fanout_arg = (getpid() ^ (*internals)->if_index) & 0xffff;
> - fanout_arg |= (PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG) << 16;
> -#if defined(PACKET_FANOUT_FLAG_ROLLOVER)
> - fanout_arg |= PACKET_FANOUT_FLAG_ROLLOVER << 16;
> -#endif
> + fanout_arg = get_fanout(fanout_mode, (*internals)->if_index);
> +
> + if (fanout_arg == PACKET_FANOUT_INVALID) {
Looks better without blank line between get_fanout() and the if() but
that is totally a matter of personal taste.
> + PMD_LOG(ERR, "Invalid fanout mode: %s", fanout_mode);
> + goto error;
> + }
prev parent reply other threads:[~2025-01-14 18:38 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-12 8:04 [PATCH] net/af_packet: allow disabling packet fanout Tudor Cornea
2024-12-12 17:11 ` Stephen Hemminger
2024-12-16 9:58 ` Tudor Cornea
2025-01-06 15:29 ` Tudor Cornea
2025-01-06 15:35 ` [PATCH v2] net/af_packet: allow changing fanout mode Tudor Cornea
2025-01-14 18:38 ` Stephen Hemminger [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250114103853.330cd3b6@hermes.local \
--to=stephen@networkplumber.org \
--cc=andrew.rybchenko@oktetlabs.ru \
--cc=dev@dpdk.org \
--cc=ferruh.yigit@amd.com \
--cc=linville@tuxdriver.com \
--cc=tudor.cornea@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).