DPDK patches and discussions
From: "Varghese, Vipin" <Vipin.Varghese@amd.com>
To: "Morten Brørup" <mb@smartsharesystems.com>,
	"dev@dpdk.org" <dev@dpdk.org>,
	"Tetsuya Mukawa" <mtetsuyah@gmail.com>,
	"Stephen Hemminger" <stephen@networkplumber.org>,
	"P, Thiyagarajan" <Thiyagarajan.P@amd.com>,
	"Ivan Malov" <ivan.malov@arknetworks.am>,
	"Konstantin Ananyev" <konstantin.ananyev@huawei.com>
Subject: RE: [PATCH v4] net/null: Add fast mbuf release TX offload
Date: Mon, 4 Aug 2025 09:23:18 +0000	[thread overview]
Message-ID: <PH7PR12MB8596621758B80CB147B1A6338223A@PH7PR12MB8596.namprd12.prod.outlook.com>
In-Reply-To: <20250730140451.622906-1-mb@smartsharesystems.com>

Hi Morten,

Please find our testing results below.

snipped
>
>
> Added fast mbuf release, re-using the existing mbuf pool pointer in the queue
> structure.
>
> Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> ---
> v4:
> * Force the generic function called by the separate tx_pkt_burst callbacks
>   to be inlined.
> v3:
> * Use separate tx_pkt_burst callbacks depending on per-device TX offload
>   configuration. (Ivan Malov, Konstantin Ananyev)
> * Check TX offload configuration for mutually exclusive MBUF_FAST_FREE and
>   MULTI_SEGS flags. (Ivan Malov, Konstantin Ananyev)
> * Also announce MULTI_SEGS TX offload as a per-queue capability.
> v2:
> * Also announce the MBUF_FAST_FREE TX offload as a per-queue capability.
> * Added missing test of per-device MBUF_FAST_FREE TX offload configuration
>   when configuring the queue.
> ---
>  drivers/net/null/rte_eth_null.c | 143 +++++++++++++++++++++++++-------
>  1 file changed, 115 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
> index 8a9b74a03b..4f273319f2 100644
> --- a/drivers/net/null/rte_eth_null.c
> +++ b/drivers/net/null/rte_eth_null.c
> @@ -34,6 +34,17 @@ struct pmd_internals;
>  struct null_queue {
>         struct pmd_internals *internals;
>
> +       /**
> +        * For RX queue:
> +        *  Mempool to allocate mbufs from.
> +        *
> +        * For TX queue:
> +        *  Mempool to free mbufs to, if fast release of mbufs is enabled.
> +        *  UINTPTR_MAX if the mempool for fast release of mbufs has not yet been detected.
> +        *  NULL if fast release of mbufs is not enabled.
> +        *
> +        *  @see RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE
> +        */
>         struct rte_mempool *mb_pool;
>         void *dummy_packet;
>
> @@ -141,8 +152,15 @@ eth_null_no_rx(void *q __rte_unused, struct rte_mbuf **bufs __rte_unused,
>         return 0;
>  }
>
> -static uint16_t
> -eth_null_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
> +enum eth_tx_free_mode {
> +       ETH_TX_FREE_MODE_NO_MBUF_FAST_FREE, /* MBUF_FAST_FREE not possible. */
> +       ETH_TX_FREE_MODE_MBUF_FAST_FREE,    /* MBUF_FAST_FREE enabled for the device. */
> +       ETH_TX_FREE_MODE_PER_QUEUE,         /* Varies per TX queue. */
> +};
> +
> +static __rte_always_inline uint16_t
> +eth_null_tx_common(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs,
> +               enum eth_tx_free_mode mode)
>  {
>         struct null_queue *h = q;
>         unsigned int i;
> @@ -151,13 +169,42 @@ eth_null_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>         for (i = 0; i < nb_bufs; i++)
>                 bytes += rte_pktmbuf_pkt_len(bufs[i]);
>
> -       rte_pktmbuf_free_bulk(bufs, nb_bufs);
> +       if (mode == ETH_TX_FREE_MODE_MBUF_FAST_FREE ||
> +                       (mode == ETH_TX_FREE_MODE_PER_QUEUE && h->mb_pool != NULL)) {
> +               /* RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE */
> +               if (unlikely(h->mb_pool == (void *)UINTPTR_MAX)) {
> +                       if (unlikely(nb_bufs == 0))
> +                               return 0; /* Do not dereference uninitialized bufs[0]. */
> +                       h->mb_pool = bufs[0]->pool;
> +               }
> +               rte_mbuf_raw_free_bulk(h->mb_pool, bufs, nb_bufs);
> +       } else {
> +               rte_pktmbuf_free_bulk(bufs, nb_bufs);
> +       }
>         rte_atomic_fetch_add_explicit(&h->tx_pkts, nb_bufs, rte_memory_order_relaxed);
>         rte_atomic_fetch_add_explicit(&h->tx_bytes, bytes, rte_memory_order_relaxed);
>
>         return nb_bufs;
>  }
>
> +static uint16_t
> +eth_null_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
> +{
> +       return eth_null_tx_common(q, bufs, nb_bufs, ETH_TX_FREE_MODE_PER_QUEUE);
> +}
> +
> +static uint16_t
> +eth_null_tx_no_mbuf_fast_free(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
> +{
> +       return eth_null_tx_common(q, bufs, nb_bufs, ETH_TX_FREE_MODE_NO_MBUF_FAST_FREE);
> +}
> +
> +static uint16_t
> +eth_null_tx_mbuf_fast_free(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
> +{
> +       return eth_null_tx_common(q, bufs, nb_bufs, ETH_TX_FREE_MODE_MBUF_FAST_FREE);
> +}
> +
>  static uint16_t
>  eth_null_copy_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>  {
> @@ -178,9 +225,48 @@ eth_null_copy_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>         return nb_bufs;
>  }
>
> +static void
> +eth_dev_assign_rxtx_ops(struct rte_eth_dev *dev)
> +{
> +       struct pmd_internals *internals = dev->data->dev_private;
> +
> +       if (internals->packet_copy) {
> +               dev->rx_pkt_burst = eth_null_copy_rx;
> +               dev->tx_pkt_burst = eth_null_copy_tx;
> +       } else {
> +               if (internals->no_rx)
> +                       dev->rx_pkt_burst = eth_null_no_rx;
> +               else
> +                       dev->rx_pkt_burst = eth_null_rx;
> +
> +               dev->tx_pkt_burst = eth_null_tx;
> +       if (dev->data->dev_conf.txmode.offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS)
> +               dev->tx_pkt_burst = eth_null_tx_no_mbuf_fast_free;
> +       if (dev->data->dev_conf.txmode.offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE)
> +               dev->tx_pkt_burst = eth_null_tx_mbuf_fast_free;
> +       }
> +}
> +
>  static int
> -eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
> +eth_dev_configure(struct rte_eth_dev *dev)
>  {
> +       struct pmd_internals *internals = dev->data->dev_private;
> +
> +       if ((dev->data->dev_conf.txmode.offloads &
> +                       (RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE | RTE_ETH_TX_OFFLOAD_MULTI_SEGS)) ==
> +                       (RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE | RTE_ETH_TX_OFFLOAD_MULTI_SEGS)) {
> +               PMD_LOG(ERR,
> +                       "TX offloads MBUF_FAST_FREE and MULTI_SEGS are mutually exclusive");
> +               return -EINVAL;
> +       }
> +       if (dev->data->dev_conf.txmode.offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE &&
> +                       internals->packet_copy) {
> +               PMD_LOG(INFO,
> +                       "TX offload MBUF_FAST_FREE is ignored with %s argument",
> +                       ETH_NULL_PACKET_COPY_ARG);
> +       }
> +       /* Assign RX/TX ops depending on device TX offloads. */
> +       eth_dev_assign_rxtx_ops(dev);
>         return 0;
>  }
>
> @@ -259,7 +345,7 @@ static int
>  eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
>                 uint16_t nb_tx_desc __rte_unused,
>                 unsigned int socket_id __rte_unused,
> -               const struct rte_eth_txconf *tx_conf __rte_unused)
> +               const struct rte_eth_txconf *tx_conf)
>  {
>         struct rte_mbuf *dummy_packet;
>         struct pmd_internals *internals;
> @@ -273,6 +359,20 @@ eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
>         if (tx_queue_id >= dev->data->nb_tx_queues)
>                 return -ENODEV;
>
> +       if (((dev->data->dev_conf.txmode.offloads | tx_conf->offloads) &
> +                       (RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE | RTE_ETH_TX_OFFLOAD_MULTI_SEGS)) ==
> +                       (RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE | RTE_ETH_TX_OFFLOAD_MULTI_SEGS)) {
> +               PMD_LOG(ERR,
> +                       "TX offloads MBUF_FAST_FREE and MULTI_SEGS are mutually exclusive");
> +               return -EINVAL;
> +       }
> +       if (tx_conf->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE &&
> +                       internals->packet_copy) {
> +               PMD_LOG(INFO,
> +                       "TX offload MBUF_FAST_FREE is ignored with %s argument",
> +                       ETH_NULL_PACKET_COPY_ARG);
> +       }
> +
>         packet_size = internals->packet_size;
>
>         dev->data->tx_queues[tx_queue_id] =
> @@ -284,6 +384,10 @@ eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
>
>         internals->tx_null_queues[tx_queue_id].internals = internals;
>         internals->tx_null_queues[tx_queue_id].dummy_packet = dummy_packet;
> +       internals->tx_null_queues[tx_queue_id].mb_pool =
> +                       (dev->data->dev_conf.txmode.offloads | tx_conf->offloads) &
> +                       RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE ?
> +                       (void *)UINTPTR_MAX : NULL;
>
>         return 0;
>  }
> @@ -309,7 +413,10 @@ eth_dev_info(struct rte_eth_dev *dev,
>         dev_info->max_rx_queues = RTE_DIM(internals->rx_null_queues);
>         dev_info->max_tx_queues = RTE_DIM(internals->tx_null_queues);
>         dev_info->min_rx_bufsize = 0;
> -       dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS | RTE_ETH_TX_OFFLOAD_MT_LOCKFREE;
> +       dev_info->tx_queue_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
> +                       RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
> +       dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MT_LOCKFREE |
> +                       dev_info->tx_queue_offload_capa;
>
>         dev_info->reta_size = internals->reta_size;
>         dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
> @@ -590,16 +697,7 @@ eth_dev_null_create(struct rte_vdev_device *dev, struct pmd_options *args)
>         eth_dev->dev_ops = &ops;
>
>         /* finally assign rx and tx ops */
> -       if (internals->packet_copy) {
> -               eth_dev->rx_pkt_burst = eth_null_copy_rx;
> -               eth_dev->tx_pkt_burst = eth_null_copy_tx;
> -       } else if (internals->no_rx) {
> -               eth_dev->rx_pkt_burst = eth_null_no_rx;
> -               eth_dev->tx_pkt_burst = eth_null_tx;
> -       } else {
> -               eth_dev->rx_pkt_burst = eth_null_rx;
> -               eth_dev->tx_pkt_burst = eth_null_tx;
> -       }
> +       eth_dev_assign_rxtx_ops(eth_dev);
>
>         rte_eth_dev_probing_finish(eth_dev);
>         return 0;
> @@ -678,7 +776,6 @@ rte_pmd_null_probe(struct rte_vdev_device *dev)
>         PMD_LOG(INFO, "Initializing pmd_null for %s", name);
>
>         if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
> -               struct pmd_internals *internals;
>                 eth_dev = rte_eth_dev_attach_secondary(name);
>                 if (!eth_dev) {
>                         PMD_LOG(ERR, "Failed to probe %s", name);
> @@ -687,17 +784,7 @@ rte_pmd_null_probe(struct rte_vdev_device *dev)
>                 /* TODO: request info from primary to set up Rx and Tx */
>                 eth_dev->dev_ops = &ops;
>                 eth_dev->device = &dev->device;
> -               internals = eth_dev->data->dev_private;
> -               if (internals->packet_copy) {
> -                       eth_dev->rx_pkt_burst = eth_null_copy_rx;
> -                       eth_dev->tx_pkt_burst = eth_null_copy_tx;
> -               } else if (internals->no_rx) {
> -                       eth_dev->rx_pkt_burst = eth_null_no_rx;
> -                       eth_dev->tx_pkt_burst = eth_null_tx;
> -               } else {
> -                       eth_dev->rx_pkt_burst = eth_null_rx;
> -                       eth_dev->tx_pkt_burst = eth_null_tx;
> -               }
> +               eth_dev_assign_rxtx_ops(eth_dev);
>                 rte_eth_dev_probing_finish(eth_dev);
>                 return 0;
>         }
> --
> 2.43.0

Single Port     | TX MPPS | RX MPPS | Func-latency: eth_null_tx | Func-latency: rte_pktmbuf_free_bulk
----------------+---------+---------+---------------------------+-------------------------------------
25.07-rc1       | 117.61  | 117.61  | 1918                      | 2667
25.07-rc1 - v2  | 117.55  | 117.54  | 1921                      | 2660
25.11-rc0       | 118.17  | 118.17  | 1911                      | 2679
25.11-rc0 - v4  | 116.18  | 116.18  | 1925                      | 2677

Dual Port       | TX MPPS | RX MPPS | Func-latency: eth_null_tx | Func-latency: rte_pktmbuf_free_bulk
----------------+---------+---------+---------------------------+-------------------------------------
25.07-rc1       | 117.61  | 117.61  | 1942                      | 2557
25.07-rc1 - v2  | 117.54  | 117.54  | 1946                      | 2740
25.11-rc0       | 118.21  | 118.21  | 1924                      | 2768
25.11-rc0 - v4  | 117.12  | 117.12  | 1943                      | 2770
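
For anyone who wants to reproduce these numbers, the sketch below shows how an
application can request the new offload on a net_null port. This is a minimal,
untested example; the port id, queue size and the
configure_null_port_fast_free() helper name are ours, not part of the patch.
The ethdev calls are standard, and the capability and mutual-exclusion checks
mirror what v4 implements in eth_dev_info() and eth_dev_configure().

  #include <errno.h>
  #include <rte_ethdev.h>
  #include <rte_lcore.h>

  /* Hypothetical helper: configure one TX queue on a null port with
   * RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE enabled. RX setup is omitted. */
  static int
  configure_null_port_fast_free(uint16_t port_id)
  {
          struct rte_eth_dev_info dev_info;
          struct rte_eth_conf port_conf = { 0 };
          int ret;

          ret = rte_eth_dev_info_get(port_id, &dev_info);
          if (ret != 0)
                  return ret;

          /* v4 announces MBUF_FAST_FREE per device and per queue. */
          if (!(dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE))
                  return -ENOTSUP;

          /* Do not combine with MULTI_SEGS; the patch rejects that
           * combination with -EINVAL in eth_dev_configure(). */
          port_conf.txmode.offloads = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;

          ret = rte_eth_dev_configure(port_id, 1, 1, &port_conf);
          if (ret != 0)
                  return ret;

          /* With a NULL txconf, the queue runs with the port-level
           * offloads configured above. */
          return rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(), NULL);
  }

Note that the usual offload contract still applies: all mbufs sent on the queue
must come from one mempool and have a reference count of 1, which is why
eth_null_tx_common() can safely latch bufs[0]->pool into h->mb_pool on first
use and free subsequent bursts with rte_mbuf_raw_free_bulk().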



Thread overview: 15+ messages
2025-06-24 18:14 [PATCH] " Morten Brørup
2025-06-26 14:05 ` Stephen Hemminger
2025-06-26 15:44   ` Morten Brørup
2025-06-27 12:07     ` Varghese, Vipin
2025-07-26  4:34       ` Morten Brørup
2025-07-28  8:22         ` Varghese, Vipin
2025-07-26  4:48 ` [PATCH v2] " Morten Brørup
2025-07-26  6:15   ` Ivan Malov
2025-07-28 13:27     ` Morten Brørup
2025-07-28 13:51       ` Ivan Malov
2025-07-28 15:42       ` Konstantin Ananyev
2025-07-28 16:42         ` Morten Brørup
2025-07-30 13:50 ` [PATCH v3] " Morten Brørup
2025-07-30 14:04 ` [PATCH v4] " Morten Brørup
2025-08-04  9:23   ` Varghese, Vipin [this message]
