DPDK patches and discussions
 help / color / mirror / Atom feed
* [PATCH] net/null: Add fast mbuf release TX offload
@ 2025-06-24 18:14 Morten Brørup
  2025-06-26 14:05 ` Stephen Hemminger
  2025-07-26  4:48 ` [PATCH v2] " Morten Brørup
  0 siblings, 2 replies; 12+ messages in thread
From: Morten Brørup @ 2025-06-24 18:14 UTC (permalink / raw)
  To: Tetsuya Mukawa, Stephen Hemminger, dev; +Cc: Morten Brørup

Added fast mbuf release, re-using the existing mbuf pool pointer
in the queue structure.

Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
---
 drivers/net/null/rte_eth_null.c | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 8a9b74a03b..12c0d8d1ff 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -34,6 +34,17 @@ struct pmd_internals;
 struct null_queue {
 	struct pmd_internals *internals;
 
+	/**
+	 * For RX queue:
+	 *  Mempool to allocate mbufs from.
+	 *
+	 * For TX queue:
+	 *  Mempool to free mbufs to, if fast release of mbufs is enabled.
+	 *  UINTPTR_MAX if the mempool for fast release of mbufs has not yet been detected.
+	 *  NULL if fast release of mbufs is not enabled.
+	 *
+	 *  @see RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE
+	 */
 	struct rte_mempool *mb_pool;
 	void *dummy_packet;
 
@@ -151,7 +162,16 @@ eth_null_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 	for (i = 0; i < nb_bufs; i++)
 		bytes += rte_pktmbuf_pkt_len(bufs[i]);
 
-	rte_pktmbuf_free_bulk(bufs, nb_bufs);
+	if (h->mb_pool != NULL) { /* RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE */
+		if (unlikely(h->mb_pool == (void *)UINTPTR_MAX)) {
+			if (unlikely(nb_bufs == 0))
+				return 0; /* Do not dereference uninitialized bufs[0]. */
+			h->mb_pool = bufs[0]->pool;
+		}
+		rte_mbuf_raw_free_bulk(h->mb_pool, bufs, nb_bufs);
+	} else {
+		rte_pktmbuf_free_bulk(bufs, nb_bufs);
+	}
 	rte_atomic_fetch_add_explicit(&h->tx_pkts, nb_bufs, rte_memory_order_relaxed);
 	rte_atomic_fetch_add_explicit(&h->tx_bytes, bytes, rte_memory_order_relaxed);
 
@@ -259,7 +279,7 @@ static int
 eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
 		uint16_t nb_tx_desc __rte_unused,
 		unsigned int socket_id __rte_unused,
-		const struct rte_eth_txconf *tx_conf __rte_unused)
+		const struct rte_eth_txconf *tx_conf)
 {
 	struct rte_mbuf *dummy_packet;
 	struct pmd_internals *internals;
@@ -284,6 +304,9 @@ eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
 
 	internals->tx_null_queues[tx_queue_id].internals = internals;
 	internals->tx_null_queues[tx_queue_id].dummy_packet = dummy_packet;
+	internals->tx_null_queues[tx_queue_id].mb_pool =
+			tx_conf->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE ?
+			(void *)UINTPTR_MAX : NULL;
 
 	return 0;
 }
@@ -309,7 +332,8 @@ eth_dev_info(struct rte_eth_dev *dev,
 	dev_info->max_rx_queues = RTE_DIM(internals->rx_null_queues);
 	dev_info->max_tx_queues = RTE_DIM(internals->tx_null_queues);
 	dev_info->min_rx_bufsize = 0;
-	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS | RTE_ETH_TX_OFFLOAD_MT_LOCKFREE;
+	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
+			RTE_ETH_TX_OFFLOAD_MT_LOCKFREE | RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
 
 	dev_info->reta_size = internals->reta_size;
 	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
-- 
2.43.0


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] net/null: Add fast mbuf release TX offload
  2025-06-24 18:14 [PATCH] net/null: Add fast mbuf release TX offload Morten Brørup
@ 2025-06-26 14:05 ` Stephen Hemminger
  2025-06-26 15:44   ` Morten Brørup
  2025-07-26  4:48 ` [PATCH v2] " Morten Brørup
  1 sibling, 1 reply; 12+ messages in thread
From: Stephen Hemminger @ 2025-06-26 14:05 UTC (permalink / raw)
  To: Morten Brørup; +Cc: Tetsuya Mukawa, dev

On Tue, 24 Jun 2025 18:14:16 +0000
Morten Brørup <mb@smartsharesystems.com> wrote:

> Added fast mbuf release, re-using the existing mbuf pool pointer
> in the queue structure.
> 
> Signed-off-by: Morten Brørup <mb@smartsharesystems.com>

Makes sense.

> ---
>  drivers/net/null/rte_eth_null.c | 30 +++++++++++++++++++++++++++---
>  1 file changed, 27 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
> index 8a9b74a03b..12c0d8d1ff 100644
> --- a/drivers/net/null/rte_eth_null.c
> +++ b/drivers/net/null/rte_eth_null.c
> @@ -34,6 +34,17 @@ struct pmd_internals;
>  struct null_queue {
>  	struct pmd_internals *internals;
>  
> +	/**
> +	 * For RX queue:
> +	 *  Mempool to allocate mbufs from.
> +	 *
> +	 * For TX queue:
> +	 *  Mempool to free mbufs to, if fast release of mbufs is enabled.
> +	 *  UINTPTR_MAX if the mempool for fast release of mbufs has not yet been detected.
> +	 *  NULL if fast release of mbufs is not enabled.
> +	 *
> +	 *  @see RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE
> +	 */
>  	struct rte_mempool *mb_pool;

Do all drivers do it this way?
Is it documented in ethdev?

>  	void *dummy_packet;
>  
> @@ -151,7 +162,16 @@ eth_null_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>  	for (i = 0; i < nb_bufs; i++)
>  		bytes += rte_pktmbuf_pkt_len(bufs[i]);
>  
> -	rte_pktmbuf_free_bulk(bufs, nb_bufs);
> +	if (h->mb_pool != NULL) { /* RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE */
> +		if (unlikely(h->mb_pool == (void *)UINTPTR_MAX)) {
> +			if (unlikely(nb_bufs == 0))
> +				return 0; /* Do not dereference uninitialized bufs[0]. */
> +			h->mb_pool = bufs[0]->pool;
> +		}
> +		rte_mbuf_raw_free_bulk(h->mb_pool, bufs, nb_bufs);
> +	} else {
> +		rte_pktmbuf_free_bulk(bufs, nb_bufs);
> +	}
>  	rte_atomic_fetch_add_explicit(&h->tx_pkts, nb_bufs, rte_memory_order_relaxed);
>  	rte_atomic_fetch_add_explicit(&h->tx_bytes, bytes, rte_memory_order_relaxed);
>  
> @@ -259,7 +279,7 @@ static int
>  eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
>  		uint16_t nb_tx_desc __rte_unused,
>  		unsigned int socket_id __rte_unused,
> -		const struct rte_eth_txconf *tx_conf __rte_unused)
> +		const struct rte_eth_txconf *tx_conf)
>  {
>  	struct rte_mbuf *dummy_packet;
>  	struct pmd_internals *internals;
> @@ -284,6 +304,9 @@ eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
>  
>  	internals->tx_null_queues[tx_queue_id].internals = internals;
>  	internals->tx_null_queues[tx_queue_id].dummy_packet = dummy_packet;
> +	internals->tx_null_queues[tx_queue_id].mb_pool =
> +			tx_conf->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE ?
> +			(void *)UINTPTR_MAX : NULL;
>  
>  	return 0;
>  }
> @@ -309,7 +332,8 @@ eth_dev_info(struct rte_eth_dev *dev,
>  	dev_info->max_rx_queues = RTE_DIM(internals->rx_null_queues);
>  	dev_info->max_tx_queues = RTE_DIM(internals->tx_null_queues);
>  	dev_info->min_rx_bufsize = 0;
> -	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS | RTE_ETH_TX_OFFLOAD_MT_LOCKFREE;
> +	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
> +			RTE_ETH_TX_OFFLOAD_MT_LOCKFREE | RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
>  
>  	dev_info->reta_size = internals->reta_size;
>  	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;


^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH] net/null: Add fast mbuf release TX offload
  2025-06-26 14:05 ` Stephen Hemminger
@ 2025-06-26 15:44   ` Morten Brørup
  2025-06-27 12:07     ` Varghese, Vipin
  0 siblings, 1 reply; 12+ messages in thread
From: Morten Brørup @ 2025-06-26 15:44 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Tetsuya Mukawa, dev

> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Thursday, 26 June 2025 16.06
> 
> On Tue, 24 Jun 2025 18:14:16 +0000
> Morten Brørup <mb@smartsharesystems.com> wrote:
> 
> > Added fast mbuf release, re-using the existing mbuf pool pointer
> > in the queue structure.
> >
> > Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> 
> Makes sense.
> 
> > ---
> >  drivers/net/null/rte_eth_null.c | 30 +++++++++++++++++++++++++++---
> >  1 file changed, 27 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/net/null/rte_eth_null.c
> b/drivers/net/null/rte_eth_null.c
> > index 8a9b74a03b..12c0d8d1ff 100644
> > --- a/drivers/net/null/rte_eth_null.c
> > +++ b/drivers/net/null/rte_eth_null.c
> > @@ -34,6 +34,17 @@ struct pmd_internals;
> >  struct null_queue {
> >  	struct pmd_internals *internals;
> >
> > +	/**
> > +	 * For RX queue:
> > +	 *  Mempool to allocate mbufs from.
> > +	 *
> > +	 * For TX queue:
> > +	 *  Mempool to free mbufs to, if fast release of mbufs is enabled.
> > +	 *  UINTPTR_MAX if the mempool for fast release of mbufs has not
> yet been detected.
> > +	 *  NULL if fast release of mbufs is not enabled.
> > +	 *
> > +	 *  @see RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE
> > +	 */
> >  	struct rte_mempool *mb_pool;
> 
> Do all drivers to it this way?

No, I think most drivers have separate structures for rx and tx queues. This driver doesn't, so I'm reusing the existing mempool pointer.
Also, they don't cache the mempool pointer, but look at mbuf[0].pool at every burst; so their tx queue structure doesn't have a mempool pointer field.
And they check an offload flag (either the bit in the raw offload field, or a shadow variable for the relevant offload flag), instead of checking the mempool pointer.

Other drivers can be improved, and I have submitted an optimization patch for the i40e driver with some of the things I do in this patch:
https://inbox.dpdk.org/dev/20250624061238.89259-1-mb@smartsharesystems.com/

> Is it documented in ethdev?

The RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE flag is documented.
How to implement it in the drivers is not.

-Morten


^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH] net/null: Add fast mbuf release TX offload
  2025-06-26 15:44   ` Morten Brørup
@ 2025-06-27 12:07     ` Varghese, Vipin
  2025-07-26  4:34       ` Morten Brørup
  0 siblings, 1 reply; 12+ messages in thread
From: Varghese, Vipin @ 2025-06-27 12:07 UTC (permalink / raw)
  To: Morten Brørup, Stephen Hemminger; +Cc: Tetsuya Mukawa, dev

[Public]

Hi Morten,

We have tested the effect of the patch using func-latency and PPS via testpmd.
Please find our observations below:

 - DPDK tag: 25.07-rc1
 - compiler: gcc 14.2
 - platform: AMD EPYC 8534P 64core 2.3GHz
 - app cmd:
 -- One port: ` sudo build/app/dpdk-testpmd -l 15,16  --vdev=net_null1 --no-pci  -- --nb-cores=1 --nb-ports=1 --txq=1 --rxq=1 --txd=2048 --rxd=2048  -a --forward-mode=io --stats-period=1`
 -- Two port: ` sudo build/app/dpdk-testpmd -l 15,16,17  --vdev=net_null1 --vdev=net_null2 --no-pci  -- --nb-cores=2 --nb-ports=2 --txq=1 --rxq=1 --txd=2048 --rxd=2048  -a --forward-mode=io --stats-period=1`

Result 1 port:
 - Before patch: TX MPPs 117.61, RX-PPs 117.67, Func-latency TX: 1918ns, Func-latency free-bulk: 2667ns
 - After patch: TX MPPs 117.55, RX-PPs 117.54, Func-latency TX: 1921ns, Func-latency free-bulk: 2660ns

Result 2 port:
 - Before patch: TX MPPs 117.61, RX-PPs 117.67, Func-latency TX: 1942ns, Func-latency free-bulk: 2557ns
 - After patch: TX MPPs 117.54, RX-PPs 117.54, Func-latency TX: 1946ns, Func-latency free-bulk: 2740ns

Perf Top: diff before vs after shows 13.84% vs 13.79%

Reviewed-by: Thiyagarajan P <Thiyagarajan.P@amd.com>
Tested-by: Vipin Varghese <Vipin.Varghese@amd.com>

Clarification request: `with fast-mbuf-free on single port we see a free-bulk reduction of 7ns, but null_tx increases by 3ns, and TX PPS is reduced by 0.07 Mpps. Is this an anomaly of the net_null PMD?`

> >
> > On Tue, 24 Jun 2025 18:14:16 +0000
> > Morten Brørup <mb@smartsharesystems.com> wrote:
> >
> > > Added fast mbuf release, re-using the existing mbuf pool pointer in
> > > the queue structure.
> > >
> > > Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> >
> > Makes sense.
> >
> > > ---
> > >  drivers/net/null/rte_eth_null.c | 30 +++++++++++++++++++++++++++---
> > >  1 file changed, 27 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/drivers/net/null/rte_eth_null.c
> > b/drivers/net/null/rte_eth_null.c
> > > index 8a9b74a03b..12c0d8d1ff 100644
> > > --- a/drivers/net/null/rte_eth_null.c
> > > +++ b/drivers/net/null/rte_eth_null.c
> > > @@ -34,6 +34,17 @@ struct pmd_internals;  struct null_queue {
> > >     struct pmd_internals *internals;
> > >
> > > +   /**
> > > +    * For RX queue:
> > > +    *  Mempool to allocate mbufs from.
> > > +    *
> > > +    * For TX queue:
> > > +    *  Mempool to free mbufs to, if fast release of mbufs is enabled.
> > > +    *  UINTPTR_MAX if the mempool for fast release of mbufs has not
> > yet been detected.
> > > +    *  NULL if fast release of mbufs is not enabled.
> > > +    *
> > > +    *  @see RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE
> > > +    */
> > >     struct rte_mempool *mb_pool;
> >
> > Do all drivers to it this way?
>
> No, I think most drivers have separate structures for rx and tx queues. This driver
> doesn't so I'm reusing the existing mempool pointer.
> Also, they don't cache the mempool pointer, but look at mbuf[0].pool at every burst;
> so their tx queue structure doesn't have a mempool pointer field.
> And they check an offload flag (either the bit in the raw offload field, or a shadow
> variable for the relevant offload flag), instead of checking the mempool pointer.
>
> Other drivers can be improved, and I have submitted an optimization patch for the
> i40e driver with some of the things I do in this patch:
> https://inbox.dpdk.org/dev/20250624061238.89259-1-
> mb@smartsharesystems.com/
>
> > Is it documented in ethdev?
>
> The RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE flag is documented.
> How to implement it in the drivers is not.
>
> -Morten


^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH] net/null: Add fast mbuf release TX offload
  2025-06-27 12:07     ` Varghese, Vipin
@ 2025-07-26  4:34       ` Morten Brørup
  2025-07-28  8:22         ` Varghese, Vipin
  0 siblings, 1 reply; 12+ messages in thread
From: Morten Brørup @ 2025-07-26  4:34 UTC (permalink / raw)
  To: Varghese, Vipin, Stephen Hemminger; +Cc: Tetsuya Mukawa, dev

> From: Varghese, Vipin [mailto:Vipin.Varghese@amd.com]
> Sent: Friday, 27 June 2025 14.07
> 
> [Public]
> 
> Hi Morten,
> 
> We have tested the effect of the patch using func-latency and PPs via
> testpmd.
> Please find our observations below
> 
>  - DPDK tag: 25.07-rc1
>  - compiler: gcc 14.2
>  - platform: AMD EPYC 8534P 64core 2.3GHz
>  - app cmd:
>  -- One port: ` sudo build/app/dpdk-testpmd -l 15,16  --vdev=net_null1 -
> -no-pci  -- --nb-cores=1 --nb-ports=1 --txq=1 --rxq=1 --txd=2048 --
> rxd=2048  -a --forward-mode=io --stats-period=1`
>  -- Two port: ` sudo build/app/dpdk-testpmd -l 15,16,17  --
> vdev=net_null1 --vdev=net_null2 --no-pci  -- --nb-cores=2 --nb-ports=2 -
> -txq=1 --rxq=1 --txd=2048 --rxd=2048  -a --forward-mode=io --stats-
> period=1`
> 
> Result 1 port:
>  - Before patch: TX MPPs 117.61, RX-PPs 117.67, Func-latency TX: 1918ns,
> Func-latency free-bulk: 2667ns
>  - After patch: TX MPPs 117.55, RX-PPs 117.54, Func-latency TX: 1921ns,
> Func-latency free-bulk: 2660ns
> 
> Result 2 port:
>  - Before patch: TX MPPs 117.61, RX-PPs 117.67, Func-latency TX: 1942ns,
> Func-latency free-bulk: 2557ns
>  - After patch: TX MPPs 117.54, RX-PPs 117.54, Func-latency TX: 1946ns,
> Func-latency free-bulk: 2740ns
> 
> Perf Top: diff before vs after shows 13.84% vs 13.79%
> 
> Reviewed-by: Thiyagarjan P <Thiyagarajan.P@amd.com>
> Tested-by: Vipin Varghese <Vipin.Varghese@amd.com>

Thank you for reviewing and testing.

> 
> Clarification request: `with fast-mbuf-free on single port we see free-
> bulk reduction by -7ns. But null_tx increase by +3ns. TX PPs reduction
> by 0.07 Mpps. Is this anomaly of null_net PMD?`

I have finally found the bug in my patch:
It announces device-level capability for FAST_FREE, but ignores device-level FAST_FREE configuration, and uses queue-level FAST_FREE configuration instead.

Due to this bug, your testing probably shows the performance of the non-FAST_FREE code path.
The added comparison for FAST_FREE (code path not taken) might explain the null_tx +3ns increase.

I will send a v2 patch.

> 
> > >
> > > On Tue, 24 Jun 2025 18:14:16 +0000
> > > Morten Brørup <mb@smartsharesystems.com> wrote:
> > >
> > > > Added fast mbuf release, re-using the existing mbuf pool pointer
> in
> > > > the queue structure.
> > > >
> > > > Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> > >
> > > Makes sense.
> > >
> > > > ---
> > > >  drivers/net/null/rte_eth_null.c | 30 +++++++++++++++++++++++++++-
> --
> > > >  1 file changed, 27 insertions(+), 3 deletions(-)
> > > >
> > > > diff --git a/drivers/net/null/rte_eth_null.c
> > > b/drivers/net/null/rte_eth_null.c
> > > > index 8a9b74a03b..12c0d8d1ff 100644
> > > > --- a/drivers/net/null/rte_eth_null.c
> > > > +++ b/drivers/net/null/rte_eth_null.c
> > > > @@ -34,6 +34,17 @@ struct pmd_internals;  struct null_queue {
> > > >     struct pmd_internals *internals;
> > > >
> > > > +   /**
> > > > +    * For RX queue:
> > > > +    *  Mempool to allocate mbufs from.
> > > > +    *
> > > > +    * For TX queue:
> > > > +    *  Mempool to free mbufs to, if fast release of mbufs is
> enabled.
> > > > +    *  UINTPTR_MAX if the mempool for fast release of mbufs has
> not
> > > yet been detected.
> > > > +    *  NULL if fast release of mbufs is not enabled.
> > > > +    *
> > > > +    *  @see RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE
> > > > +    */
> > > >     struct rte_mempool *mb_pool;
> > >
> > > Do all drivers to it this way?
> >
> > No, I think most drivers have separate structures for rx and tx
> queues. This driver
> > doesn't so I'm reusing the existing mempool pointer.
> > Also, they don't cache the mempool pointer, but look at mbuf[0].pool
> at every burst;
> > so their tx queue structure doesn't have a mempool pointer field.
> > And they check an offload flag (either the bit in the raw offload
> field, or a shadow
> > variable for the relevant offload flag), instead of checking the
> mempool pointer.
> >
> > Other drivers can be improved, and I have submitted an optimization
> patch for the
> > i40e driver with some of the things I do in this patch:
> > https://inbox.dpdk.org/dev/20250624061238.89259-1-
> > mb@smartsharesystems.com/
> >
> > > Is it documented in ethdev?
> >
> > The RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE flag is documented.
> > How to implement it in the drivers is not.
> >
> > -Morten


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v2] net/null: Add fast mbuf release TX offload
  2025-06-24 18:14 [PATCH] net/null: Add fast mbuf release TX offload Morten Brørup
  2025-06-26 14:05 ` Stephen Hemminger
@ 2025-07-26  4:48 ` Morten Brørup
  2025-07-26  6:15   ` Ivan Malov
  1 sibling, 1 reply; 12+ messages in thread
From: Morten Brørup @ 2025-07-26  4:48 UTC (permalink / raw)
  To: dev, Tetsuya Mukawa, Stephen Hemminger, Vipin Varghese, Thiyagarjan P
  Cc: Morten Brørup

Added fast mbuf release, re-using the existing mbuf pool pointer
in the queue structure.

Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
---
v2:
* Also announce the offload as a per-queue capability.
* Added missing test of per-device offload configuration when configuring
  the queue.
---
 drivers/net/null/rte_eth_null.c | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
index 8a9b74a03b..09cfc74494 100644
--- a/drivers/net/null/rte_eth_null.c
+++ b/drivers/net/null/rte_eth_null.c
@@ -34,6 +34,17 @@ struct pmd_internals;
 struct null_queue {
 	struct pmd_internals *internals;
 
+	/**
+	 * For RX queue:
+	 *  Mempool to allocate mbufs from.
+	 *
+	 * For TX queue:
+	 *  Mempool to free mbufs to, if fast release of mbufs is enabled.
+	 *  UINTPTR_MAX if the mempool for fast release of mbufs has not yet been detected.
+	 *  NULL if fast release of mbufs is not enabled.
+	 *
+	 *  @see RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE
+	 */
 	struct rte_mempool *mb_pool;
 	void *dummy_packet;
 
@@ -151,7 +162,16 @@ eth_null_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 	for (i = 0; i < nb_bufs; i++)
 		bytes += rte_pktmbuf_pkt_len(bufs[i]);
 
-	rte_pktmbuf_free_bulk(bufs, nb_bufs);
+	if (h->mb_pool != NULL) { /* RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE */
+		if (unlikely(h->mb_pool == (void *)UINTPTR_MAX)) {
+			if (unlikely(nb_bufs == 0))
+				return 0; /* Do not dereference uninitialized bufs[0]. */
+			h->mb_pool = bufs[0]->pool;
+		}
+		rte_mbuf_raw_free_bulk(h->mb_pool, bufs, nb_bufs);
+	} else {
+		rte_pktmbuf_free_bulk(bufs, nb_bufs);
+	}
 	rte_atomic_fetch_add_explicit(&h->tx_pkts, nb_bufs, rte_memory_order_relaxed);
 	rte_atomic_fetch_add_explicit(&h->tx_bytes, bytes, rte_memory_order_relaxed);
 
@@ -259,7 +279,7 @@ static int
 eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
 		uint16_t nb_tx_desc __rte_unused,
 		unsigned int socket_id __rte_unused,
-		const struct rte_eth_txconf *tx_conf __rte_unused)
+		const struct rte_eth_txconf *tx_conf)
 {
 	struct rte_mbuf *dummy_packet;
 	struct pmd_internals *internals;
@@ -284,6 +304,10 @@ eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
 
 	internals->tx_null_queues[tx_queue_id].internals = internals;
 	internals->tx_null_queues[tx_queue_id].dummy_packet = dummy_packet;
+	internals->tx_null_queues[tx_queue_id].mb_pool =
+			(dev->data->dev_conf.txmode.offloads | tx_conf->offloads) &
+			RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE ?
+			(void *)UINTPTR_MAX : NULL;
 
 	return 0;
 }
@@ -309,7 +333,10 @@ eth_dev_info(struct rte_eth_dev *dev,
 	dev_info->max_rx_queues = RTE_DIM(internals->rx_null_queues);
 	dev_info->max_tx_queues = RTE_DIM(internals->tx_null_queues);
 	dev_info->min_rx_bufsize = 0;
-	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS | RTE_ETH_TX_OFFLOAD_MT_LOCKFREE;
+	dev_info->tx_queue_offload_capa = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
+	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
+			RTE_ETH_TX_OFFLOAD_MT_LOCKFREE |
+			dev_info->tx_queue_offload_capa;
 
 	dev_info->reta_size = internals->reta_size;
 	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
-- 
2.43.0


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] net/null: Add fast mbuf release TX offload
  2025-07-26  4:48 ` [PATCH v2] " Morten Brørup
@ 2025-07-26  6:15   ` Ivan Malov
  2025-07-28 13:27     ` Morten Brørup
  0 siblings, 1 reply; 12+ messages in thread
From: Ivan Malov @ 2025-07-26  6:15 UTC (permalink / raw)
  To: Morten Brørup
  Cc: dev, Tetsuya Mukawa, Stephen Hemminger, Vipin Varghese, Thiyagarjan P

[-- Attachment #1: Type: text/plain, Size: 4018 bytes --]

Hi Morten,

Good patch. Please see below.

On Sat, 26 Jul 2025, Morten Brørup wrote:

> Added fast mbuf release, re-using the existing mbuf pool pointer
> in the queue structure.
>
> Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> ---
> v2:
> * Also announce the offload as a per-queue capability.
> * Added missing test of per-device offload configuration when configuring
>  the queue.
> ---
> drivers/net/null/rte_eth_null.c | 33 ++++++++++++++++++++++++++++++---
> 1 file changed, 30 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/null/rte_eth_null.c b/drivers/net/null/rte_eth_null.c
> index 8a9b74a03b..09cfc74494 100644
> --- a/drivers/net/null/rte_eth_null.c
> +++ b/drivers/net/null/rte_eth_null.c
> @@ -34,6 +34,17 @@ struct pmd_internals;
> struct null_queue {
> 	struct pmd_internals *internals;
>
> +	/**
> +	 * For RX queue:
> +	 *  Mempool to allocate mbufs from.
> +	 *
> +	 * For TX queue:

Perhaps spell it 'Rx', 'Tx', but this is up to you.

> +	 *  Mempool to free mbufs to, if fast release of mbufs is enabled.
> +	 *  UINTPTR_MAX if the mempool for fast release of mbufs has not yet been detected.
> +	 *  NULL if fast release of mbufs is not enabled.
> +	 *
> +	 *  @see RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE
> +	 */

May be it would be better to have a separate 'tx_pkt_burst' callback, to avoid
conditional checks below. Though, I understand this will downgrade the per-queue
capability to the per-port only, so feel free to disregard this point.

> 	struct rte_mempool *mb_pool;
> 	void *dummy_packet;
>
> @@ -151,7 +162,16 @@ eth_null_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
> 	for (i = 0; i < nb_bufs; i++)
> 		bytes += rte_pktmbuf_pkt_len(bufs[i]);
>
> -	rte_pktmbuf_free_bulk(bufs, nb_bufs);
> +	if (h->mb_pool != NULL) { /* RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE */
> +		if (unlikely(h->mb_pool == (void *)UINTPTR_MAX)) {
> +			if (unlikely(nb_bufs == 0))
> +				return 0; /* Do not dereference uninitialized bufs[0]. */
> +			h->mb_pool = bufs[0]->pool;
> +		}
> +		rte_mbuf_raw_free_bulk(h->mb_pool, bufs, nb_bufs);
> +	} else {
> +		rte_pktmbuf_free_bulk(bufs, nb_bufs);
> +	}
> 	rte_atomic_fetch_add_explicit(&h->tx_pkts, nb_bufs, rte_memory_order_relaxed);
> 	rte_atomic_fetch_add_explicit(&h->tx_bytes, bytes, rte_memory_order_relaxed);
>
> @@ -259,7 +279,7 @@ static int
> eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
> 		uint16_t nb_tx_desc __rte_unused,
> 		unsigned int socket_id __rte_unused,
> -		const struct rte_eth_txconf *tx_conf __rte_unused)
> +		const struct rte_eth_txconf *tx_conf)
> {
> 	struct rte_mbuf *dummy_packet;
> 	struct pmd_internals *internals;
> @@ -284,6 +304,10 @@ eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
>
> 	internals->tx_null_queues[tx_queue_id].internals = internals;
> 	internals->tx_null_queues[tx_queue_id].dummy_packet = dummy_packet;
> +	internals->tx_null_queues[tx_queue_id].mb_pool =
> +			(dev->data->dev_conf.txmode.offloads | tx_conf->offloads) &
> +			RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE ?
> +			(void *)UINTPTR_MAX : NULL;

Given the fact that FAST_FREE and MULTI_SEGS are effectively conflicting,
wouldn't it be better to have a check for the presence of both flags here? I'm
not sure whether this is already checked at the generic layer above the PMD.

Thank you.

>
> 	return 0;
> }
> @@ -309,7 +333,10 @@ eth_dev_info(struct rte_eth_dev *dev,
> 	dev_info->max_rx_queues = RTE_DIM(internals->rx_null_queues);
> 	dev_info->max_tx_queues = RTE_DIM(internals->tx_null_queues);
> 	dev_info->min_rx_bufsize = 0;
> -	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS | RTE_ETH_TX_OFFLOAD_MT_LOCKFREE;
> +	dev_info->tx_queue_offload_capa = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
> +	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
> +			RTE_ETH_TX_OFFLOAD_MT_LOCKFREE |
> +			dev_info->tx_queue_offload_capa;
>
> 	dev_info->reta_size = internals->reta_size;
> 	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
> -- 
> 2.43.0
>
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH] net/null: Add fast mbuf release TX offload
  2025-07-26  4:34       ` Morten Brørup
@ 2025-07-28  8:22         ` Varghese, Vipin
  0 siblings, 0 replies; 12+ messages in thread
From: Varghese, Vipin @ 2025-07-28  8:22 UTC (permalink / raw)
  To: Morten Brørup, Stephen Hemminger, P, Thiyagarajan
  Cc: Tetsuya Mukawa, dev

[AMD Official Use Only - AMD Internal Distribution Only]

Snipped

> > [Public]
> >
> > Hi Morten,
> >
> > We have tested the effect of the patch using func-latency and PPs via
> > testpmd.
> > Please find our observations below
> >
> >  - DPDK tag: 25.07-rc1
> >  - compiler: gcc 14.2
> >  - platform: AMD EPYC 8534P 64core 2.3GHz
> >  - app cmd:
> >  -- One port: ` sudo build/app/dpdk-testpmd -l 15,16  --vdev=net_null1
> > - -no-pci  -- --nb-cores=1 --nb-ports=1 --txq=1 --rxq=1 --txd=2048 --
> > rxd=2048  -a --forward-mode=io --stats-period=1`
> >  -- Two port: ` sudo build/app/dpdk-testpmd -l 15,16,17  --
> > vdev=net_null1 --vdev=net_null2 --no-pci  -- --nb-cores=2 --nb-ports=2
> > -
> > -txq=1 --rxq=1 --txd=2048 --rxd=2048  -a --forward-mode=io --stats-
> > period=1`
> >
> > Result 1 port:
> >  - Before patch: TX MPPs 117.61, RX-PPs 117.67, Func-latency TX:
> > 1918ns, Func-latency free-bulk: 2667ns
> >  - After patch: TX MPPs 117.55, RX-PPs 117.54, Func-latency TX:
> > 1921ns, Func-latency free-bulk: 2660ns
> >
> > Result 2 port:
> >  - Before patch: TX MPPs 117.61, RX-PPs 117.67, Func-latency TX:
> > 1942ns, Func-latency free-bulk: 2557ns
> >  - After patch: TX MPPs 117.54, RX-PPs 117.54, Func-latency TX:
> > 1946ns, Func-latency free-bulk: 2740ns
> >
> > Perf Top: diff before vs after shows 13.84% vs 13.79%
> >
> > Reviewed-by: Thiyagarjan P <Thiyagarajan.P@amd.com>
> > Tested-by: Vipin Varghese <Vipin.Varghese@amd.com>
>
> Thank you for reviewing and testing.
>
> >
> > Clarification request: `with fast-mbuf-free on single port we see
> > free- bulk reduction by -7ns. But null_tx increase by +3ns. TX PPs
> > reduction by 0.07 Mpps. Is this anomaly of null_net PMD?`
>
> I have finally found the bug in my patch:
> It announces device-level capability for FAST_FREE, but ignores device-level
> FAST_FREE configuration, and uses queue-level FAST_FREE configuration
> instead.
>
> Due to this bug, your testing probably shows the performance of the non-
> FAST_FREE code path.
> The added comparison for FAST_FREE (code path not taken) might explain the
> null_tx +3ns increase.
>
> I will send a v2 patch.

Will check

>
> >
> > > >
> > > > On Tue, 24 Jun 2025 18:14:16 +0000 Morten Brørup
> > > > <mb@smartsharesystems.com> wrote:
> > > >
> > > > > Added fast mbuf release, re-using the existing mbuf pool pointer
> > in
> > > > > the queue structure.
> > > > >
> > > > > Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> > > >
> > > > Makes sense.
> > > >
> > > > > ---
> > > > >  drivers/net/null/rte_eth_null.c | 30
> > > > > +++++++++++++++++++++++++++-
> > --
> > > > >  1 file changed, 27 insertions(+), 3 deletions(-)
> > > > >
> > > > > diff --git a/drivers/net/null/rte_eth_null.c
> > > > b/drivers/net/null/rte_eth_null.c
> > > > > index 8a9b74a03b..12c0d8d1ff 100644
> > > > > --- a/drivers/net/null/rte_eth_null.c
> > > > > +++ b/drivers/net/null/rte_eth_null.c
> > > > > @@ -34,6 +34,17 @@ struct pmd_internals;  struct null_queue {
> > > > >     struct pmd_internals *internals;
> > > > >
> > > > > +   /**
> > > > > +    * For RX queue:
> > > > > +    *  Mempool to allocate mbufs from.
> > > > > +    *
> > > > > +    * For TX queue:
> > > > > +    *  Mempool to free mbufs to, if fast release of mbufs is
> > enabled.
> > > > > +    *  UINTPTR_MAX if the mempool for fast release of mbufs has
> > not
> > > > yet been detected.
> > > > > +    *  NULL if fast release of mbufs is not enabled.
> > > > > +    *
> > > > > +    *  @see RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE
> > > > > +    */
> > > > >     struct rte_mempool *mb_pool;
> > > >
> > > > Do all drivers to it this way?
> > >
> > > No, I think most drivers have separate structures for rx and tx
> > queues. This driver
> > > doesn't so I'm reusing the existing mempool pointer.
> > > Also, they don't cache the mempool pointer, but look at mbuf[0].pool
> > at every burst;
> > > so their tx queue structure doesn't have a mempool pointer field.
> > > And they check an offload flag (either the bit in the raw offload
> > field, or a shadow
> > > variable for the relevant offload flag), instead of checking the
> > mempool pointer.
> > >
> > > Other drivers can be improved, and I have submitted an optimization
> > patch for the
> > > i40e driver with some of the things I do in this patch:
> > > https://inbox.dpdk.org/dev/20250624061238.89259-1-
> > > mb@smartsharesystems.com/
> > >
> > > > Is it documented in ethdev?
> > >
> > > The RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE flag is documented.
> > > How to implement it in the drivers is not.
> > >
> > > -Morten


^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH v2] net/null: Add fast mbuf release TX offload
  2025-07-26  6:15   ` Ivan Malov
@ 2025-07-28 13:27     ` Morten Brørup
  2025-07-28 13:51       ` Ivan Malov
  2025-07-28 15:42       ` Konstantin Ananyev
  0 siblings, 2 replies; 12+ messages in thread
From: Morten Brørup @ 2025-07-28 13:27 UTC (permalink / raw)
  To: Ivan Malov
  Cc: dev, Tetsuya Mukawa, Stephen Hemminger, Vipin Varghese, Thiyagarjan P

> From: Ivan Malov [mailto:ivan.malov@arknetworks.am]
> Sent: Saturday, 26 July 2025 08.15
> 
> Hi Morten,
> 
> Good patch. Please see below.
> 
> On Sat, 26 Jul 2025, Morten Brørup wrote:
> 
> > Added fast mbuf release, re-using the existing mbuf pool pointer
> > in the queue structure.
> >
> > Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> > ---
> > v2:
> > * Also announce the offload as a per-queue capability.
> > * Added missing test of per-device offload configuration when
> configuring
> >  the queue.
> > ---
> > drivers/net/null/rte_eth_null.c | 33 ++++++++++++++++++++++++++++++---
> > 1 file changed, 30 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/net/null/rte_eth_null.c
> b/drivers/net/null/rte_eth_null.c
> > index 8a9b74a03b..09cfc74494 100644
> > --- a/drivers/net/null/rte_eth_null.c
> > +++ b/drivers/net/null/rte_eth_null.c
> > @@ -34,6 +34,17 @@ struct pmd_internals;
> > struct null_queue {
> > 	struct pmd_internals *internals;
> >
> > +	/**
> > +	 * For RX queue:
> > +	 *  Mempool to allocate mbufs from.
> > +	 *
> > +	 * For TX queue:
> 
> Perhaps spell it 'Rx', 'Tx', but this is up to you.

I just checked, and it seems all three spellings "rx", "Rx" and "RX" are being used in DPDK.
I personally prefer RX, so I'll keep that.

> 
> > +	 *  Mempool to free mbufs to, if fast release of mbufs is enabled.
> > +	 *  UINTPTR_MAX if the mempool for fast release of mbufs has not
> yet been detected.
> > +	 *  NULL if fast release of mbufs is not enabled.
> > +	 *
> > +	 *  @see RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE
> > +	 */
> 
> May be it would be better to have a separate 'tx_pkt_burst' callback, to
> avoid
> conditional checks below. Though, I understand this will downgrade the
> per-queue
> capability to the per-port only, so feel free to disregard this point.

I considered this, and I can imagine an application using FAST_FREE for its primary queues, and normal free for some secondary queues.
Looking at other drivers - which have implemented a runtime check, not separate callbacks for FAST_FREE - I guess they came to the same conclusion.

> 
> > 	struct rte_mempool *mb_pool;
> > 	void *dummy_packet;
> >
> > @@ -151,7 +162,16 @@ eth_null_tx(void *q, struct rte_mbuf **bufs,
> uint16_t nb_bufs)
> > 	for (i = 0; i < nb_bufs; i++)
> > 		bytes += rte_pktmbuf_pkt_len(bufs[i]);
> >
> > -	rte_pktmbuf_free_bulk(bufs, nb_bufs);
> > +	if (h->mb_pool != NULL) { /* RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE */
> > +		if (unlikely(h->mb_pool == (void *)UINTPTR_MAX)) {
> > +			if (unlikely(nb_bufs == 0))
> > +				return 0; /* Do not dereference uninitialized
> bufs[0]. */
> > +			h->mb_pool = bufs[0]->pool;
> > +		}
> > +		rte_mbuf_raw_free_bulk(h->mb_pool, bufs, nb_bufs);
> > +	} else {
> > +		rte_pktmbuf_free_bulk(bufs, nb_bufs);
> > +	}
> > 	rte_atomic_fetch_add_explicit(&h->tx_pkts, nb_bufs,
> rte_memory_order_relaxed);
> > 	rte_atomic_fetch_add_explicit(&h->tx_bytes, bytes,
> rte_memory_order_relaxed);
> >
> > @@ -259,7 +279,7 @@ static int
> > eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
> > 		uint16_t nb_tx_desc __rte_unused,
> > 		unsigned int socket_id __rte_unused,
> > -		const struct rte_eth_txconf *tx_conf __rte_unused)
> > +		const struct rte_eth_txconf *tx_conf)
> > {
> > 	struct rte_mbuf *dummy_packet;
> > 	struct pmd_internals *internals;
> > @@ -284,6 +304,10 @@ eth_tx_queue_setup(struct rte_eth_dev *dev,
> uint16_t tx_queue_id,
> >
> > 	internals->tx_null_queues[tx_queue_id].internals = internals;
> > 	internals->tx_null_queues[tx_queue_id].dummy_packet =
> dummy_packet;
> > +	internals->tx_null_queues[tx_queue_id].mb_pool =
> > +			(dev->data->dev_conf.txmode.offloads | tx_conf-
> >offloads) &
> > +			RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE ?
> > +			(void *)UINTPTR_MAX : NULL;
> 
> Given the fact that FAST_FREE and MULTI_SEGS are effectively
> conflicting,
> wouldn't it be better to have a check for the presence of both flags
> here? I'm
> not sure whether this is already checked at the generic layer above the
> PMD.

Interesting thought - got me looking deeper into this.

It seems MULTI_SEGS is primarily a capability flag.
The description of the MULTI_SEGS flag [1] could be interpreted that way too:
/** Device supports multi segment send. */

[1]: https://elixir.bootlin.com/dpdk/v25.07/source/lib/ethdev/rte_ethdev.h#L1614

E.g. the i40e driver offers MULTI_SEGS capability per-device, but not per-queue. And it doesn't use the MULTI_SEGS flag for any purpose (beyond capability reporting).

Furthermore, enabling MULTI_SEGS on TX (per device or per queue) wouldn't mean that all transmitted packets are segmented; it only means that the driver must be able to handle segmented packets.
I.e. MULTI_SEGS could be enabled on a device, and yet it would be acceptable to enable FAST_FREE on a queue on that device.

> 
> Thank you.

Thank you for reviewing.

> 
> >
> > 	return 0;
> > }
> > @@ -309,7 +333,10 @@ eth_dev_info(struct rte_eth_dev *dev,
> > 	dev_info->max_rx_queues = RTE_DIM(internals->rx_null_queues);
> > 	dev_info->max_tx_queues = RTE_DIM(internals->tx_null_queues);
> > 	dev_info->min_rx_bufsize = 0;
> > -	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
> RTE_ETH_TX_OFFLOAD_MT_LOCKFREE;
> > +	dev_info->tx_queue_offload_capa =
> RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
> > +	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
> > +			RTE_ETH_TX_OFFLOAD_MT_LOCKFREE |
> > +			dev_info->tx_queue_offload_capa;
> >
> > 	dev_info->reta_size = internals->reta_size;
> > 	dev_info->flow_type_rss_offloads = internals-
> >flow_type_rss_offloads;
> > --
> > 2.43.0
> >
> >

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH v2] net/null: Add fast mbuf release TX offload
  2025-07-28 13:27     ` Morten Brørup
@ 2025-07-28 13:51       ` Ivan Malov
  2025-07-28 15:42       ` Konstantin Ananyev
  1 sibling, 0 replies; 12+ messages in thread
From: Ivan Malov @ 2025-07-28 13:51 UTC (permalink / raw)
  To: Morten Brørup
  Cc: dev, Tetsuya Mukawa, Stephen Hemminger, Vipin Varghese, Thiyagarjan P

[-- Attachment #1: Type: text/plain, Size: 5839 bytes --]

Hi Morten,

On Mon, 28 Jul 2025, Morten Brørup wrote:

>> From: Ivan Malov [mailto:ivan.malov@arknetworks.am]
>> Sent: Saturday, 26 July 2025 08.15
>>
>> Hi Morten,
>>
>> Good patch. Please see below.
>>
>> On Sat, 26 Jul 2025, Morten Brørup wrote:
>>
>>> Added fast mbuf release, re-using the existing mbuf pool pointer
>>> in the queue structure.
>>>
>>> Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
>>> ---
>>> v2:
>>> * Also announce the offload as a per-queue capability.
>>> * Added missing test of per-device offload configuration when
>> configuring
>>>  the queue.
>>> ---
>>> drivers/net/null/rte_eth_null.c | 33 ++++++++++++++++++++++++++++++---
>>> 1 file changed, 30 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/net/null/rte_eth_null.c
>> b/drivers/net/null/rte_eth_null.c
>>> index 8a9b74a03b..09cfc74494 100644
>>> --- a/drivers/net/null/rte_eth_null.c
>>> +++ b/drivers/net/null/rte_eth_null.c
>>> @@ -34,6 +34,17 @@ struct pmd_internals;
>>> struct null_queue {
>>> 	struct pmd_internals *internals;
>>>
>>> +	/**
>>> +	 * For RX queue:
>>> +	 *  Mempool to allocate mbufs from.
>>> +	 *
>>> +	 * For TX queue:
>>
>> Perhaps spell it 'Rx', 'Tx', but this is up to you.
>
> I just checked, and it seems all three spellings "rx", "Rx" and "RX" are being used in DPDK.
> I personally prefer RX, so I'll keep that.
>
>>
>>> +	 *  Mempool to free mbufs to, if fast release of mbufs is enabled.
>>> +	 *  UINTPTR_MAX if the mempool for fast release of mbufs has not
>> yet been detected.
>>> +	 *  NULL if fast release of mbufs is not enabled.
>>> +	 *
>>> +	 *  @see RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE
>>> +	 */
>>
>> May be it would be better to have a separate 'tx_pkt_burst' callback, to
>> avoid
>> conditional checks below. Though, I understand this will downgrade the
>> per-queue
>> capability to the per-port only, so feel free to disregard this point.
>
> I considered this, and I can imagine an application using FAST_FREE for its primary queues, and normal free for some secondary queues.
> Looking at other drivers - which have implemented a runtime check, not separate callbacks for FAST_FREE - I guess they came to the same conclusion.
>
>>
>>> 	struct rte_mempool *mb_pool;
>>> 	void *dummy_packet;
>>>
>>> @@ -151,7 +162,16 @@ eth_null_tx(void *q, struct rte_mbuf **bufs,
>> uint16_t nb_bufs)
>>> 	for (i = 0; i < nb_bufs; i++)
>>> 		bytes += rte_pktmbuf_pkt_len(bufs[i]);
>>>
>>> -	rte_pktmbuf_free_bulk(bufs, nb_bufs);
>>> +	if (h->mb_pool != NULL) { /* RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE */
>>> +		if (unlikely(h->mb_pool == (void *)UINTPTR_MAX)) {
>>> +			if (unlikely(nb_bufs == 0))
>>> +				return 0; /* Do not dereference uninitialized
>> bufs[0]. */
>>> +			h->mb_pool = bufs[0]->pool;
>>> +		}
>>> +		rte_mbuf_raw_free_bulk(h->mb_pool, bufs, nb_bufs);
>>> +	} else {
>>> +		rte_pktmbuf_free_bulk(bufs, nb_bufs);
>>> +	}
>>> 	rte_atomic_fetch_add_explicit(&h->tx_pkts, nb_bufs,
>> rte_memory_order_relaxed);
>>> 	rte_atomic_fetch_add_explicit(&h->tx_bytes, bytes,
>> rte_memory_order_relaxed);
>>>
>>> @@ -259,7 +279,7 @@ static int
>>> eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
>>> 		uint16_t nb_tx_desc __rte_unused,
>>> 		unsigned int socket_id __rte_unused,
>>> -		const struct rte_eth_txconf *tx_conf __rte_unused)
>>> +		const struct rte_eth_txconf *tx_conf)
>>> {
>>> 	struct rte_mbuf *dummy_packet;
>>> 	struct pmd_internals *internals;
>>> @@ -284,6 +304,10 @@ eth_tx_queue_setup(struct rte_eth_dev *dev,
>> uint16_t tx_queue_id,
>>>
>>> 	internals->tx_null_queues[tx_queue_id].internals = internals;
>>> 	internals->tx_null_queues[tx_queue_id].dummy_packet =
>> dummy_packet;
>>> +	internals->tx_null_queues[tx_queue_id].mb_pool =
>>> +			(dev->data->dev_conf.txmode.offloads | tx_conf-
>>> offloads) &
>>> +			RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE ?
>>> +			(void *)UINTPTR_MAX : NULL;
>>
>> Given the fact that FAST_FREE and MULTI_SEGS are effectively
>> conflicting,
>> wouldn't it be better to have a check for the presence of both flags
>> here? I'm
>> not sure whether this is already checked at the generic layer above the
>> PMD.
>
> Interesting thought - got me looking deeper into this.
>
> It seems MULTI_SEGS is primarily a capability flag.
> The description of the MULTI_SEGS flag [1] could be interpreted that way too:
> /** Device supports multi segment send. */
>
> [1]: https://elixir.bootlin.com/dpdk/v25.07/source/lib/ethdev/rte_ethdev.h#L1614
>
> E.g. the i40e driver offers MULTI_SEGS capability per-device, but not per-queue. And it doesn't use the MULTI_SEGS flag for any purpose (beyond capability reporting).
>
> Furthermore, enabling MULTI_SEGS on TX (per device or per queue) wouldn't mean that all transmitted packets are segmented; it only means that the driver must be able to handle segmented packets.
> I.e. MULTI_SEGS could be enabled on a device, and yet it would be acceptable to enable FAST_FREE on a queue on that device.

Yes, you are correct and I apologise. It's a capability, not the requestor bit.

Thank you.

>
>>
>> Thank you.
>
> Thank you for reviewing.
>
>>
>>>
>>> 	return 0;
>>> }
>>> @@ -309,7 +333,10 @@ eth_dev_info(struct rte_eth_dev *dev,
>>> 	dev_info->max_rx_queues = RTE_DIM(internals->rx_null_queues);
>>> 	dev_info->max_tx_queues = RTE_DIM(internals->tx_null_queues);
>>> 	dev_info->min_rx_bufsize = 0;
>>> -	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
>> RTE_ETH_TX_OFFLOAD_MT_LOCKFREE;
>>> +	dev_info->tx_queue_offload_capa =
>> RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
>>> +	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
>>> +			RTE_ETH_TX_OFFLOAD_MT_LOCKFREE |
>>> +			dev_info->tx_queue_offload_capa;
>>>
>>> 	dev_info->reta_size = internals->reta_size;
>>> 	dev_info->flow_type_rss_offloads = internals-
>>> flow_type_rss_offloads;
>>> --
>>> 2.43.0
>>>
>>>
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH v2] net/null: Add fast mbuf release TX offload
  2025-07-28 13:27     ` Morten Brørup
  2025-07-28 13:51       ` Ivan Malov
@ 2025-07-28 15:42       ` Konstantin Ananyev
  2025-07-28 16:42         ` Morten Brørup
  1 sibling, 1 reply; 12+ messages in thread
From: Konstantin Ananyev @ 2025-07-28 15:42 UTC (permalink / raw)
  To: Morten Brørup, Ivan Malov
  Cc: dev, Tetsuya Mukawa, Stephen Hemminger, Vipin Varghese, Thiyagarjan P



> > Hi Morten,
> >
> > Good patch. Please see below.
> >
> > On Sat, 26 Jul 2025, Morten Brørup wrote:
> >
> > > Added fast mbuf release, re-using the existing mbuf pool pointer
> > > in the queue structure.
> > >
> > > Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> > > ---
> > > v2:
> > > * Also announce the offload as a per-queue capability.
> > > * Added missing test of per-device offload configuration when
> > configuring
> > >  the queue.
> > > ---
> > > drivers/net/null/rte_eth_null.c | 33 ++++++++++++++++++++++++++++++---
> > > 1 file changed, 30 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/drivers/net/null/rte_eth_null.c
> > b/drivers/net/null/rte_eth_null.c
> > > index 8a9b74a03b..09cfc74494 100644
> > > --- a/drivers/net/null/rte_eth_null.c
> > > +++ b/drivers/net/null/rte_eth_null.c
> > > @@ -34,6 +34,17 @@ struct pmd_internals;
> > > struct null_queue {
> > > 	struct pmd_internals *internals;
> > >
> > > +	/**
> > > +	 * For RX queue:
> > > +	 *  Mempool to allocate mbufs from.
> > > +	 *
> > > +	 * For TX queue:
> >
> > Perhaps spell it 'Rx', 'Tx', but this is up to you.
> 
> I just checked, and it seems all three spellings "rx", "Rx" and "RX" are being used in DPDK.
> I personally prefer RX, so I'll keep that.
> 
> >
> > > +	 *  Mempool to free mbufs to, if fast release of mbufs is enabled.
> > > +	 *  UINTPTR_MAX if the mempool for fast release of mbufs has not
> > yet been detected.
> > > +	 *  NULL if fast release of mbufs is not enabled.
> > > +	 *
> > > +	 *  @see RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE
> > > +	 */
> >
> > May be it would be better to have a separate 'tx_pkt_burst' callback, to
> > avoid
> > conditional checks below. Though, I understand this will downgrade the
> > per-queue
> > capability to the per-port only, so feel free to disregard this point.
> 
> I considered this, and I can imagine an application using FAST_FREE for its primary queues, and normal free for some secondary
> queues.
> Looking at other drivers - which have implemented a runtime check, not separate callbacks for FAST_FREE - I guess they came to the
> same conclusion.
> 
> >
> > > 	struct rte_mempool *mb_pool;
> > > 	void *dummy_packet;
> > >
> > > @@ -151,7 +162,16 @@ eth_null_tx(void *q, struct rte_mbuf **bufs,
> > uint16_t nb_bufs)
> > > 	for (i = 0; i < nb_bufs; i++)
> > > 		bytes += rte_pktmbuf_pkt_len(bufs[i]);
> > >
> > > -	rte_pktmbuf_free_bulk(bufs, nb_bufs);
> > > +	if (h->mb_pool != NULL) { /* RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE */
> > > +		if (unlikely(h->mb_pool == (void *)UINTPTR_MAX)) {
> > > +			if (unlikely(nb_bufs == 0))
> > > +				return 0; /* Do not dereference uninitialized
> > bufs[0]. */
> > > +			h->mb_pool = bufs[0]->pool;
> > > +		}
> > > +		rte_mbuf_raw_free_bulk(h->mb_pool, bufs, nb_bufs);
> > > +	} else {
> > > +		rte_pktmbuf_free_bulk(bufs, nb_bufs);
> > > +	}
> > > 	rte_atomic_fetch_add_explicit(&h->tx_pkts, nb_bufs,
> > rte_memory_order_relaxed);
> > > 	rte_atomic_fetch_add_explicit(&h->tx_bytes, bytes,
> > rte_memory_order_relaxed);
> > >
> > > @@ -259,7 +279,7 @@ static int
> > > eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
> > > 		uint16_t nb_tx_desc __rte_unused,
> > > 		unsigned int socket_id __rte_unused,
> > > -		const struct rte_eth_txconf *tx_conf __rte_unused)
> > > +		const struct rte_eth_txconf *tx_conf)
> > > {
> > > 	struct rte_mbuf *dummy_packet;
> > > 	struct pmd_internals *internals;
> > > @@ -284,6 +304,10 @@ eth_tx_queue_setup(struct rte_eth_dev *dev,
> > uint16_t tx_queue_id,
> > >
> > > 	internals->tx_null_queues[tx_queue_id].internals = internals;
> > > 	internals->tx_null_queues[tx_queue_id].dummy_packet =
> > dummy_packet;
> > > +	internals->tx_null_queues[tx_queue_id].mb_pool =
> > > +			(dev->data->dev_conf.txmode.offloads | tx_conf-
> > >offloads) &
> > > +			RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE ?
> > > +			(void *)UINTPTR_MAX : NULL;
> >
> > Given the fact that FAST_FREE and MULTI_SEGS are effectively
> > conflicting,
> > wouldn't it be better to have a check for the presence of both flags
> > here? I'm
> > not sure whether this is already checked at the generic layer above the
> > PMD.
> 
> Interesting thought - got me looking deeper into this.
> 
> It seems MULTI_SEGS is primarily a capability flag.
> The description of the MULTI_SEGS flag [1] could be interpreted that way too:
> /** Device supports multi segment send. */
> 
> [1]: https://elixir.bootlin.com/dpdk/v25.07/source/lib/ethdev/rte_ethdev.h#L1614

In fact, I believe it serves both purposes: report capabilities and request for offloads to enable.
Few example, I believe request this offload:
https://elixir.bootlin.com/dpdk/v25.07/source/examples/ip_fragmentation/main.c#L156
https://elixir.bootlin.com/dpdk/v25.07/source/examples/ipsec-secgw/ipsec-secgw.c#L1985
https://elixir.bootlin.com/dpdk/v25.07/source/examples/ip_reassembly/main.c#L177

> 
> E.g. the i40e driver offers MULTI_SEGS capability per-device, but not per-queue. And it doesn't use the MULTI_SEGS flag for any
> purpose (beyond capability reporting).
> 
> Furthermore, enabling MULTI_SEGS on TX (per device or per queue) wouldn't mean that all transmitted packets are segmented; it
> only means that the driver must be able to handle segmented packets.

Yep.
 
> I.e. MULTI_SEGS could be enabled on a device, and yet it would be acceptable to enable FAST_FREE on a queue on that device.

In theory yes... you probably can have one TX queue with FAST_FREE (no multi-seg packets) and another TX queue serving multi-seg packets.
Again, probably some drivers can even support both offloads on the same TX queue, 
as long as conditions for the FAST_FREE offload are still satisfied: single mempool, refcnt==1, no indirect mbufs, etc. 
Though in practice, using MULTI_SEG usually implies usage of all these mbuf features that are not compatible with FAST_FREE.
BTW, I see many of the DPDK examples do use both FAST_FREE and MULTI_SEG.
TBH - I don't understand how it works together, from my memories - for many cases it just shouldn't.
 
> 
> >
> > Thank you.
> 
> Thank you for reviewing.
> 
> >
> > >
> > > 	return 0;
> > > }
> > > @@ -309,7 +333,10 @@ eth_dev_info(struct rte_eth_dev *dev,
> > > 	dev_info->max_rx_queues = RTE_DIM(internals->rx_null_queues);
> > > 	dev_info->max_tx_queues = RTE_DIM(internals->tx_null_queues);
> > > 	dev_info->min_rx_bufsize = 0;
> > > -	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
> > RTE_ETH_TX_OFFLOAD_MT_LOCKFREE;
> > > +	dev_info->tx_queue_offload_capa =
> > RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
> > > +	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
> > > +			RTE_ETH_TX_OFFLOAD_MT_LOCKFREE |
> > > +			dev_info->tx_queue_offload_capa;
> > >
> > > 	dev_info->reta_size = internals->reta_size;
> > > 	dev_info->flow_type_rss_offloads = internals-
> > >flow_type_rss_offloads;
> > > --
> > > 2.43.0
> > >
> > >

^ permalink raw reply	[flat|nested] 12+ messages in thread

* RE: [PATCH v2] net/null: Add fast mbuf release TX offload
  2025-07-28 15:42       ` Konstantin Ananyev
@ 2025-07-28 16:42         ` Morten Brørup
  0 siblings, 0 replies; 12+ messages in thread
From: Morten Brørup @ 2025-07-28 16:42 UTC (permalink / raw)
  To: Konstantin Ananyev, Ivan Malov
  Cc: dev, Tetsuya Mukawa, Stephen Hemminger, Vipin Varghese, Thiyagarjan P

> From: Konstantin Ananyev [mailto:konstantin.ananyev@huawei.com]
> Sent: Monday, 28 July 2025 17.42
> 
> > > Hi Morten,
> > >
> > > Good patch. Please see below.
> > >
> > > On Sat, 26 Jul 2025, Morten Brørup wrote:
> > >
> > > > Added fast mbuf release, re-using the existing mbuf pool pointer
> > > > in the queue structure.
> > > >
> > > > Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> > > > ---
> > > > v2:
> > > > * Also announce the offload as a per-queue capability.
> > > > * Added missing test of per-device offload configuration when
> > > configuring
> > > >  the queue.
> > > > ---
> > > > drivers/net/null/rte_eth_null.c | 33 ++++++++++++++++++++++++++++++---
> > > > 1 file changed, 30 insertions(+), 3 deletions(-)
> > > >
> > > > diff --git a/drivers/net/null/rte_eth_null.c
> > > b/drivers/net/null/rte_eth_null.c
> > > > index 8a9b74a03b..09cfc74494 100644
> > > > --- a/drivers/net/null/rte_eth_null.c
> > > > +++ b/drivers/net/null/rte_eth_null.c
> > > > @@ -34,6 +34,17 @@ struct pmd_internals;
> > > > struct null_queue {
> > > > 	struct pmd_internals *internals;
> > > >
> > > > +	/**
> > > > +	 * For RX queue:
> > > > +	 *  Mempool to allocate mbufs from.
> > > > +	 *
> > > > +	 * For TX queue:
> > >
> > > Perhaps spell it 'Rx', 'Tx', but this is up to you.
> >
> > I just checked, and it seems all three spellings "rx", "Rx" and "RX" are
> being used in DPDK.
> > I personally prefer RX, so I'll keep that.
> >
> > >
> > > > +	 *  Mempool to free mbufs to, if fast release of mbufs is
> enabled.
> > > > +	 *  UINTPTR_MAX if the mempool for fast release of mbufs has not
> > > yet been detected.
> > > > +	 *  NULL if fast release of mbufs is not enabled.
> > > > +	 *
> > > > +	 *  @see RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE
> > > > +	 */
> > >
> > > May be it would be better to have a separate 'tx_pkt_burst' callback, to
> > > avoid
> > > conditional checks below. Though, I understand this will downgrade the
> > > per-queue
> > > capability to the per-port only, so feel free to disregard this point.
> >
> > I considered this, and I can imagine an application using FAST_FREE for its
> primary queues, and normal free for some secondary
> > queues.
> > Looking at other drivers - which have implemented a runtime check, not
> separate callbacks for FAST_FREE - I guess they came to the
> > same conclusion.
> >
> > >
> > > > 	struct rte_mempool *mb_pool;
> > > > 	void *dummy_packet;
> > > >
> > > > @@ -151,7 +162,16 @@ eth_null_tx(void *q, struct rte_mbuf **bufs,
> > > uint16_t nb_bufs)
> > > > 	for (i = 0; i < nb_bufs; i++)
> > > > 		bytes += rte_pktmbuf_pkt_len(bufs[i]);
> > > >
> > > > -	rte_pktmbuf_free_bulk(bufs, nb_bufs);
> > > > +	if (h->mb_pool != NULL) { /* RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE */
> > > > +		if (unlikely(h->mb_pool == (void *)UINTPTR_MAX)) {
> > > > +			if (unlikely(nb_bufs == 0))
> > > > +				return 0; /* Do not dereference uninitialized
> > > bufs[0]. */
> > > > +			h->mb_pool = bufs[0]->pool;
> > > > +		}
> > > > +		rte_mbuf_raw_free_bulk(h->mb_pool, bufs, nb_bufs);
> > > > +	} else {
> > > > +		rte_pktmbuf_free_bulk(bufs, nb_bufs);
> > > > +	}
> > > > 	rte_atomic_fetch_add_explicit(&h->tx_pkts, nb_bufs,
> > > rte_memory_order_relaxed);
> > > > 	rte_atomic_fetch_add_explicit(&h->tx_bytes, bytes,
> > > rte_memory_order_relaxed);
> > > >
> > > > @@ -259,7 +279,7 @@ static int
> > > > eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
> > > > 		uint16_t nb_tx_desc __rte_unused,
> > > > 		unsigned int socket_id __rte_unused,
> > > > -		const struct rte_eth_txconf *tx_conf __rte_unused)
> > > > +		const struct rte_eth_txconf *tx_conf)
> > > > {
> > > > 	struct rte_mbuf *dummy_packet;
> > > > 	struct pmd_internals *internals;
> > > > @@ -284,6 +304,10 @@ eth_tx_queue_setup(struct rte_eth_dev *dev,
> > > uint16_t tx_queue_id,
> > > >
> > > > 	internals->tx_null_queues[tx_queue_id].internals = internals;
> > > > 	internals->tx_null_queues[tx_queue_id].dummy_packet =
> > > dummy_packet;
> > > > +	internals->tx_null_queues[tx_queue_id].mb_pool =
> > > > +			(dev->data->dev_conf.txmode.offloads | tx_conf-
> > > >offloads) &
> > > > +			RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE ?
> > > > +			(void *)UINTPTR_MAX : NULL;
> > >
> > > Given the fact that FAST_FREE and MULTI_SEGS are effectively
> > > conflicting,
> > > wouldn't it be better to have a check for the presence of both flags
> > > here? I'm
> > > not sure whether this is already checked at the generic layer above the
> > > PMD.
> >
> > Interesting thought - got me looking deeper into this.
> >
> > It seems MULTI_SEGS is primarily a capability flag.
> > The description of the MULTI_SEGS flag [1] could be interpreted that way
> too:
> > /** Device supports multi segment send. */
> >
> > [1]:
> https://elixir.bootlin.com/dpdk/v25.07/source/lib/ethdev/rte_ethdev.h#L1614
> 
> In fact, I believe it serves both purposes: report capabilities and request
> for offloads to enable.
> Few example, I believe request this offload:
> https://elixir.bootlin.com/dpdk/v25.07/source/examples/ip_fragmentation/main.c
> #L156
> https://elixir.bootlin.com/dpdk/v25.07/source/examples/ipsec-secgw/ipsec-
> secgw.c#L1985
> https://elixir.bootlin.com/dpdk/v25.07/source/examples/ip_reassembly/main.c#L1
> 77

Another flag with unclear description. :-(

> 
> >
> > E.g. the i40e driver offers MULTI_SEGS capability per-device, but not per-
> queue. And it doesn't use the MULTI_SEGS flag for any
> > purpose (beyond capability reporting).
> >
> > Furthermore, enabling MULTI_SEGS on TX (per device or per queue) wouldn't
> mean that all transmitted packets are segmented; it
> > only means that the driver must be able to handle segmented packets.
> 
> Yep.
> 
> > I.e. MULTI_SEGS could be enabled on a device, and yet it would be acceptable
> to enable FAST_FREE on a queue on that device.
> 
> In theory yes... you probably can have one TX queue with FAST_FREE (no multi-
> seg packets) and another TX queue serving multi-seg packets.
> Again, probably some drivers can even support both offloads on the same TX
> queue,
> as long as conditions for the FAST_FREE offload are still satisfied: single
> mempool, refcnt==1, no indirect mbufs, etc.
> Though in practice, using MULTI_SEG usually implies usage of all these mbuf
> features that are not-compatible with FAST_FREE.
> BTW,  I see many of DPDK examples - do use both FAST_FREE and MULTI_SEG.
> TBH - I don't understand how it works together, from my memories - for many
> cases it just shouldn't.

Agree: Using FAST_FREE and MULTI_SEG together shouldn't work.
But if a driver (e.g. i40e) doesn't support configuring MULTI_SEG per queue, only per device, it would be impossible to configure FAST_FREE for one queue and MULTI_SEG for another queue.
Cleaning up this mess would break applications that assume MULTI_SEG is for capability only, and thus don't set it when configuring the device or queue. And applications that configure MULTI_SEG on a device and FAST_FREE on a queue.

Slightly related: I suspect that FAST_FREE might not be implemented 100 % correctly in all drivers, so I submitted a patch [2] to verify that FAST_FREE'ed mbufs conform to the required state of mbufs held in the mbuf pool. (No specific drivers in mind, just a weak hunch.)
Any drivers implementing FAST_FREE should use rte_mbuf_raw_free_bulk() instead of rte_mempool_put_bulk() to benefit from this patch, especially when conformance testing the driver.

[2]: https://inbox.dpdk.org/dev/20250722093431.555214-1-mb@smartsharesystems.com/

> 
> >
> > >
> > > Thank you.
> >
> > Thank you for reviewing.
> >
> > >
> > > >
> > > > 	return 0;
> > > > }
> > > > @@ -309,7 +333,10 @@ eth_dev_info(struct rte_eth_dev *dev,
> > > > 	dev_info->max_rx_queues = RTE_DIM(internals->rx_null_queues);
> > > > 	dev_info->max_tx_queues = RTE_DIM(internals->tx_null_queues);
> > > > 	dev_info->min_rx_bufsize = 0;
> > > > -	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
> > > RTE_ETH_TX_OFFLOAD_MT_LOCKFREE;
> > > > +	dev_info->tx_queue_offload_capa =
> > > RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
> > > > +	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
> > > > +			RTE_ETH_TX_OFFLOAD_MT_LOCKFREE |
> > > > +			dev_info->tx_queue_offload_capa;
> > > >
> > > > 	dev_info->reta_size = internals->reta_size;
> > > > 	dev_info->flow_type_rss_offloads = internals-
> > > >flow_type_rss_offloads;
> > > > --
> > > > 2.43.0
> > > >
> > > >

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2025-07-28 16:42 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-06-24 18:14 [PATCH] net/null: Add fast mbuf release TX offload Morten Brørup
2025-06-26 14:05 ` Stephen Hemminger
2025-06-26 15:44   ` Morten Brørup
2025-06-27 12:07     ` Varghese, Vipin
2025-07-26  4:34       ` Morten Brørup
2025-07-28  8:22         ` Varghese, Vipin
2025-07-26  4:48 ` [PATCH v2] " Morten Brørup
2025-07-26  6:15   ` Ivan Malov
2025-07-28 13:27     ` Morten Brørup
2025-07-28 13:51       ` Ivan Malov
2025-07-28 15:42       ` Konstantin Ananyev
2025-07-28 16:42         ` Morten Brørup

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).