From: Bruce Richardson <bruce.richardson@intel.com>
To: dev@dpdk.org
Cc: Bruce Richardson <bruce.richardson@intel.com>,
	Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>,
	Ian Stokes <ian.stokes@intel.com>,
	Vladimir Medvedkin <vladimir.medvedkin@intel.com>,
	Anatoly Burakov <anatoly.burakov@intel.com>
Subject: [RFC PATCH 13/21] net/iavf: use common Tx free fn for AVX-512
Date: Fri, 22 Nov 2024 12:54:06 +0000
Message-ID: <20241122125418.2857301-14-bruce.richardson@intel.com>
In-Reply-To: <20241122125418.2857301-1-bruce.richardson@intel.com>

Switch the iavf driver to use the common Tx free function. This requires
one additional parameter to that function: iavf sometimes uses context
descriptors, in which case each SW ring slot covers two descriptors in
the hardware ring rather than one.
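
As an illustration of how the new parameter is used (a sketch of the
relevant lines from the hunk below, not additional code): ctx_descs is
a bool that promotes to 0 or 1, so shifting by it halves both the
number of SW ring entries to free and the starting index whenever
context descriptors are in use:

	/* With ctx_descs set, each SW ring entry backs two HW
	 * descriptors, so halve the free threshold and the DD index
	 * by shifting right by the flag (a no-op when it is false).
	 */
	const uint32_t n = txq->tx_rs_thresh >> ctx_descs;
	struct ieth_vec_tx_entry *txep = txq->sw_ring_v;
	txep += (txq->tx_next_dd >> ctx_descs) - (n - 1);

Because the shift is by zero when ctx_descs is false, the i40e and ice
call sites can simply pass false and keep their existing behaviour.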

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
 .../common/intel_eth/ieth_rxtx_vec_common.h   |   6 +-
 drivers/net/i40e/i40e_rxtx_vec_avx512.c       |   2 +-
 drivers/net/iavf/iavf_rxtx_vec_avx512.c       | 119 +-----------------
 drivers/net/ice/ice_rxtx_vec_avx512.c         |   2 +-
 4 files changed, 7 insertions(+), 122 deletions(-)

diff --git a/drivers/common/intel_eth/ieth_rxtx_vec_common.h b/drivers/common/intel_eth/ieth_rxtx_vec_common.h
index 61b48c88da..a4490f2dca 100644
--- a/drivers/common/intel_eth/ieth_rxtx_vec_common.h
+++ b/drivers/common/intel_eth/ieth_rxtx_vec_common.h
@@ -158,7 +158,7 @@ ieth_tx_free_bufs(struct ieth_tx_queue *txq, ieth_desc_done_fn desc_done)
 }
 
 static __rte_always_inline int
-ieth_tx_free_bufs_vector(struct ieth_tx_queue *txq, ieth_desc_done_fn desc_done)
+ieth_tx_free_bufs_vector(struct ieth_tx_queue *txq, ieth_desc_done_fn desc_done, bool ctx_descs)
 {
 	int nb_free = 0;
 	struct rte_mbuf *free[IETH_VPMD_TX_MAX_FREE_BUF];
@@ -168,13 +168,13 @@ ieth_tx_free_bufs_vector(struct ieth_tx_queue *txq, ieth_desc_done_fn desc_done)
 	if (!desc_done(txq, txq->tx_next_dd))
 		return 0;
 
-	const uint32_t n = txq->tx_rs_thresh;
+	const uint32_t n = txq->tx_rs_thresh >> ctx_descs;
 
 	/* first buffer to free from S/W ring is at index
 	 * tx_next_dd - (tx_rs_thresh - 1)
 	 */
 	struct ieth_vec_tx_entry *txep = txq->sw_ring_v;
-	txep += txq->tx_next_dd - (n - 1);
+	txep += (txq->tx_next_dd >> ctx_descs) - (n - 1);
 
 	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE && (n & 31) == 0) {
 		struct rte_mempool *mp = txep[0].mbuf->pool;
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
index 23415c4949..0ab3a4f02c 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
@@ -829,7 +829,7 @@ i40e_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
 
 	if (txq->nb_tx_free < txq->tx_free_thresh)
-		ieth_tx_free_bufs_vector(txq, i40e_tx_desc_done);
+		ieth_tx_free_bufs_vector(txq, i40e_tx_desc_done, false);
 
 	nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
 	if (unlikely(nb_pkts == 0))
diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx512.c b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
index c774c0c365..391fbfcd4d 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_avx512.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_avx512.c
@@ -1844,121 +1844,6 @@ iavf_recv_scattered_pkts_vec_avx512_flex_rxd_offload(void *rx_queue,
 								true);
 }
 
-static __rte_always_inline int
-iavf_tx_free_bufs_avx512(struct ieth_tx_queue *txq)
-{
-	struct ieth_vec_tx_entry *txep;
-	uint32_t n;
-	uint32_t i;
-	int nb_free = 0;
-	struct rte_mbuf *m, *free[IAVF_VPMD_TX_MAX_FREE_BUF];
-
-	/* check DD bits on threshold descriptor */
-	if ((txq->iavf_tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
-	     rte_cpu_to_le_64(IAVF_TXD_QW1_DTYPE_MASK)) !=
-	    rte_cpu_to_le_64(IAVF_TX_DESC_DTYPE_DESC_DONE))
-		return 0;
-
-	n = txq->tx_rs_thresh >> txq->use_ctx;
-
-	 /* first buffer to free from S/W ring is at index
-	  * tx_next_dd - (tx_rs_thresh-1)
-	  */
-	txep = (void *)txq->sw_ring;
-	txep += (txq->tx_next_dd >> txq->use_ctx) - (n - 1);
-
-	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE && (n & 31) == 0) {
-		struct rte_mempool *mp = txep[0].mbuf->pool;
-		struct rte_mempool_cache *cache = rte_mempool_default_cache(mp,
-								rte_lcore_id());
-		void **cache_objs;
-
-		if (!cache || cache->len == 0)
-			goto normal;
-
-		cache_objs = &cache->objs[cache->len];
-
-		if (n > RTE_MEMPOOL_CACHE_MAX_SIZE) {
-			rte_mempool_ops_enqueue_bulk(mp, (void *)txep, n);
-			goto done;
-		}
-
-		/* The cache follows the following algorithm
-		 *   1. Add the objects to the cache
-		 *   2. Anything greater than the cache min value (if it crosses the
-		 *   cache flush threshold) is flushed to the ring.
-		 */
-		/* Add elements back into the cache */
-		uint32_t copied = 0;
-		/* n is multiple of 32 */
-		while (copied < n) {
-#ifdef RTE_ARCH_64
-			const __m512i a = _mm512_loadu_si512(&txep[copied]);
-			const __m512i b = _mm512_loadu_si512(&txep[copied + 8]);
-			const __m512i c = _mm512_loadu_si512(&txep[copied + 16]);
-			const __m512i d = _mm512_loadu_si512(&txep[copied + 24]);
-
-			_mm512_storeu_si512(&cache_objs[copied], a);
-			_mm512_storeu_si512(&cache_objs[copied + 8], b);
-			_mm512_storeu_si512(&cache_objs[copied + 16], c);
-			_mm512_storeu_si512(&cache_objs[copied + 24], d);
-#else
-			const __m512i a = _mm512_loadu_si512(&txep[copied]);
-			const __m512i b = _mm512_loadu_si512(&txep[copied + 16]);
-			_mm512_storeu_si512(&cache_objs[copied], a);
-			_mm512_storeu_si512(&cache_objs[copied + 16], b);
-#endif
-			copied += 32;
-		}
-		cache->len += n;
-
-		if (cache->len >= cache->flushthresh) {
-			rte_mempool_ops_enqueue_bulk(mp,
-						     &cache->objs[cache->size],
-						     cache->len - cache->size);
-			cache->len = cache->size;
-		}
-		goto done;
-	}
-
-normal:
-	m = rte_pktmbuf_prefree_seg(txep[0].mbuf);
-	if (likely(m)) {
-		free[0] = m;
-		nb_free = 1;
-		for (i = 1; i < n; i++) {
-			m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
-			if (likely(m)) {
-				if (likely(m->pool == free[0]->pool)) {
-					free[nb_free++] = m;
-				} else {
-					rte_mempool_put_bulk(free[0]->pool,
-							     (void *)free,
-							     nb_free);
-					free[0] = m;
-					nb_free = 1;
-				}
-			}
-		}
-		rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
-	} else {
-		for (i = 1; i < n; i++) {
-			m = rte_pktmbuf_prefree_seg(txep[i].mbuf);
-			if (m)
-				rte_mempool_put(m->pool, m);
-		}
-	}
-
-done:
-	/* buffers were freed, update counters */
-	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
-	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
-	if (txq->tx_next_dd >= txq->nb_tx_desc)
-		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
-
-	return txq->tx_rs_thresh;
-}
-
 static __rte_always_inline void
 tx_backlog_entry_avx512(struct ieth_vec_tx_entry *txep,
 			struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
@@ -2320,7 +2205,7 @@ iavf_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint64_t rs = IAVF_TX_DESC_CMD_RS | flags;
 
 	if (txq->nb_tx_free < txq->tx_free_thresh)
-		iavf_tx_free_bufs_avx512(txq);
+		ieth_tx_free_bufs_vector(txq, iavf_tx_desc_done, false);
 
 	nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
 	if (unlikely(nb_pkts == 0))
@@ -2387,7 +2272,7 @@ iavf_xmit_fixed_burst_vec_avx512_ctx(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint64_t rs = IAVF_TX_DESC_CMD_RS | flags;
 
 	if (txq->nb_tx_free < txq->tx_free_thresh)
-		iavf_tx_free_bufs_avx512(txq);
+		ieth_tx_free_bufs_vector(txq, iavf_tx_desc_done, true);
 
 	nb_commit = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts << 1);
 	nb_commit &= 0xFFFE;
diff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c b/drivers/net/ice/ice_rxtx_vec_avx512.c
index fc8f9ad34a..c3cbd601b3 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx512.c
@@ -949,7 +949,7 @@ ice_xmit_fixed_burst_vec_avx512(void *tx_queue, struct rte_mbuf **tx_pkts,
 	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
 
 	if (txq->nb_tx_free < txq->tx_free_thresh)
-		ieth_tx_free_bufs_vector(txq, ice_tx_desc_done);
+		ieth_tx_free_bufs_vector(txq, ice_tx_desc_done, false);
 
 	nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
 	if (unlikely(nb_pkts == 0))
-- 
2.43.0

