[RFCv2 1/2] net/i40e: replace get and put functions

DPDK patches and discussions
 help / color / mirror / Atom feed

From: Kamalakshitha Aligeri <kamalakshitha.aligeri@arm.com>
To: bruce.richardson@intel.com, mb@smartsharesystems.com,
	konstantin.ananyev@huawei.com, olivier.matz@6wind.com,
	andrew.rybchenko@oktetlabs.ru, Honnappa.Nagarahalli@arm.com
Cc: dev@dpdk.org, nd@arm.com,
	Kamalakshitha Aligeri <kamalakshitha.aligeri@arm.com>
Subject: [RFCv2 1/2] net/i40e: replace get and put functions
Date: Tue, 24 Jan 2023 17:49:35 +0000	[thread overview]
Message-ID: <20230124174936.190800-1-kamalakshitha.aligeri@arm.com> (raw)

Integrated zero-copy get and put API's in mempool cache in i40e PMD

Signed-off-by: Kamalakshitha Aligeri <kamalakshitha.aligeri@arm.com>
---
Link: https://patchwork.dpdk.org/project/dpdk/patch/20221227151700.80887-1-mb@smartsharesystems.com/
1. Added support for mempools without cache (Morten Brorup)

 .mailmap                                |  1 +
 drivers/net/i40e/i40e_rxtx_vec_avx512.c | 35 ++++++++-----------------
 drivers/net/i40e/i40e_rxtx_vec_common.h | 23 ++++++++++------
 drivers/net/i40e/i40e_rxtx_vec_neon.c   | 35 ++++++++++++++++---------
 4 files changed, 49 insertions(+), 45 deletions(-)

diff --git a/.mailmap b/.mailmap
index 75884b6fe2..05a42edbcf 100644
--- a/.mailmap
+++ b/.mailmap
@@ -670,6 +670,7 @@ Kai Ji <kai.ji@intel.com>
 Kaiwen Deng <kaiwenx.deng@intel.com>
 Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
 Kamalakannan R <kamalakannan.r@intel.com>
+Kamalakshitha Aligeri <kamalakshitha.aligeri@arm.com>
 Kamil Bednarczyk <kamil.bednarczyk@intel.com>
 Kamil Chalupnik <kamilx.chalupnik@intel.com>
 Kamil Rytarowski <kamil.rytarowski@caviumnetworks.com>
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
index 60c97d5331..a4fba4ddc9 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
@@ -35,6 +35,9 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 
 	if (unlikely(!cache))
 		return i40e_rxq_rearm_common(rxq, true);
+	void **cache_objs;
+
+	cache_objs = rte_mempool_cache_zc_get_bulk(cache, rxq->mp, RTE_I40E_RXQ_REARM_THRESH);
 
 	/* We need to pull 'n' more MBUFs into the software ring from mempool
 	 * We inline the mempool function here, so we can vectorize the copy
@@ -45,15 +48,12 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 		/* No. Backfill the cache first, and then fill from it */
 		uint32_t req = RTE_I40E_RXQ_REARM_THRESH + (cache->size -
 				cache->len);
-
 		/* How many do we require
 		 * i.e. number to fill the cache + the request
 		 */
 		int ret = rte_mempool_ops_dequeue_bulk(rxq->mp,
-				&cache->objs[cache->len], req);
-		if (ret == 0) {
-			cache->len += req;
-		} else {
+				cache_objs[cache->len], req);
+		if (ret != 0) {
 			if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
 					rxq->nb_rx_desc) {
 				__m128i dma_addr0;
@@ -63,11 +63,11 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 					rxep[i].mbuf = &rxq->fake_mbuf;
 					_mm_store_si128
 						((__m128i *)&rxdp[i].read,
-							dma_addr0);
+						 dma_addr0);
 				}
 			}
 			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-					RTE_I40E_RXQ_REARM_THRESH;
+				RTE_I40E_RXQ_REARM_THRESH;
 			return;
 		}
 	}
@@ -90,7 +90,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 	 */
 	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH / 8; i++) {
 		const __m512i mbuf_ptrs = _mm512_loadu_si512
-			(&cache->objs[cache->len - 8]);
+			(cache_objs[cache->len - 8]);
 		_mm512_store_si512(rxep, mbuf_ptrs);
 
 		/* gather iova of mbuf0-7 into one zmm reg */
@@ -906,21 +906,16 @@ i40e_tx_free_bufs_avx512(struct i40e_tx_queue *txq)
 		struct rte_mempool_cache *cache = rte_mempool_default_cache(mp,
 				rte_lcore_id());
 
-		if (!cache || cache->len == 0)
+		if (!cache)
 			goto normal;
 
-		cache_objs = &cache->objs[cache->len];
+		cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, n);
 
-		if (n > RTE_MEMPOOL_CACHE_MAX_SIZE) {
+		if (!cache_objs) {
 			rte_mempool_ops_enqueue_bulk(mp, (void *)txep, n);
 			goto done;
 		}
 
-		/* The cache follows the following algorithm
-		 *   1. Add the objects to the cache
-		 *   2. Anything greater than the cache min value (if it
-		 *   crosses the cache flush threshold) is flushed to the ring.
-		 */
 		/* Add elements back into the cache */
 		uint32_t copied = 0;
 		/* n is multiple of 32 */
@@ -936,14 +931,6 @@ i40e_tx_free_bufs_avx512(struct i40e_tx_queue *txq)
 			_mm512_storeu_si512(&cache_objs[copied + 24], d);
 			copied += 32;
 		}
-		cache->len += n;
-
-		if (cache->len >= cache->flushthresh) {
-			rte_mempool_ops_enqueue_bulk
-				(mp, &cache->objs[cache->size],
-				cache->len - cache->size);
-			cache->len = cache->size;
-		}
 		goto done;
 	}
 
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h
index fe1a6ec75e..70e11a2ef2 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -95,17 +95,24 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq)
 
 	n = txq->tx_rs_thresh;
 
-	 /* first buffer to free from S/W ring is at index
-	  * tx_next_dd - (tx_rs_thresh-1)
-	  */
+	/* first buffer to free from S/W ring is at index
+	 * tx_next_dd - (tx_rs_thresh-1)
+	 */
 	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
 
 	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
-		for (i = 0; i < n; i++) {
-			free[i] = txep[i].mbuf;
-			/* no need to reset txep[i].mbuf in vector path */
+		struct rte_mempool *mp = txep[0].mbuf->pool;
+		struct rte_mempool_cache *cache = rte_mempool_default_cache(mp, rte_lcore_id());
+		void **cache_objs;
+
+		cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, n);
+		if (cache_objs) {
+			for (i = 0; i < n; i++) {
+				cache_objs[i] = txep->mbuf;
+				/* no need to reset txep[i].mbuf in vector path */
+				txep++;
+			}
 		}
-		rte_mempool_put_bulk(free[0]->pool, (void **)free, n);
 		goto done;
 	}
 
@@ -121,7 +128,7 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq)
 				} else {
 					rte_mempool_put_bulk(free[0]->pool,
 							     (void *)free,
-							     nb_free);
+							      nb_free);
 					free[0] = m;
 					nb_free = 1;
 				}
diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c b/drivers/net/i40e/i40e_rxtx_vec_neon.c
index 12e6f1cbcb..5ffc462a47 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
@@ -30,23 +30,32 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 	uint64_t paddr;
 
 	rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
-	/* Pull 'n' more MBUFs into the software ring */
-	if (unlikely(rte_mempool_get_bulk(rxq->mp,
-					  (void *)rxep,
-					  RTE_I40E_RXQ_REARM_THRESH) < 0)) {
-		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
-		    rxq->nb_rx_desc) {
-			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
-				rxep[i].mbuf = &rxq->fake_mbuf;
-				vst1q_u64((uint64_t *)&rxdp[i].read, zero);
+	struct rte_mempool_cache *cache = rte_mempool_default_cache(rxq->mp, rte_lcore_id());
+
+	/*When no cache provided, get the objects directly from backend */
+	if (!cache) {
+		int ret = rte_mempool_ops_dequeue_bulk(rxq->mp, (void *)rxep,
+						       RTE_I40E_RXQ_REARM_THRESH);
+		/* Pull 'n' more MBUFs into the software ring */
+		if (ret != 0) {
+			if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+					rxq->nb_rx_desc) {
+				for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+					rxep[i].mbuf = &rxq->fake_mbuf;
+					vst1q_u64((uint64_t *)&rxdp[i].read, zero);
+				}
 			}
+			rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+				RTE_I40E_RXQ_REARM_THRESH;
+			return;
 		}
-		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
-			RTE_I40E_RXQ_REARM_THRESH;
-		return;
 	}
 
+	void **cache_objs;
+
+	cache_objs = rte_mempool_cache_zc_get_bulk(cache, rxq->mp, RTE_I40E_RXQ_REARM_THRESH);
+	rte_memcpy(rxep, cache_objs, RTE_I40E_RXQ_REARM_THRESH * sizeof(void *));
+
 	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
 	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
 		mb0 = rxep[0].mbuf;
-- 
2.25.1

next             reply	other threads:[~2023-01-24 17:50 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-01-24 17:49 Kamalakshitha Aligeri [this message]
2023-01-24 17:49 ` [RFCv2 2/2] test/mempool: add zero-copy API's Kamalakshitha Aligeri

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230124174936.190800-1-kamalakshitha.aligeri@arm.com \
    --to=kamalakshitha.aligeri@arm.com \
    --cc=Honnappa.Nagarahalli@arm.com \
    --cc=andrew.rybchenko@oktetlabs.ru \
    --cc=bruce.richardson@intel.com \
    --cc=dev@dpdk.org \
    --cc=konstantin.ananyev@huawei.com \
    --cc=mb@smartsharesystems.com \
    --cc=nd@arm.com \
    --cc=olivier.matz@6wind.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).