* [RFCv2 1/2] net/i40e: replace get and put functions
@ 2023-01-24 17:49 Kamalakshitha Aligeri
2023-01-24 17:49 ` [RFCv2 2/2] test/mempool: add zero-copy API's Kamalakshitha Aligeri
0 siblings, 1 reply; 2+ messages in thread
From: Kamalakshitha Aligeri @ 2023-01-24 17:49 UTC (permalink / raw)
To: bruce.richardson, mb, konstantin.ananyev, olivier.matz,
andrew.rybchenko, Honnappa.Nagarahalli
Cc: dev, nd, Kamalakshitha Aligeri
Integrated the mempool cache zero-copy get and put APIs into the i40e PMD.
Signed-off-by: Kamalakshitha Aligeri <kamalakshitha.aligeri@arm.com>
---
Link: https://patchwork.dpdk.org/project/dpdk/patch/20221227151700.80887-1-mb@smartsharesystems.com/
1. Added support for mempools without cache (Morten Brorup)
.mailmap | 1 +
drivers/net/i40e/i40e_rxtx_vec_avx512.c | 35 ++++++++-----------------
drivers/net/i40e/i40e_rxtx_vec_common.h | 23 ++++++++++------
drivers/net/i40e/i40e_rxtx_vec_neon.c | 35 ++++++++++++++++---------
4 files changed, 49 insertions(+), 45 deletions(-)
diff --git a/.mailmap b/.mailmap
index 75884b6fe2..05a42edbcf 100644
--- a/.mailmap
+++ b/.mailmap
@@ -670,6 +670,7 @@ Kai Ji <kai.ji@intel.com>
Kaiwen Deng <kaiwenx.deng@intel.com>
Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Kamalakannan R <kamalakannan.r@intel.com>
+Kamalakshitha Aligeri <kamalakshitha.aligeri@arm.com>
Kamil Bednarczyk <kamil.bednarczyk@intel.com>
Kamil Chalupnik <kamilx.chalupnik@intel.com>
Kamil Rytarowski <kamil.rytarowski@caviumnetworks.com>
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx512.c b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
index 60c97d5331..a4fba4ddc9 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx512.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx512.c
@@ -35,6 +35,9 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
if (unlikely(!cache))
return i40e_rxq_rearm_common(rxq, true);
+ void **cache_objs;
+
+ cache_objs = rte_mempool_cache_zc_get_bulk(cache, rxq->mp, RTE_I40E_RXQ_REARM_THRESH);
/* We need to pull 'n' more MBUFs into the software ring from mempool
* We inline the mempool function here, so we can vectorize the copy
@@ -45,15 +48,12 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
/* No. Backfill the cache first, and then fill from it */
uint32_t req = RTE_I40E_RXQ_REARM_THRESH + (cache->size -
cache->len);
-
/* How many do we require
* i.e. number to fill the cache + the request
*/
int ret = rte_mempool_ops_dequeue_bulk(rxq->mp,
- &cache->objs[cache->len], req);
- if (ret == 0) {
- cache->len += req;
- } else {
+ cache_objs[cache->len], req);
+ if (ret != 0) {
if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
rxq->nb_rx_desc) {
__m128i dma_addr0;
@@ -63,11 +63,11 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
rxep[i].mbuf = &rxq->fake_mbuf;
_mm_store_si128
((__m128i *)&rxdp[i].read,
- dma_addr0);
+ dma_addr0);
}
}
rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
- RTE_I40E_RXQ_REARM_THRESH;
+ RTE_I40E_RXQ_REARM_THRESH;
return;
}
}
@@ -90,7 +90,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
*/
for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH / 8; i++) {
const __m512i mbuf_ptrs = _mm512_loadu_si512
- (&cache->objs[cache->len - 8]);
+ (cache_objs[cache->len - 8]);
_mm512_store_si512(rxep, mbuf_ptrs);
/* gather iova of mbuf0-7 into one zmm reg */
@@ -906,21 +906,16 @@ i40e_tx_free_bufs_avx512(struct i40e_tx_queue *txq)
struct rte_mempool_cache *cache = rte_mempool_default_cache(mp,
rte_lcore_id());
- if (!cache || cache->len == 0)
+ if (!cache)
goto normal;
- cache_objs = &cache->objs[cache->len];
+ cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, n);
- if (n > RTE_MEMPOOL_CACHE_MAX_SIZE) {
+ if (!cache_objs) {
rte_mempool_ops_enqueue_bulk(mp, (void *)txep, n);
goto done;
}
- /* The cache follows the following algorithm
- * 1. Add the objects to the cache
- * 2. Anything greater than the cache min value (if it
- * crosses the cache flush threshold) is flushed to the ring.
- */
/* Add elements back into the cache */
uint32_t copied = 0;
/* n is multiple of 32 */
@@ -936,14 +931,6 @@ i40e_tx_free_bufs_avx512(struct i40e_tx_queue *txq)
_mm512_storeu_si512(&cache_objs[copied + 24], d);
copied += 32;
}
- cache->len += n;
-
- if (cache->len >= cache->flushthresh) {
- rte_mempool_ops_enqueue_bulk
- (mp, &cache->objs[cache->size],
- cache->len - cache->size);
- cache->len = cache->size;
- }
goto done;
}
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h
index fe1a6ec75e..70e11a2ef2 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -95,17 +95,24 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq)
n = txq->tx_rs_thresh;
- /* first buffer to free from S/W ring is at index
- * tx_next_dd - (tx_rs_thresh-1)
- */
+ /* first buffer to free from S/W ring is at index
+ * tx_next_dd - (tx_rs_thresh-1)
+ */
txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
- for (i = 0; i < n; i++) {
- free[i] = txep[i].mbuf;
- /* no need to reset txep[i].mbuf in vector path */
+ struct rte_mempool *mp = txep[0].mbuf->pool;
+ struct rte_mempool_cache *cache = rte_mempool_default_cache(mp, rte_lcore_id());
+ void **cache_objs;
+
+ cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, n);
+ if (cache_objs) {
+ for (i = 0; i < n; i++) {
+ cache_objs[i] = txep->mbuf;
+ /* no need to reset txep[i].mbuf in vector path */
+ txep++;
+ }
}
- rte_mempool_put_bulk(free[0]->pool, (void **)free, n);
goto done;
}
@@ -121,7 +128,7 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq)
} else {
rte_mempool_put_bulk(free[0]->pool,
(void *)free,
- nb_free);
+ nb_free);
free[0] = m;
nb_free = 1;
}
diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c b/drivers/net/i40e/i40e_rxtx_vec_neon.c
index 12e6f1cbcb..5ffc462a47 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
@@ -30,23 +30,32 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
uint64_t paddr;
rxdp = rxq->rx_ring + rxq->rxrearm_start;
-
- /* Pull 'n' more MBUFs into the software ring */
- if (unlikely(rte_mempool_get_bulk(rxq->mp,
- (void *)rxep,
- RTE_I40E_RXQ_REARM_THRESH) < 0)) {
- if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
- rxq->nb_rx_desc) {
- for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
- rxep[i].mbuf = &rxq->fake_mbuf;
- vst1q_u64((uint64_t *)&rxdp[i].read, zero);
+ struct rte_mempool_cache *cache = rte_mempool_default_cache(rxq->mp, rte_lcore_id());
+
+ /* When no cache is provided, get the objects directly from the backend */
+ if (!cache) {
+ int ret = rte_mempool_ops_dequeue_bulk(rxq->mp, (void *)rxep,
+ RTE_I40E_RXQ_REARM_THRESH);
+ /* Pull 'n' more MBUFs into the software ring */
+ if (ret != 0) {
+ if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
+ rxq->nb_rx_desc) {
+ for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
+ rxep[i].mbuf = &rxq->fake_mbuf;
+ vst1q_u64((uint64_t *)&rxdp[i].read, zero);
+ }
}
+ rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+ RTE_I40E_RXQ_REARM_THRESH;
+ return;
}
- rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
- RTE_I40E_RXQ_REARM_THRESH;
- return;
}
+ void **cache_objs;
+
+ cache_objs = rte_mempool_cache_zc_get_bulk(cache, rxq->mp, RTE_I40E_RXQ_REARM_THRESH);
+ rte_memcpy(rxep, cache_objs, RTE_I40E_RXQ_REARM_THRESH * sizeof(void *));
+
/* Initialize the mbufs in vector, process 2 mbufs in one loop */
for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
mb0 = rxep[0].mbuf;
--
2.25.1
^ permalink raw reply [flat|nested] 2+ messages in thread
* [RFCv2 2/2] test/mempool: add zero-copy API's
2023-01-24 17:49 [RFCv2 1/2] net/i40e: replace get and put functions Kamalakshitha Aligeri
@ 2023-01-24 17:49 ` Kamalakshitha Aligeri
0 siblings, 0 replies; 2+ messages in thread
From: Kamalakshitha Aligeri @ 2023-01-24 17:49 UTC (permalink / raw)
To: bruce.richardson, mb, konstantin.ananyev, olivier.matz,
andrew.rybchenko, Honnappa.Nagarahalli
Cc: dev, nd, Kamalakshitha Aligeri
Added mempool test cases for the zero-copy get and put APIs.
Signed-off-by: Kamalakshitha Aligeri <kamalakshitha.aligeri@arm.com>
---
Link: https://patchwork.dpdk.org/project/dpdk/patch/20221227151700.80887-1-mb@smartsharesystems.com/
1. Added mempool test cases with zero-copy get and put APIs
app/test/test_mempool.c | 124 +++++++++++++++++++++++++++++++++++++++-
1 file changed, 122 insertions(+), 2 deletions(-)
diff --git a/app/test/test_mempool.c b/app/test/test_mempool.c
index 8e493eda47..a635a514a7 100644
--- a/app/test/test_mempool.c
+++ b/app/test/test_mempool.c
@@ -72,6 +72,122 @@ my_obj_init(struct rte_mempool *mp, __rte_unused void *arg,
*objnum = i;
}
+/* basic tests with zero-copy APIs (done on one core) */
+static int
+test_mempool_basic_zc_api(struct rte_mempool *mp, int use_external_cache)
+{
+ uint32_t *objnum;
+ void **objtable;
+ void *obj, *obj2;
+ char *obj_data;
+ int ret = 0;
+ unsigned int i, j;
+ int offset;
+ struct rte_mempool_cache *cache;
+ void **cache_objs;
+
+ if (use_external_cache) {
+ /* Create a user-owned mempool cache. */
+ cache = rte_mempool_cache_create(RTE_MEMPOOL_CACHE_MAX_SIZE,
+ SOCKET_ID_ANY);
+ if (cache == NULL)
+ RET_ERR();
+ } else {
+ /* May be NULL if cache is disabled. */
+ cache = rte_mempool_default_cache(mp, rte_lcore_id());
+ }
+
+ /* dump the mempool status */
+ rte_mempool_dump(stdout, mp);
+
+ printf("get an object\n");
+ cache_objs = rte_mempool_cache_zc_get_bulk(cache, mp, 1);
+ obj = *cache_objs;
+ rte_mempool_dump(stdout, mp);
+
+ /* tests that improve coverage */
+ printf("get object count\n");
+ /* We have to count the extra caches, one in this case. */
+ offset = use_external_cache ? 1 * cache->len : 0;
+ if (rte_mempool_avail_count(mp) + offset != MEMPOOL_SIZE - 1)
+ GOTO_ERR(ret, out);
+
+ printf("get private data\n");
+ if (rte_mempool_get_priv(mp) != (char *)mp +
+ RTE_MEMPOOL_HEADER_SIZE(mp, mp->cache_size))
+ GOTO_ERR(ret, out);
+
+#ifndef RTE_EXEC_ENV_FREEBSD /* rte_mem_virt2iova() not supported on bsd */
+ printf("get physical address of an object\n");
+ if (rte_mempool_virt2iova(obj) != rte_mem_virt2iova(obj))
+ GOTO_ERR(ret, out);
+#endif
+
+ printf("put the object back\n");
+ cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, 1);
+ rte_memcpy(cache_objs, &obj, sizeof(void *));
+ rte_mempool_dump(stdout, mp);
+
+ printf("get 2 objects\n");
+ cache_objs = rte_mempool_cache_zc_get_bulk(cache, mp, 1);
+ obj = *cache_objs;
+ cache_objs = rte_mempool_cache_zc_get_bulk(cache, mp, 1);
+ obj2 = *cache_objs;
+ rte_mempool_dump(stdout, mp);
+
+ printf("put the objects back\n");
+ cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, 1);
+ rte_memcpy(cache_objs, &obj, sizeof(void *));
+ cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, 1);
+ rte_memcpy(cache_objs, &obj2, sizeof(void *));
+ rte_mempool_dump(stdout, mp);
+
+ /*
+ * get many objects: we cannot get them all because the cache
+ * on other cores may not be empty.
+ */
+ objtable = malloc(MEMPOOL_SIZE * sizeof(void *));
+ if (objtable == NULL)
+ GOTO_ERR(ret, out);
+
+ for (i = 0; i < MEMPOOL_SIZE; i++) {
+ cache_objs = rte_mempool_cache_zc_get_bulk(cache, mp, 1);
+ objtable[i] = *cache_objs;
+ }
+
+ /*
+ * for each object, check that its content was not modified,
+ * and put objects back in pool
+ */
+ while (i--) {
+ obj = objtable[i];
+ obj_data = obj;
+ objnum = obj;
+ if (*objnum > MEMPOOL_SIZE) {
+ printf("bad object number(%d)\n", *objnum);
+ ret = -1;
+ break;
+ }
+ for (j = sizeof(*objnum); j < mp->elt_size; j++) {
+ if (obj_data[j] != 0)
+ ret = -1;
+ }
+ cache_objs = rte_mempool_cache_zc_put_bulk(cache, mp, 1);
+ rte_memcpy(cache_objs, &objtable[i], sizeof(void *));
+ }
+
+ free(objtable);
+ if (ret == -1)
+ printf("objects were modified!\n");
+
+out:
+ if (use_external_cache) {
+ rte_mempool_cache_flush(cache, mp);
+ rte_mempool_cache_free(cache);
+ }
+
+ return ret;
+}
/* basic tests (done on one core) */
static int
test_mempool_basic(struct rte_mempool *mp, int use_external_cache)
@@ -982,8 +1098,12 @@ test_mempool(void)
if (test_mempool_basic(mp_nocache, 0) < 0)
GOTO_ERR(ret, err);
- /* basic tests with cache */
- if (test_mempool_basic(mp_cache, 0) < 0)
+ /* basic tests with zero-copy API's */
+ if (test_mempool_basic_zc_api(mp_cache, 0) < 0)
+ GOTO_ERR(ret, err);
+
+ /* basic tests with user-owned cache and zero-copy API's */
+ if (test_mempool_basic_zc_api(mp_nocache, 1) < 0)
GOTO_ERR(ret, err);
/* basic tests with user-owned cache */
--
2.25.1
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2023-01-24 17:50 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-24 17:49 [RFCv2 1/2] net/i40e: replace get and put functions Kamalakshitha Aligeri
2023-01-24 17:49 ` [RFCv2 2/2] test/mempool: add zero-copy API's Kamalakshitha Aligeri
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).