DPDK patches and discussions
* [PATCH 1/2] eventdev/dma: reorganize event DMA ops
@ 2024-04-06 10:13 pbhagavatula
  2024-04-06 10:13 ` [PATCH 2/2] dma/cnxk: remove completion pool pbhagavatula
                   ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: pbhagavatula @ 2024-04-06 10:13 UTC (permalink / raw)
  To: jerinj, Amit Prakash Shukla, Vamsi Attunuru; +Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Re-organize the event DMA ops structure so that it can
hold the source and destination pointers without needing
additional memory; the mempool that allocates memory for
rte_event_dma_adapter_op can size its elements to
accommodate all the needed source and destination
pointers.

Add multiple words for holding user metadata, adapter
implementation-specific metadata and event metadata.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
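Note: a minimal usage sketch (illustrative only, not part of this
patch) of how an application would size the op mempool and fill the
flexible src_dst_seg[] array after this change; NB_SEGS, the pool
name and the cache size are assumptions:

	#include <rte_event_dma_adapter.h>
	#include <rte_lcore.h>
	#include <rte_mempool.h>

	#define NB_SEGS 2 /* one source + one destination segment */

	static struct rte_mempool *
	create_op_pool(unsigned int nb_ops)
	{
		/* Size each element to carry the op plus its trailing segments. */
		unsigned int elt_size = sizeof(struct rte_event_dma_adapter_op) +
					NB_SEGS * sizeof(struct rte_dma_sge);

		return rte_mempool_create("dma_op_pool", nb_ops, elt_size, 256, 0,
					  NULL, NULL, NULL, NULL, rte_socket_id(), 0);
	}

	static void
	fill_op(struct rte_event_dma_adapter_op *op, rte_iova_t src,
		rte_iova_t dst, uint32_t len)
	{
		op->src_dst_seg[0].addr = src;	/* sources come first */
		op->src_dst_seg[0].length = len;
		op->src_dst_seg[1].addr = dst;	/* destinations start at nb_src */
		op->src_dst_seg[1].length = len;
		op->nb_src = 1;
		op->nb_dst = 1;
		op->flags = RTE_DMA_OP_FLAG_SUBMIT;
	}
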
 app/test-eventdev/test_perf_common.c        | 26 ++++--------
 app/test/test_event_dma_adapter.c           | 20 +++------
 doc/guides/prog_guide/event_dma_adapter.rst |  2 +-
 drivers/dma/cnxk/cnxk_dmadev_fp.c           | 39 +++++++----------
 lib/eventdev/rte_event_dma_adapter.c        | 27 ++++--------
 lib/eventdev/rte_event_dma_adapter.h        | 46 +++++++++++++++------
 6 files changed, 72 insertions(+), 88 deletions(-)

diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c
index 93e6132de8de..00fcac716304 100644
--- a/app/test-eventdev/test_perf_common.c
+++ b/app/test-eventdev/test_perf_common.c
@@ -1503,7 +1503,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
 		prod = 0;
 		for (; port < perf_nb_event_ports(opt); port++) {
 			struct prod_data *p = &t->prod[port];
-			struct rte_event *response_info;
 			uint32_t flow_id;
 
 			p->dev_id = opt->dev_id;
@@ -1523,13 +1522,10 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
 			for (flow_id = 0; flow_id < t->nb_flows; flow_id++) {
 				rte_mempool_get(t->da_op_pool, (void **)&op);
 
-				op->src_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0);
-				op->dst_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0);
-
-				op->src_seg->addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
-				op->dst_seg->addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
-				op->src_seg->length = 1024;
-				op->dst_seg->length = 1024;
+				op->src_dst_seg[0].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
+				op->src_dst_seg[1].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
+				op->src_dst_seg[0].length = 1024;
+				op->src_dst_seg[1].length = 1024;
 				op->nb_src = 1;
 				op->nb_dst = 1;
 				op->flags = RTE_DMA_OP_FLAG_SUBMIT;
@@ -1537,12 +1533,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
 				op->dma_dev_id = dma_dev_id;
 				op->vchan = vchan_id;
 
-				response_info = (struct rte_event *)((uint8_t *)op +
-						 sizeof(struct rte_event_dma_adapter_op));
-				response_info->queue_id = p->queue_id;
-				response_info->sched_type = RTE_SCHED_TYPE_ATOMIC;
-				response_info->flow_id = flow_id;
-
 				p->da.dma_op[flow_id] = op;
 			}
 
@@ -2036,7 +2026,7 @@ perf_dmadev_setup(struct evt_test *test, struct evt_options *opt)
 		return -ENODEV;
 	}
 
-	elt_size = sizeof(struct rte_event_dma_adapter_op) + sizeof(struct rte_event);
+	elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2);
 	t->da_op_pool = rte_mempool_create("dma_op_pool", opt->pool_sz, elt_size, 256,
 					   0, NULL, NULL, NULL, NULL, rte_socket_id(), 0);
 	if (t->da_op_pool == NULL) {
@@ -2085,10 +2075,8 @@ perf_dmadev_destroy(struct evt_test *test, struct evt_options *opt)
 		for (flow_id = 0; flow_id < t->nb_flows; flow_id++) {
 			op = p->da.dma_op[flow_id];
 
-			rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_seg->addr);
-			rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->dst_seg->addr);
-			rte_free(op->src_seg);
-			rte_free(op->dst_seg);
+			rte_pktmbuf_free((struct rte_mbuf *)op->src_dst_seg[0].addr);
+			rte_pktmbuf_free((struct rte_mbuf *)op->src_dst_seg[1].addr);
 			rte_mempool_put(op->op_mp, op);
 		}
 
diff --git a/app/test/test_event_dma_adapter.c b/app/test/test_event_dma_adapter.c
index 35b417b69f7b..d9dff4ff7d3f 100644
--- a/app/test/test_event_dma_adapter.c
+++ b/app/test/test_event_dma_adapter.c
@@ -235,7 +235,6 @@ test_op_forward_mode(void)
 	struct rte_mbuf *dst_mbuf[TEST_MAX_OP];
 	struct rte_event_dma_adapter_op *op;
 	struct rte_event ev[TEST_MAX_OP];
-	struct rte_event response_info;
 	int ret, i;
 
 	ret = rte_pktmbuf_alloc_bulk(params.src_mbuf_pool, src_mbuf, TEST_MAX_OP);
@@ -253,14 +252,11 @@ test_op_forward_mode(void)
 		rte_mempool_get(params.op_mpool, (void **)&op);
 		TEST_ASSERT_NOT_NULL(op, "Failed to allocate dma operation struct\n");
 
-		op->src_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0);
-		op->dst_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0);
-
 		/* Update Op */
-		op->src_seg->addr = rte_pktmbuf_iova(src_mbuf[i]);
-		op->dst_seg->addr = rte_pktmbuf_iova(dst_mbuf[i]);
-		op->src_seg->length = PACKET_LENGTH;
-		op->dst_seg->length = PACKET_LENGTH;
+		op->src_dst_seg[0].addr = rte_pktmbuf_iova(src_mbuf[i]);
+		op->src_dst_seg[1].addr = rte_pktmbuf_iova(dst_mbuf[i]);
+		op->src_dst_seg[0].length = PACKET_LENGTH;
+		op->src_dst_seg[1].length = PACKET_LENGTH;
 		op->nb_src = 1;
 		op->nb_dst = 1;
 		op->flags = RTE_DMA_OP_FLAG_SUBMIT;
@@ -268,10 +264,6 @@ test_op_forward_mode(void)
 		op->dma_dev_id = TEST_DMA_DEV_ID;
 		op->vchan = TEST_DMA_VCHAN_ID;
 
-		response_info.event = dma_response_info.event;
-		rte_memcpy((uint8_t *)op + sizeof(struct rte_event_dma_adapter_op), &response_info,
-			   sizeof(struct rte_event));
-
 		/* Fill in event info and update event_ptr with rte_event_dma_adapter_op */
 		memset(&ev[i], 0, sizeof(struct rte_event));
 		ev[i].event = 0;
@@ -294,8 +286,6 @@ test_op_forward_mode(void)
 
 		TEST_ASSERT_EQUAL(ret, 0, "Data mismatch for dma adapter\n");
 
-		rte_free(op->src_seg);
-		rte_free(op->dst_seg);
 		rte_mempool_put(op->op_mp, op);
 	}
 
@@ -400,7 +390,7 @@ configure_dmadev(void)
 						       rte_socket_id());
 	RTE_TEST_ASSERT_NOT_NULL(params.dst_mbuf_pool, "Can't create DMA_DST_MBUFPOOL\n");
 
-	elt_size = sizeof(struct rte_event_dma_adapter_op) + sizeof(struct rte_event);
+	elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2);
 	params.op_mpool = rte_mempool_create("EVENT_DMA_OP_POOL", DMA_OP_POOL_SIZE, elt_size, 0,
 					     0, NULL, NULL, NULL, NULL, rte_socket_id(), 0);
 	RTE_TEST_ASSERT_NOT_NULL(params.op_mpool, "Can't create DMA_OP_POOL\n");
diff --git a/doc/guides/prog_guide/event_dma_adapter.rst b/doc/guides/prog_guide/event_dma_adapter.rst
index 3443b6a8032e..1fb9b0a07b87 100644
--- a/doc/guides/prog_guide/event_dma_adapter.rst
+++ b/doc/guides/prog_guide/event_dma_adapter.rst
@@ -144,7 +144,7 @@ on which it enqueues events towards the DMA adapter using ``rte_event_enqueue_bu
    uint32_t cap;
    int ret;
 
-   /* Fill in event info and update event_ptr with rte_dma_op */
+   /* Fill in event info and update event_ptr with rte_event_dma_adapter_op */
    memset(&ev, 0, sizeof(ev));
    .
    .
diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c
index f6562b603e45..9f7f9b2eed0e 100644
--- a/drivers/dma/cnxk/cnxk_dmadev_fp.c
+++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c
@@ -457,7 +457,6 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
-	struct rte_event *rsp_info;
 	struct cn10k_sso_hws *work;
 	uint16_t nb_src, nb_dst;
 	rte_mcslock_t mcs_lock_me;
@@ -469,9 +468,7 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 
 	for (count = 0; count < nb_events; count++) {
 		op = ev[count].event_ptr;
-		rsp_info = (struct rte_event *)((uint8_t *)op +
-			     sizeof(struct rte_event_dma_adapter_op));
-		dpivf =	rte_dma_fp_objs[op->dma_dev_id].dev_private;
+		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];
 
 		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
@@ -488,15 +485,14 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 		hdr[0] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 54);
 		hdr[0] |= (nb_dst << 6) | nb_src;
 		hdr[1] = ((uint64_t)comp_ptr);
-		hdr[2] = cnxk_dma_adapter_format_event(rsp_info->event);
+		hdr[2] = cnxk_dma_adapter_format_event(ev[count].event);
 
-		src = &op->src_seg[0];
-		dst = &op->dst_seg[0];
+		src = &op->src_dst_seg[0];
+		dst = &op->src_dst_seg[op->nb_src];
 
 		if (CNXK_TAG_IS_HEAD(work->gw_rdata) ||
 		    ((CNXK_TT_FROM_TAG(work->gw_rdata) == SSO_TT_ORDERED) &&
-		    (rsp_info->sched_type & DPI_HDR_TT_MASK) ==
-			    RTE_SCHED_TYPE_ORDERED))
+		     (ev[count].sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED))
 			roc_sso_hws_head_wait(work->base);
 
 		rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
@@ -566,12 +562,12 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event
 		 * For all other cases, src pointers are first pointers.
 		 */
 		if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) {
-			fptr = &op->dst_seg[0];
-			lptr = &op->src_seg[0];
+			fptr = &op->src_dst_seg[nb_src];
+			lptr = &op->src_dst_seg[0];
 			RTE_SWAP(nb_src, nb_dst);
 		} else {
-			fptr = &op->src_seg[0];
-			lptr = &op->dst_seg[0];
+			fptr = &op->src_dst_seg[0];
+			lptr = &op->src_dst_seg[nb_src];
 		}
 
 		hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48;
@@ -612,7 +608,6 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
-	struct rte_event *rsp_info;
 	struct cn9k_sso_hws *work;
 	uint16_t nb_src, nb_dst;
 	rte_mcslock_t mcs_lock_me;
@@ -624,9 +619,7 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 
 	for (count = 0; count < nb_events; count++) {
 		op = ev[count].event_ptr;
-		rsp_info = (struct rte_event *)((uint8_t *)op +
-			    sizeof(struct rte_event_dma_adapter_op));
-		dpivf =	rte_dma_fp_objs[op->dma_dev_id].dev_private;
+		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];
 
 		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
@@ -647,18 +640,18 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 		 * For all other cases, src pointers are first pointers.
 		 */
 		if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) {
-			fptr = &op->dst_seg[0];
-			lptr = &op->src_seg[0];
+			fptr = &op->src_dst_seg[nb_src];
+			lptr = &op->src_dst_seg[0];
 			RTE_SWAP(nb_src, nb_dst);
 		} else {
-			fptr = &op->src_seg[0];
-			lptr = &op->dst_seg[0];
+			fptr = &op->src_dst_seg[0];
+			lptr = &op->src_dst_seg[nb_src];
 		}
 
 		hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48;
-		hdr[0] |= cnxk_dma_adapter_format_event(rsp_info->event);
+		hdr[0] |= cnxk_dma_adapter_format_event(ev[count].event);
 
-		if ((rsp_info->sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)
+		if ((ev[count].sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)
 			roc_sso_hws_head_wait(work->base);
 
 		rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
diff --git a/lib/eventdev/rte_event_dma_adapter.c b/lib/eventdev/rte_event_dma_adapter.c
index 24dff556dbfe..e52ef46a1b36 100644
--- a/lib/eventdev/rte_event_dma_adapter.c
+++ b/lib/eventdev/rte_event_dma_adapter.c
@@ -236,9 +236,9 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter,
 				      uint16_t vchan, uint16_t *nb_ops_flushed)
 {
 	struct rte_event_dma_adapter_op *op;
-	struct dma_vchan_info *tq;
 	uint16_t *head = &bufp->head;
 	uint16_t *tail = &bufp->tail;
+	struct dma_vchan_info *tq;
 	uint16_t n;
 	uint16_t i;
 	int ret;
@@ -257,11 +257,13 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter,
 	for (i = 0; i < n; i++)	{
 		op = bufp->op_buffer[*head];
 		if (op->nb_src == 1 && op->nb_dst == 1)
-			ret = rte_dma_copy(dma_dev_id, vchan, op->src_seg->addr, op->dst_seg->addr,
-					   op->src_seg->length, op->flags);
+			ret = rte_dma_copy(dma_dev_id, vchan, op->src_dst_seg[0].addr,
+					   op->src_dst_seg[1].addr, op->src_dst_seg[0].length,
+					   op->flags);
 		else
-			ret = rte_dma_copy_sg(dma_dev_id, vchan, op->src_seg, op->dst_seg,
-					      op->nb_src, op->nb_dst, op->flags);
+			ret = rte_dma_copy_sg(dma_dev_id, vchan, &op->src_dst_seg[0],
+					      &op->src_dst_seg[op->nb_src], op->nb_src, op->nb_dst,
+					      op->flags);
 		if (ret < 0)
 			break;
 
@@ -511,8 +513,7 @@ edma_enq_to_dma_dev(struct event_dma_adapter *adapter, struct rte_event *ev, uns
 		if (dma_op == NULL)
 			continue;
 
-		/* Expected to have response info appended to dma_op. */
-
+		dma_op->impl_opaque[0] = ev[i].event;
 		dma_dev_id = dma_op->dma_dev_id;
 		vchan = dma_op->vchan;
 		vchan_qinfo = &adapter->dma_devs[dma_dev_id].vchanq[vchan];
@@ -647,7 +648,6 @@ edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_a
 	uint8_t event_port_id = adapter->event_port_id;
 	uint8_t event_dev_id = adapter->eventdev_id;
 	struct rte_event events[DMA_BATCH_SIZE];
-	struct rte_event *response_info;
 	uint16_t nb_enqueued, nb_ev;
 	uint8_t retry;
 	uint8_t i;
@@ -659,16 +659,7 @@ edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_a
 	for (i = 0; i < num; i++) {
 		struct rte_event *ev = &events[nb_ev++];
 
-		/* Expected to have response info appended to dma_op. */
-		response_info = (struct rte_event *)((uint8_t *)ops[i] +
-							  sizeof(struct rte_event_dma_adapter_op));
-		if (unlikely(response_info == NULL)) {
-			if (ops[i] != NULL && ops[i]->op_mp != NULL)
-				rte_mempool_put(ops[i]->op_mp, ops[i]);
-			continue;
-		}
-
-		rte_memcpy(ev, response_info, sizeof(struct rte_event));
+		ev->event = ops[i]->impl_opaque[0];
 		ev->event_ptr = ops[i];
 		ev->event_type = RTE_EVENT_TYPE_DMADEV;
 		if (adapter->implicit_release_disabled)
diff --git a/lib/eventdev/rte_event_dma_adapter.h b/lib/eventdev/rte_event_dma_adapter.h
index e924ab673df7..048ddba3f354 100644
--- a/lib/eventdev/rte_event_dma_adapter.h
+++ b/lib/eventdev/rte_event_dma_adapter.h
@@ -157,24 +157,46 @@ extern "C" {
  * instance.
  */
 struct rte_event_dma_adapter_op {
-	struct rte_dma_sge *src_seg;
-	/**< Source segments. */
-	struct rte_dma_sge *dst_seg;
-	/**< Destination segments. */
-	uint16_t nb_src;
-	/**< Number of source segments. */
-	uint16_t nb_dst;
-	/**< Number of destination segments. */
 	uint64_t flags;
 	/**< Flags related to the operation.
 	 * @see RTE_DMA_OP_FLAG_*
 	 */
-	int16_t dma_dev_id;
-	/**< DMA device ID to be used */
-	uint16_t vchan;
-	/**< DMA vchan ID to be used */
 	struct rte_mempool *op_mp;
 	/**< Mempool from which op is allocated. */
+	enum rte_dma_status_code status;
+	/**< Status code for this operation. */
+	uint32_t rsvd;
+	/**< Reserved for future use. */
+	uint64_t impl_opaque[2];
+	/**< Implementation-specific opaque data.
+	 * A DMA device implementation may use this field to hold
+	 * implementation-specific values shared between enqueue and
+	 * dequeue operations.
+	 * The application should not modify this field.
+	 */
+	uint64_t user_meta;
+	/**< Memory to store user-specific metadata.
+	 * The DMA device implementation should not modify this area.
+	 */
+	uint64_t event_meta;
+	/**< Event metadata that defines event attributes when used in OP_NEW mode.
+	 * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_NEW
+	 * @see struct rte_event::event
+	 */
+	int16_t dma_dev_id;
+	/**< DMA device ID to be used with OP_FORWARD mode.
+	 * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD
+	 */
+	uint16_t vchan;
+	/**< DMA vchan ID to be used with OP_FORWARD mode.
+	 * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD
+	 */
+	uint16_t nb_src;
+	/**< Number of source segments. */
+	uint16_t nb_dst;
+	/**< Number of destination segments. */
+	struct rte_dma_sge src_dst_seg[0];
+	/**< Source and destination segments. */
 };
 
 /**
-- 
2.43.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH 2/2] dma/cnxk: remove completion pool
  2024-04-06 10:13 [PATCH 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula
@ 2024-04-06 10:13 ` pbhagavatula
  2024-04-16  8:56   ` Vamsi Krishna Attunuru
  2024-04-17  5:58 ` [PATCH v2 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula
  2024-05-16  7:36 ` [PATCH " Amit Prakash Shukla
  2 siblings, 1 reply; 17+ messages in thread
From: pbhagavatula @ 2024-04-06 10:13 UTC (permalink / raw)
  To: jerinj, Vamsi Attunuru, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Use the DMA op itself to store metadata and remove the use of
the completion pool.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
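In outline (a condensed reading of the driver changes below, not new
code), the op itself now doubles as the completion handle: enqueue
writes the op pointer into the DPI header word that previously held a
pointer from the completion pool, and dequeue recovers the op from the
SSO work word and reads the status the engine wrote at
offsetof(struct rte_event_dma_adapter_op, impl_opaque):

	/* Enqueue: hand the op to the hardware as the completion handle. */
	hdr[1] = (uint64_t)op;	/* was: (uint64_t)comp_ptr */

	/* Dequeue: recover the op and check the engine-written status. */
	op = (struct rte_event_dma_adapter_op *)get_work1;
	if (rte_atomic_load_explicit(&op->impl_opaque[0],
				     rte_memory_order_relaxed) != 0)
		rte_atomic_fetch_add_explicit(&dpi_conf->stats.errors, 1,
					      rte_memory_order_relaxed);
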
 drivers/dma/cnxk/cnxk_dmadev.c           | 53 ++++++----------
 drivers/dma/cnxk/cnxk_dmadev.h           | 24 +------
 drivers/dma/cnxk/cnxk_dmadev_fp.c        | 79 +++++-------------------
 drivers/event/cnxk/cnxk_eventdev_adptr.c | 47 +++-----------
 4 files changed, 45 insertions(+), 158 deletions(-)

diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c
index 4ab3cfbdf2cd..dfd722271327 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.c
+++ b/drivers/dma/cnxk/cnxk_dmadev.c
@@ -2,6 +2,8 @@
  * Copyright (C) 2021 Marvell International Ltd.
  */
 
+#include <rte_event_dma_adapter.h>
+
 #include <cnxk_dmadev.h>
 
 static int cnxk_stats_reset(struct rte_dma_dev *dev, uint16_t vchan);
@@ -30,8 +32,7 @@ cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan)
 {
 	struct cnxk_dpi_conf *dpi_conf;
 	uint16_t num_vchans;
-	uint16_t max_desc;
-	int i, j;
+	int i;
 
 	if (vchan == RTE_DMA_ALL_VCHAN) {
 		num_vchans = dpivf->num_vchans;
@@ -46,12 +47,6 @@ cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan)
 
 	for (; i < num_vchans; i++) {
 		dpi_conf = &dpivf->conf[i];
-		max_desc = dpi_conf->c_desc.max_cnt + 1;
-		if (dpi_conf->c_desc.compl_ptr) {
-			for (j = 0; j < max_desc; j++)
-				rte_free(dpi_conf->c_desc.compl_ptr[j]);
-		}
-
 		rte_free(dpi_conf->c_desc.compl_ptr);
 		dpi_conf->c_desc.compl_ptr = NULL;
 	}
@@ -261,7 +256,7 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 	if (max_desc > CNXK_DPI_MAX_DESC)
 		max_desc = CNXK_DPI_MAX_DESC;
 
-	size = (max_desc * sizeof(struct cnxk_dpi_compl_s *));
+	size = (max_desc * sizeof(uint8_t) * CNXK_DPI_COMPL_OFFSET);
 	dpi_conf->c_desc.compl_ptr = rte_zmalloc(NULL, size, 0);
 
 	if (dpi_conf->c_desc.compl_ptr == NULL) {
@@ -269,16 +264,8 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < max_desc; i++) {
-		dpi_conf->c_desc.compl_ptr[i] =
-			rte_zmalloc(NULL, sizeof(struct cnxk_dpi_compl_s), 0);
-		if (!dpi_conf->c_desc.compl_ptr[i]) {
-			plt_err("Failed to allocate for descriptor memory");
-			return -ENOMEM;
-		}
-
-		dpi_conf->c_desc.compl_ptr[i]->cdata = CNXK_DPI_REQ_CDATA;
-	}
+	for (i = 0; i < max_desc; i++)
+		dpi_conf->c_desc.compl_ptr[i * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
 
 	dpi_conf->c_desc.max_cnt = (max_desc - 1);
 
@@ -301,10 +288,8 @@ cnxk_dmadev_start(struct rte_dma_dev *dev)
 		dpi_conf->pnum_words = 0;
 		dpi_conf->pending = 0;
 		dpi_conf->desc_idx = 0;
-		for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++) {
-			if (dpi_conf->c_desc.compl_ptr[j])
-				dpi_conf->c_desc.compl_ptr[j]->cdata = CNXK_DPI_REQ_CDATA;
-		}
+		for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++)
+			dpi_conf->c_desc.compl_ptr[j * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
 		nb_desc += dpi_conf->c_desc.max_cnt + 1;
 		cnxk_stats_reset(dev, i);
 		dpi_conf->completed_offset = 0;
@@ -382,22 +367,22 @@ cnxk_dmadev_completed(void *dev_private, uint16_t vchan, const uint16_t nb_cpls,
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc;
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t status;
 	int cnt;
 
 	for (cnt = 0; cnt < nb_cpls; cnt++) {
-		comp_ptr = c_desc->compl_ptr[c_desc->head];
-
-		if (comp_ptr->cdata) {
-			if (comp_ptr->cdata == CNXK_DPI_REQ_CDATA)
+		status = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET];
+		if (status) {
+			if (status == CNXK_DPI_REQ_CDATA)
 				break;
 			*has_error = 1;
 			dpi_conf->stats.errors++;
+			c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] =
+				CNXK_DPI_REQ_CDATA;
 			CNXK_DPI_STRM_INC(*c_desc, head);
 			break;
 		}
-
-		comp_ptr->cdata = CNXK_DPI_REQ_CDATA;
+		c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
 		CNXK_DPI_STRM_INC(*c_desc, head);
 	}
 
@@ -414,19 +399,17 @@ cnxk_dmadev_completed_status(void *dev_private, uint16_t vchan, const uint16_t n
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	int cnt;
 
 	for (cnt = 0; cnt < nb_cpls; cnt++) {
-		comp_ptr = c_desc->compl_ptr[c_desc->head];
-		status[cnt] = comp_ptr->cdata;
+		status[cnt] = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET];
 		if (status[cnt]) {
 			if (status[cnt] == CNXK_DPI_REQ_CDATA)
 				break;
 
 			dpi_conf->stats.errors++;
 		}
-		comp_ptr->cdata = CNXK_DPI_REQ_CDATA;
+		c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
 		CNXK_DPI_STRM_INC(*c_desc, head);
 	}
 
@@ -593,7 +576,7 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_de
 	rdpi = &dpivf->rdpi;
 
 	rdpi->pci_dev = pci_dev;
-	rc = roc_dpi_dev_init(rdpi, offsetof(struct cnxk_dpi_compl_s, wqecs));
+	rc = roc_dpi_dev_init(rdpi, offsetof(struct rte_event_dma_adapter_op, impl_opaque));
 	if (rc < 0)
 		goto err_out_free;
 
diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h
index 610a360ba217..a80db333a0a2 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.h
+++ b/drivers/dma/cnxk/cnxk_dmadev.h
@@ -37,17 +37,12 @@
 #define CNXK_DPI_MAX_CMD_SZ		    CNXK_DPI_CMD_LEN(CNXK_DPI_MAX_POINTER,		\
 							     CNXK_DPI_MAX_POINTER)
 #define CNXK_DPI_CHUNKS_FROM_DESC(cz, desc) (((desc) / (((cz) / 8) / CNXK_DPI_MAX_CMD_SZ)) + 1)
-
+#define CNXK_DPI_COMPL_OFFSET		    ROC_CACHE_LINE_SZ
 /* Set Completion data to 0xFF when request submitted,
  * upon successful request completion engine reset to completion status
  */
 #define CNXK_DPI_REQ_CDATA 0xFF
 
-/* Set Completion data to 0xDEADBEEF when request submitted for SSO.
- * This helps differentiate if the dequeue is called after cnxk enueue.
- */
-#define CNXK_DPI_REQ_SSO_CDATA    0xDEADBEEF
-
 union cnxk_dpi_instr_cmd {
 	uint64_t u;
 	struct cn9k_dpi_instr_cmd {
@@ -91,24 +86,11 @@ union cnxk_dpi_instr_cmd {
 	} cn10k;
 };
 
-struct cnxk_dpi_compl_s {
-	uint64_t cdata;
-	void *op;
-	uint16_t dev_id;
-	uint16_t vchan;
-	uint32_t wqecs;
-};
-
 struct cnxk_dpi_cdesc_data_s {
-	struct cnxk_dpi_compl_s **compl_ptr;
 	uint16_t max_cnt;
 	uint16_t head;
 	uint16_t tail;
-};
-
-struct cnxk_dma_adapter_info {
-	bool enabled;               /* Set if vchan queue is added to dma adapter. */
-	struct rte_mempool *req_mp; /* DMA inflight request mempool. */
+	uint8_t *compl_ptr;
 };
 
 struct cnxk_dpi_conf {
@@ -119,7 +101,7 @@ struct cnxk_dpi_conf {
 	uint16_t desc_idx;
 	struct rte_dma_stats stats;
 	uint64_t completed_offset;
-	struct cnxk_dma_adapter_info adapter_info;
+	bool adapter_enabled;
 };
 
 struct cnxk_dpi_vf_s {
diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c
index 9f7f9b2eed0e..38f4524439af 100644
--- a/drivers/dma/cnxk/cnxk_dmadev_fp.c
+++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c
@@ -245,14 +245,14 @@ cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t d
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD];
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t *comp_ptr;
 	int rc;
 
 	if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) ==
 		     dpi_conf->c_desc.head))
 		return -ENOSPC;
 
-	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
+	comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
 	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
 
 	cmd[0] = (1UL << 54) | (1UL << 48);
@@ -301,7 +301,7 @@ cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	const struct rte_dma_sge *fptr, *lptr;
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t *comp_ptr;
 	uint64_t hdr[4];
 	int rc;
 
@@ -309,7 +309,7 @@ cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 		     dpi_conf->c_desc.head))
 		return -ENOSPC;
 
-	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
+	comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
 	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
 
 	hdr[1] = dpi_conf->cmd.u | ((flags & RTE_DMA_OP_FLAG_AUTO_FREE) << 37);
@@ -357,14 +357,14 @@ cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD];
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t *comp_ptr;
 	int rc;
 
 	if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) ==
 		     dpi_conf->c_desc.head))
 		return -ENOSPC;
 
-	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
+	comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
 	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
 
 	cmd[0] = dpi_conf->cmd.u | (1U << 6) | 1U;
@@ -403,7 +403,7 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 {
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t *comp_ptr;
 	uint64_t hdr[4];
 	int rc;
 
@@ -411,7 +411,7 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 		     dpi_conf->c_desc.head))
 		return -ENOSPC;
 
-	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
+	comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
 	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
 
 	hdr[0] = dpi_conf->cmd.u | (nb_dst << 6) | nb_src;
@@ -454,7 +454,6 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 {
 	const struct rte_dma_sge *src, *dst;
 	struct rte_event_dma_adapter_op *op;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
 	struct cn10k_sso_hws *work;
@@ -471,20 +470,12 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];
 
-		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
-			return count;
-
-		comp_ptr->op = op;
-		comp_ptr->dev_id = op->dma_dev_id;
-		comp_ptr->vchan = op->vchan;
-		comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
-
 		nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
 		nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
 
 		hdr[0] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 54);
 		hdr[0] |= (nb_dst << 6) | nb_src;
-		hdr[1] = ((uint64_t)comp_ptr);
+		hdr[1] = (uint64_t)op;
 		hdr[2] = cnxk_dma_adapter_format_event(ev[count].event);
 
 		src = &op->src_dst_seg[0];
@@ -524,7 +515,6 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event
 {
 	const struct rte_dma_sge *fptr, *lptr;
 	struct rte_event_dma_adapter_op *op;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cn9k_sso_hws_dual *work;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
@@ -544,16 +534,8 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event
 		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];
 
-		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
-			return count;
-
-		comp_ptr->op = op;
-		comp_ptr->dev_id = op->dma_dev_id;
-		comp_ptr->vchan = op->vchan;
-		comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
-
 		hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36);
-		hdr[2] = (uint64_t)comp_ptr;
+		hdr[2] = (uint64_t)op;
 
 		nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
 		nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
@@ -605,7 +587,6 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 {
 	const struct rte_dma_sge *fptr, *lptr;
 	struct rte_event_dma_adapter_op *op;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
 	struct cn9k_sso_hws *work;
@@ -622,16 +603,8 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];
 
-		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
-			return count;
-
-		comp_ptr->op = op;
-		comp_ptr->dev_id = op->dma_dev_id;
-		comp_ptr->vchan = op->vchan;
-		comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
-
 		hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36);
-		hdr[2] = (uint64_t)comp_ptr;
+		hdr[2] = (uint64_t)op;
 
 		nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
 		nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
@@ -682,38 +655,20 @@ uintptr_t
 cnxk_dma_adapter_dequeue(uintptr_t get_work1)
 {
 	struct rte_event_dma_adapter_op *op;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
-	rte_mcslock_t mcs_lock_me;
-	RTE_ATOMIC(uint8_t) *wqecs;
-
-	comp_ptr = (struct cnxk_dpi_compl_s *)get_work1;
-
-	/* Dequeue can be called without calling cnx_enqueue in case of
-	 * dma_adapter. When its called from adapter, dma op will not be
-	 * embedded in completion pointer. In those cases return op.
-	 */
-	if (comp_ptr->cdata != CNXK_DPI_REQ_SSO_CDATA)
-		return (uintptr_t)comp_ptr;
 
-	dpivf =	rte_dma_fp_objs[comp_ptr->dev_id].dev_private;
-	dpi_conf = &dpivf->conf[comp_ptr->vchan];
+	op = (struct rte_event_dma_adapter_op *)get_work1;
+	dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
+	dpi_conf = &dpivf->conf[op->vchan];
 
-	rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
-	wqecs = (uint8_t __rte_atomic *)&comp_ptr->wqecs;
-	if (rte_atomic_load_explicit(wqecs, rte_memory_order_relaxed) != 0)
-		dpi_conf->stats.errors++;
+	if (rte_atomic_load_explicit(&op->impl_opaque[0], rte_memory_order_relaxed) != 0)
+		rte_atomic_fetch_add_explicit(&dpi_conf->stats.errors, 1, rte_memory_order_relaxed);
 
 	/* Take into account errors also. This is similar to
 	 * cnxk_dmadev_completed_status().
 	 */
-	dpi_conf->stats.completed++;
-	rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
-
-	op = (struct rte_event_dma_adapter_op *)comp_ptr->op;
-
-	rte_mempool_put(dpi_conf->adapter_info.req_mp, comp_ptr);
+	rte_atomic_fetch_add_explicit(&dpi_conf->stats.completed, 1, rte_memory_order_relaxed);
 
 	return (uintptr_t)op;
 }
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index a2a59b16c912..98db11ad61fa 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -739,31 +739,6 @@ cnxk_crypto_adapter_qp_del(const struct rte_cryptodev *cdev,
 	return 0;
 }
 
-static int
-dma_adapter_vchan_setup(const int16_t dma_dev_id, struct cnxk_dpi_conf *vchan,
-			uint16_t vchan_id)
-{
-	char name[RTE_MEMPOOL_NAMESIZE];
-	uint32_t cache_size, nb_req;
-	unsigned int req_size;
-
-	snprintf(name, RTE_MEMPOOL_NAMESIZE, "cnxk_dma_req_%u:%u", dma_dev_id, vchan_id);
-	req_size = sizeof(struct cnxk_dpi_compl_s);
-
-	nb_req = vchan->c_desc.max_cnt;
-	cache_size = 16;
-	nb_req += (cache_size * rte_lcore_count());
-
-	vchan->adapter_info.req_mp = rte_mempool_create(name, nb_req, req_size, cache_size, 0,
-							NULL, NULL, NULL, NULL, rte_socket_id(), 0);
-	if (vchan->adapter_info.req_mp == NULL)
-		return -ENOMEM;
-
-	vchan->adapter_info.enabled = true;
-
-	return 0;
-}
-
 int
 cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev,
 			   const int16_t dma_dev_id, uint16_t vchan_id)
@@ -772,7 +747,6 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev,
 	uint32_t adptr_xae_cnt = 0;
 	struct cnxk_dpi_vf_s *dpivf;
 	struct cnxk_dpi_conf *vchan;
-	int ret;
 
 	dpivf = rte_dma_fp_objs[dma_dev_id].dev_private;
 	if ((int16_t)vchan_id == -1) {
@@ -780,19 +754,13 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev,
 
 		for (vchan_id = 0; vchan_id < dpivf->num_vchans; vchan_id++) {
 			vchan = &dpivf->conf[vchan_id];
-			ret = dma_adapter_vchan_setup(dma_dev_id, vchan, vchan_id);
-			if (ret) {
-				cnxk_dma_adapter_vchan_del(dma_dev_id, -1);
-				return ret;
-			}
-			adptr_xae_cnt += vchan->adapter_info.req_mp->size;
+			vchan->adapter_enabled = true;
+			adptr_xae_cnt += vchan->c_desc.max_cnt;
 		}
 	} else {
 		vchan = &dpivf->conf[vchan_id];
-		ret = dma_adapter_vchan_setup(dma_dev_id, vchan, vchan_id);
-		if (ret)
-			return ret;
-		adptr_xae_cnt = vchan->adapter_info.req_mp->size;
+		vchan->adapter_enabled = true;
+		adptr_xae_cnt = vchan->c_desc.max_cnt;
 	}
 
 	/* Update dma adapter XAE count */
@@ -805,8 +773,7 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev,
 static int
 dma_adapter_vchan_free(struct cnxk_dpi_conf *vchan)
 {
-	rte_mempool_free(vchan->adapter_info.req_mp);
-	vchan->adapter_info.enabled = false;
+	vchan->adapter_enabled = false;
 
 	return 0;
 }
@@ -823,12 +790,12 @@ cnxk_dma_adapter_vchan_del(const int16_t dma_dev_id, uint16_t vchan_id)
 
 		for (vchan_id = 0; vchan_id < dpivf->num_vchans; vchan_id++) {
 			vchan = &dpivf->conf[vchan_id];
-			if (vchan->adapter_info.enabled)
+			if (vchan->adapter_enabled)
 				dma_adapter_vchan_free(vchan);
 		}
 	} else {
 		vchan = &dpivf->conf[vchan_id];
-		if (vchan->adapter_info.enabled)
+		if (vchan->adapter_enabled)
 			dma_adapter_vchan_free(vchan);
 	}
 
-- 
2.43.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [PATCH 2/2] dma/cnxk: remove completion pool
  2024-04-06 10:13 ` [PATCH 2/2] dma/cnxk: remove completion pool pbhagavatula
@ 2024-04-16  8:56   ` Vamsi Krishna Attunuru
  0 siblings, 0 replies; 17+ messages in thread
From: Vamsi Krishna Attunuru @ 2024-04-16  8:56 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula, Jerin Jacob,
	Pavan Nikhilesh Bhagavatula, Shijith Thotton
  Cc: dev



> -----Original Message-----
> From: pbhagavatula@marvell.com <pbhagavatula@marvell.com>
> Sent: Saturday, April 6, 2024 3:43 PM
> To: Jerin Jacob <jerinj@marvell.com>; Vamsi Krishna Attunuru
> <vattunuru@marvell.com>; Pavan Nikhilesh Bhagavatula
> <pbhagavatula@marvell.com>; Shijith Thotton <sthotton@marvell.com>
> Cc: dev@dpdk.org
> Subject: [PATCH 2/2] dma/cnxk: remove completion pool
> 
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> 
> Use the DMA op itself to store metadata and remove the use of
> the completion pool.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---

Acked-by: Vamsi Attunuru <vattunuru@marvell.com>


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v2 1/2] eventdev/dma: reorganize event DMA ops
  2024-04-06 10:13 [PATCH 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula
  2024-04-06 10:13 ` [PATCH 2/2] dma/cnxk: remove completion pool pbhagavatula
@ 2024-04-17  5:58 ` pbhagavatula
  2024-04-17  5:58   ` [PATCH v2 2/2] dma/cnxk: remove completion pool pbhagavatula
  2024-04-17  8:26   ` [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula
  2024-05-16  7:36 ` [PATCH " Amit Prakash Shukla
  2 siblings, 2 replies; 17+ messages in thread
From: pbhagavatula @ 2024-04-17  5:58 UTC (permalink / raw)
  To: jerinj, Amit Prakash Shukla, Vamsi Attunuru; +Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Re-organize the event DMA ops structure so that it can
hold the source and destination pointers without needing
additional memory; the mempool that allocates memory for
rte_event_dma_adapter_op can size its elements to
accommodate all the needed source and destination
pointers.

Add multiple words for holding user metadata, adapter
implementation-specific metadata and event metadata.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 v2 Changes:
 - Fix 32bit compilation
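
 The 32-bit fix is the (uintptr_t) intermediate cast now used when
 freeing the mbufs in perf_dmadev_destroy(): rte_iova_t is 64 bits
 wide, so casting it straight to a pointer does not compile on 32-bit
 targets (cast to pointer from integer of different size), while
 narrowing through uintptr_t first is well defined whenever the
 address fits the native word. A reduced illustration (variable names
 are hypothetical):

	rte_iova_t addr = op->src_dst_seg[0].addr;	/* 64-bit IOVA */

	/* Fails under -Werror on 32-bit (pointer/integer size mismatch):
	 * struct rte_mbuf *m = (struct rte_mbuf *)addr;
	 */

	/* Portable: narrow to the native word first. */
	struct rte_mbuf *m = (struct rte_mbuf *)(uintptr_t)addr;
	rte_pktmbuf_free(m);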

 app/test-eventdev/test_perf_common.c        | 26 ++++--------
 app/test/test_event_dma_adapter.c           | 20 +++------
 doc/guides/prog_guide/event_dma_adapter.rst |  2 +-
 drivers/dma/cnxk/cnxk_dmadev_fp.c           | 39 +++++++----------
 lib/eventdev/rte_event_dma_adapter.c        | 27 ++++--------
 lib/eventdev/rte_event_dma_adapter.h        | 46 +++++++++++++++------
 6 files changed, 72 insertions(+), 88 deletions(-)

diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c
index 93e6132de8..db0f9c1f3b 100644
--- a/app/test-eventdev/test_perf_common.c
+++ b/app/test-eventdev/test_perf_common.c
@@ -1503,7 +1503,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
 		prod = 0;
 		for (; port < perf_nb_event_ports(opt); port++) {
 			struct prod_data *p = &t->prod[port];
-			struct rte_event *response_info;
 			uint32_t flow_id;

 			p->dev_id = opt->dev_id;
@@ -1523,13 +1522,10 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
 			for (flow_id = 0; flow_id < t->nb_flows; flow_id++) {
 				rte_mempool_get(t->da_op_pool, (void **)&op);

-				op->src_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0);
-				op->dst_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0);
-
-				op->src_seg->addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
-				op->dst_seg->addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
-				op->src_seg->length = 1024;
-				op->dst_seg->length = 1024;
+				op->src_dst_seg[0].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
+				op->src_dst_seg[1].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
+				op->src_dst_seg[0].length = 1024;
+				op->src_dst_seg[1].length = 1024;
 				op->nb_src = 1;
 				op->nb_dst = 1;
 				op->flags = RTE_DMA_OP_FLAG_SUBMIT;
@@ -1537,12 +1533,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
 				op->dma_dev_id = dma_dev_id;
 				op->vchan = vchan_id;

-				response_info = (struct rte_event *)((uint8_t *)op +
-						 sizeof(struct rte_event_dma_adapter_op));
-				response_info->queue_id = p->queue_id;
-				response_info->sched_type = RTE_SCHED_TYPE_ATOMIC;
-				response_info->flow_id = flow_id;
-
 				p->da.dma_op[flow_id] = op;
 			}

@@ -2036,7 +2026,7 @@ perf_dmadev_setup(struct evt_test *test, struct evt_options *opt)
 		return -ENODEV;
 	}

-	elt_size = sizeof(struct rte_event_dma_adapter_op) + sizeof(struct rte_event);
+	elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2);
 	t->da_op_pool = rte_mempool_create("dma_op_pool", opt->pool_sz, elt_size, 256,
 					   0, NULL, NULL, NULL, NULL, rte_socket_id(), 0);
 	if (t->da_op_pool == NULL) {
@@ -2085,10 +2075,8 @@ perf_dmadev_destroy(struct evt_test *test, struct evt_options *opt)
 		for (flow_id = 0; flow_id < t->nb_flows; flow_id++) {
 			op = p->da.dma_op[flow_id];

-			rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_seg->addr);
-			rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->dst_seg->addr);
-			rte_free(op->src_seg);
-			rte_free(op->dst_seg);
+			rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_dst_seg[0].addr);
+			rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_dst_seg[1].addr);
 			rte_mempool_put(op->op_mp, op);
 		}

diff --git a/app/test/test_event_dma_adapter.c b/app/test/test_event_dma_adapter.c
index 35b417b69f..d9dff4ff7d 100644
--- a/app/test/test_event_dma_adapter.c
+++ b/app/test/test_event_dma_adapter.c
@@ -235,7 +235,6 @@ test_op_forward_mode(void)
 	struct rte_mbuf *dst_mbuf[TEST_MAX_OP];
 	struct rte_event_dma_adapter_op *op;
 	struct rte_event ev[TEST_MAX_OP];
-	struct rte_event response_info;
 	int ret, i;

 	ret = rte_pktmbuf_alloc_bulk(params.src_mbuf_pool, src_mbuf, TEST_MAX_OP);
@@ -253,14 +252,11 @@ test_op_forward_mode(void)
 		rte_mempool_get(params.op_mpool, (void **)&op);
 		TEST_ASSERT_NOT_NULL(op, "Failed to allocate dma operation struct\n");

-		op->src_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0);
-		op->dst_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0);
-
 		/* Update Op */
-		op->src_seg->addr = rte_pktmbuf_iova(src_mbuf[i]);
-		op->dst_seg->addr = rte_pktmbuf_iova(dst_mbuf[i]);
-		op->src_seg->length = PACKET_LENGTH;
-		op->dst_seg->length = PACKET_LENGTH;
+		op->src_dst_seg[0].addr = rte_pktmbuf_iova(src_mbuf[i]);
+		op->src_dst_seg[1].addr = rte_pktmbuf_iova(dst_mbuf[i]);
+		op->src_dst_seg[0].length = PACKET_LENGTH;
+		op->src_dst_seg[1].length = PACKET_LENGTH;
 		op->nb_src = 1;
 		op->nb_dst = 1;
 		op->flags = RTE_DMA_OP_FLAG_SUBMIT;
@@ -268,10 +264,6 @@ test_op_forward_mode(void)
 		op->dma_dev_id = TEST_DMA_DEV_ID;
 		op->vchan = TEST_DMA_VCHAN_ID;

-		response_info.event = dma_response_info.event;
-		rte_memcpy((uint8_t *)op + sizeof(struct rte_event_dma_adapter_op), &response_info,
-			   sizeof(struct rte_event));
-
 		/* Fill in event info and update event_ptr with rte_event_dma_adapter_op */
 		memset(&ev[i], 0, sizeof(struct rte_event));
 		ev[i].event = 0;
@@ -294,8 +286,6 @@ test_op_forward_mode(void)

 		TEST_ASSERT_EQUAL(ret, 0, "Data mismatch for dma adapter\n");

-		rte_free(op->src_seg);
-		rte_free(op->dst_seg);
 		rte_mempool_put(op->op_mp, op);
 	}

@@ -400,7 +390,7 @@ configure_dmadev(void)
 						       rte_socket_id());
 	RTE_TEST_ASSERT_NOT_NULL(params.dst_mbuf_pool, "Can't create DMA_DST_MBUFPOOL\n");

-	elt_size = sizeof(struct rte_event_dma_adapter_op) + sizeof(struct rte_event);
+	elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2);
 	params.op_mpool = rte_mempool_create("EVENT_DMA_OP_POOL", DMA_OP_POOL_SIZE, elt_size, 0,
 					     0, NULL, NULL, NULL, NULL, rte_socket_id(), 0);
 	RTE_TEST_ASSERT_NOT_NULL(params.op_mpool, "Can't create DMA_OP_POOL\n");
diff --git a/doc/guides/prog_guide/event_dma_adapter.rst b/doc/guides/prog_guide/event_dma_adapter.rst
index 3443b6a803..1fb9b0a07b 100644
--- a/doc/guides/prog_guide/event_dma_adapter.rst
+++ b/doc/guides/prog_guide/event_dma_adapter.rst
@@ -144,7 +144,7 @@ on which it enqueues events towards the DMA adapter using ``rte_event_enqueue_bu
    uint32_t cap;
    int ret;

-   /* Fill in event info and update event_ptr with rte_dma_op */
+   /* Fill in event info and update event_ptr with rte_event_dma_adapter_op */
    memset(&ev, 0, sizeof(ev));
    .
    .
diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c
index f6562b603e..9f7f9b2eed 100644
--- a/drivers/dma/cnxk/cnxk_dmadev_fp.c
+++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c
@@ -457,7 +457,6 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
-	struct rte_event *rsp_info;
 	struct cn10k_sso_hws *work;
 	uint16_t nb_src, nb_dst;
 	rte_mcslock_t mcs_lock_me;
@@ -469,9 +468,7 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)

 	for (count = 0; count < nb_events; count++) {
 		op = ev[count].event_ptr;
-		rsp_info = (struct rte_event *)((uint8_t *)op +
-			     sizeof(struct rte_event_dma_adapter_op));
-		dpivf =	rte_dma_fp_objs[op->dma_dev_id].dev_private;
+		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];

 		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
@@ -488,15 +485,14 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 		hdr[0] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 54);
 		hdr[0] |= (nb_dst << 6) | nb_src;
 		hdr[1] = ((uint64_t)comp_ptr);
-		hdr[2] = cnxk_dma_adapter_format_event(rsp_info->event);
+		hdr[2] = cnxk_dma_adapter_format_event(ev[count].event);

-		src = &op->src_seg[0];
-		dst = &op->dst_seg[0];
+		src = &op->src_dst_seg[0];
+		dst = &op->src_dst_seg[op->nb_src];

 		if (CNXK_TAG_IS_HEAD(work->gw_rdata) ||
 		    ((CNXK_TT_FROM_TAG(work->gw_rdata) == SSO_TT_ORDERED) &&
-		    (rsp_info->sched_type & DPI_HDR_TT_MASK) ==
-			    RTE_SCHED_TYPE_ORDERED))
+		     (ev[count].sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED))
 			roc_sso_hws_head_wait(work->base);

 		rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
@@ -566,12 +562,12 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event
 		 * For all other cases, src pointers are first pointers.
 		 */
 		if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) {
-			fptr = &op->dst_seg[0];
-			lptr = &op->src_seg[0];
+			fptr = &op->src_dst_seg[nb_src];
+			lptr = &op->src_dst_seg[0];
 			RTE_SWAP(nb_src, nb_dst);
 		} else {
-			fptr = &op->src_seg[0];
-			lptr = &op->dst_seg[0];
+			fptr = &op->src_dst_seg[0];
+			lptr = &op->src_dst_seg[nb_src];
 		}

 		hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48;
@@ -612,7 +608,6 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
-	struct rte_event *rsp_info;
 	struct cn9k_sso_hws *work;
 	uint16_t nb_src, nb_dst;
 	rte_mcslock_t mcs_lock_me;
@@ -624,9 +619,7 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)

 	for (count = 0; count < nb_events; count++) {
 		op = ev[count].event_ptr;
-		rsp_info = (struct rte_event *)((uint8_t *)op +
-			    sizeof(struct rte_event_dma_adapter_op));
-		dpivf =	rte_dma_fp_objs[op->dma_dev_id].dev_private;
+		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];

 		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
@@ -647,18 +640,18 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 		 * For all other cases, src pointers are first pointers.
 		 */
 		if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) {
-			fptr = &op->dst_seg[0];
-			lptr = &op->src_seg[0];
+			fptr = &op->src_dst_seg[nb_src];
+			lptr = &op->src_dst_seg[0];
 			RTE_SWAP(nb_src, nb_dst);
 		} else {
-			fptr = &op->src_seg[0];
-			lptr = &op->dst_seg[0];
+			fptr = &op->src_dst_seg[0];
+			lptr = &op->src_dst_seg[nb_src];
 		}

 		hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48;
-		hdr[0] |= cnxk_dma_adapter_format_event(rsp_info->event);
+		hdr[0] |= cnxk_dma_adapter_format_event(ev[count].event);

-		if ((rsp_info->sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)
+		if ((ev[count].sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)
 			roc_sso_hws_head_wait(work->base);

 		rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
diff --git a/lib/eventdev/rte_event_dma_adapter.c b/lib/eventdev/rte_event_dma_adapter.c
index 24dff556db..e52ef46a1b 100644
--- a/lib/eventdev/rte_event_dma_adapter.c
+++ b/lib/eventdev/rte_event_dma_adapter.c
@@ -236,9 +236,9 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter,
 				      uint16_t vchan, uint16_t *nb_ops_flushed)
 {
 	struct rte_event_dma_adapter_op *op;
-	struct dma_vchan_info *tq;
 	uint16_t *head = &bufp->head;
 	uint16_t *tail = &bufp->tail;
+	struct dma_vchan_info *tq;
 	uint16_t n;
 	uint16_t i;
 	int ret;
@@ -257,11 +257,13 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter,
 	for (i = 0; i < n; i++)	{
 		op = bufp->op_buffer[*head];
 		if (op->nb_src == 1 && op->nb_dst == 1)
-			ret = rte_dma_copy(dma_dev_id, vchan, op->src_seg->addr, op->dst_seg->addr,
-					   op->src_seg->length, op->flags);
+			ret = rte_dma_copy(dma_dev_id, vchan, op->src_dst_seg[0].addr,
+					   op->src_dst_seg[1].addr, op->src_dst_seg[0].length,
+					   op->flags);
 		else
-			ret = rte_dma_copy_sg(dma_dev_id, vchan, op->src_seg, op->dst_seg,
-					      op->nb_src, op->nb_dst, op->flags);
+			ret = rte_dma_copy_sg(dma_dev_id, vchan, &op->src_dst_seg[0],
+					      &op->src_dst_seg[op->nb_src], op->nb_src, op->nb_dst,
+					      op->flags);
 		if (ret < 0)
 			break;

@@ -511,8 +513,7 @@ edma_enq_to_dma_dev(struct event_dma_adapter *adapter, struct rte_event *ev, uns
 		if (dma_op == NULL)
 			continue;

-		/* Expected to have response info appended to dma_op. */
-
+		dma_op->impl_opaque[0] = ev[i].event;
 		dma_dev_id = dma_op->dma_dev_id;
 		vchan = dma_op->vchan;
 		vchan_qinfo = &adapter->dma_devs[dma_dev_id].vchanq[vchan];
@@ -647,7 +648,6 @@ edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_a
 	uint8_t event_port_id = adapter->event_port_id;
 	uint8_t event_dev_id = adapter->eventdev_id;
 	struct rte_event events[DMA_BATCH_SIZE];
-	struct rte_event *response_info;
 	uint16_t nb_enqueued, nb_ev;
 	uint8_t retry;
 	uint8_t i;
@@ -659,16 +659,7 @@ edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_a
 	for (i = 0; i < num; i++) {
 		struct rte_event *ev = &events[nb_ev++];

-		/* Expected to have response info appended to dma_op. */
-		response_info = (struct rte_event *)((uint8_t *)ops[i] +
-							  sizeof(struct rte_event_dma_adapter_op));
-		if (unlikely(response_info == NULL)) {
-			if (ops[i] != NULL && ops[i]->op_mp != NULL)
-				rte_mempool_put(ops[i]->op_mp, ops[i]);
-			continue;
-		}
-
-		rte_memcpy(ev, response_info, sizeof(struct rte_event));
+		ev->event = ops[i]->impl_opaque[0];
 		ev->event_ptr = ops[i];
 		ev->event_type = RTE_EVENT_TYPE_DMADEV;
 		if (adapter->implicit_release_disabled)
diff --git a/lib/eventdev/rte_event_dma_adapter.h b/lib/eventdev/rte_event_dma_adapter.h
index e924ab673d..048ddba3f3 100644
--- a/lib/eventdev/rte_event_dma_adapter.h
+++ b/lib/eventdev/rte_event_dma_adapter.h
@@ -157,24 +157,46 @@ extern "C" {
  * instance.
  */
 struct rte_event_dma_adapter_op {
-	struct rte_dma_sge *src_seg;
-	/**< Source segments. */
-	struct rte_dma_sge *dst_seg;
-	/**< Destination segments. */
-	uint16_t nb_src;
-	/**< Number of source segments. */
-	uint16_t nb_dst;
-	/**< Number of destination segments. */
 	uint64_t flags;
 	/**< Flags related to the operation.
 	 * @see RTE_DMA_OP_FLAG_*
 	 */
-	int16_t dma_dev_id;
-	/**< DMA device ID to be used */
-	uint16_t vchan;
-	/**< DMA vchan ID to be used */
 	struct rte_mempool *op_mp;
 	/**< Mempool from which op is allocated. */
+	enum rte_dma_status_code status;
+	/**< Status code for this operation. */
+	uint32_t rsvd;
+	/**< Reserved for future use. */
+	uint64_t impl_opaque[2];
+	/**< Implementation-specific opaque data.
+	 * A DMA device implementation may use this field to hold
+	 * implementation-specific values shared between enqueue and
+	 * dequeue operations.
+	 * The application should not modify this field.
+	 */
+	uint64_t user_meta;
+	/**< Memory to store user-specific metadata.
+	 * The DMA device implementation should not modify this area.
+	 */
+	uint64_t event_meta;
+	/**< Event metadata that defines event attributes when used in OP_NEW mode.
+	 * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_NEW
+	 * @see struct rte_event::event
+	 */
+	int16_t dma_dev_id;
+	/**< DMA device ID to be used with OP_FORWARD mode.
+	 * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD
+	 */
+	uint16_t vchan;
+	/**< DMA vchan ID to be used with OP_FORWARD mode.
+	 * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD
+	 */
+	uint16_t nb_src;
+	/**< Number of source segments. */
+	uint16_t nb_dst;
+	/**< Number of destination segments. */
+	struct rte_dma_sge src_dst_seg[0];
+	/**< Source and destination segments. */
 };

 /**
--
2.25.1


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v2 2/2] dma/cnxk: remove completion pool
  2024-04-17  5:58 ` [PATCH v2 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula
@ 2024-04-17  5:58   ` pbhagavatula
  2024-04-17  8:26   ` [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula
  1 sibling, 0 replies; 17+ messages in thread
From: pbhagavatula @ 2024-04-17  5:58 UTC (permalink / raw)
  To: jerinj, Vamsi Attunuru, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Use the DMA op itself to store metadata and remove the use of
the completion pool.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Vamsi Attunuru <vattunuru@marvell.com>
---
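For the plain dmadev completion path, the per-descriptor
cnxk_dpi_compl_s allocations are likewise replaced by one flat byte
array: a single status byte per descriptor, strided
CNXK_DPI_COMPL_OFFSET (one cache line) apart, presumably so that each
engine write lands on its own cache line. Condensed from the diff
below:

	/* One status byte per descriptor, each on its own cache line. */
	size = (max_desc * sizeof(uint8_t) * CNXK_DPI_COMPL_OFFSET);
	dpi_conf->c_desc.compl_ptr = rte_zmalloc(NULL, size, 0);

	/* Arm: CNXK_DPI_REQ_CDATA (0xFF) marks a request in flight. */
	for (i = 0; i < max_desc; i++)
		dpi_conf->c_desc.compl_ptr[i * CNXK_DPI_COMPL_OFFSET] =
			CNXK_DPI_REQ_CDATA;

	/* Poll: the engine overwrites the byte with a completion code. */
	status = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET];
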
 drivers/dma/cnxk/cnxk_dmadev.c           | 53 ++++++----------
 drivers/dma/cnxk/cnxk_dmadev.h           | 24 +------
 drivers/dma/cnxk/cnxk_dmadev_fp.c        | 79 +++++-------------------
 drivers/event/cnxk/cnxk_eventdev_adptr.c | 47 +++-----------
 4 files changed, 45 insertions(+), 158 deletions(-)

diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c
index 4ab3cfbdf2..dfd7222713 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.c
+++ b/drivers/dma/cnxk/cnxk_dmadev.c
@@ -2,6 +2,8 @@
  * Copyright (C) 2021 Marvell International Ltd.
  */
 
+#include <rte_event_dma_adapter.h>
+
 #include <cnxk_dmadev.h>
 
 static int cnxk_stats_reset(struct rte_dma_dev *dev, uint16_t vchan);
@@ -30,8 +32,7 @@ cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan)
 {
 	struct cnxk_dpi_conf *dpi_conf;
 	uint16_t num_vchans;
-	uint16_t max_desc;
-	int i, j;
+	int i;
 
 	if (vchan == RTE_DMA_ALL_VCHAN) {
 		num_vchans = dpivf->num_vchans;
@@ -46,12 +47,6 @@ cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan)
 
 	for (; i < num_vchans; i++) {
 		dpi_conf = &dpivf->conf[i];
-		max_desc = dpi_conf->c_desc.max_cnt + 1;
-		if (dpi_conf->c_desc.compl_ptr) {
-			for (j = 0; j < max_desc; j++)
-				rte_free(dpi_conf->c_desc.compl_ptr[j]);
-		}
-
 		rte_free(dpi_conf->c_desc.compl_ptr);
 		dpi_conf->c_desc.compl_ptr = NULL;
 	}
@@ -261,7 +256,7 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 	if (max_desc > CNXK_DPI_MAX_DESC)
 		max_desc = CNXK_DPI_MAX_DESC;
 
-	size = (max_desc * sizeof(struct cnxk_dpi_compl_s *));
+	size = (max_desc * sizeof(uint8_t) * CNXK_DPI_COMPL_OFFSET);
 	dpi_conf->c_desc.compl_ptr = rte_zmalloc(NULL, size, 0);
 
 	if (dpi_conf->c_desc.compl_ptr == NULL) {
@@ -269,16 +264,8 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < max_desc; i++) {
-		dpi_conf->c_desc.compl_ptr[i] =
-			rte_zmalloc(NULL, sizeof(struct cnxk_dpi_compl_s), 0);
-		if (!dpi_conf->c_desc.compl_ptr[i]) {
-			plt_err("Failed to allocate for descriptor memory");
-			return -ENOMEM;
-		}
-
-		dpi_conf->c_desc.compl_ptr[i]->cdata = CNXK_DPI_REQ_CDATA;
-	}
+	for (i = 0; i < max_desc; i++)
+		dpi_conf->c_desc.compl_ptr[i * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
 
 	dpi_conf->c_desc.max_cnt = (max_desc - 1);
 
@@ -301,10 +288,8 @@ cnxk_dmadev_start(struct rte_dma_dev *dev)
 		dpi_conf->pnum_words = 0;
 		dpi_conf->pending = 0;
 		dpi_conf->desc_idx = 0;
-		for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++) {
-			if (dpi_conf->c_desc.compl_ptr[j])
-				dpi_conf->c_desc.compl_ptr[j]->cdata = CNXK_DPI_REQ_CDATA;
-		}
+		for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++)
+			dpi_conf->c_desc.compl_ptr[j * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
 		nb_desc += dpi_conf->c_desc.max_cnt + 1;
 		cnxk_stats_reset(dev, i);
 		dpi_conf->completed_offset = 0;
@@ -382,22 +367,22 @@ cnxk_dmadev_completed(void *dev_private, uint16_t vchan, const uint16_t nb_cpls,
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc;
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t status;
 	int cnt;
 
 	for (cnt = 0; cnt < nb_cpls; cnt++) {
-		comp_ptr = c_desc->compl_ptr[c_desc->head];
-
-		if (comp_ptr->cdata) {
-			if (comp_ptr->cdata == CNXK_DPI_REQ_CDATA)
+		status = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET];
+		if (status) {
+			if (status == CNXK_DPI_REQ_CDATA)
 				break;
 			*has_error = 1;
 			dpi_conf->stats.errors++;
+			c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] =
+				CNXK_DPI_REQ_CDATA;
 			CNXK_DPI_STRM_INC(*c_desc, head);
 			break;
 		}
-
-		comp_ptr->cdata = CNXK_DPI_REQ_CDATA;
+		c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
 		CNXK_DPI_STRM_INC(*c_desc, head);
 	}
 
@@ -414,19 +399,17 @@ cnxk_dmadev_completed_status(void *dev_private, uint16_t vchan, const uint16_t n
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	int cnt;
 
 	for (cnt = 0; cnt < nb_cpls; cnt++) {
-		comp_ptr = c_desc->compl_ptr[c_desc->head];
-		status[cnt] = comp_ptr->cdata;
+		status[cnt] = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET];
 		if (status[cnt]) {
 			if (status[cnt] == CNXK_DPI_REQ_CDATA)
 				break;
 
 			dpi_conf->stats.errors++;
 		}
-		comp_ptr->cdata = CNXK_DPI_REQ_CDATA;
+		c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
 		CNXK_DPI_STRM_INC(*c_desc, head);
 	}
 
@@ -593,7 +576,7 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_de
 	rdpi = &dpivf->rdpi;
 
 	rdpi->pci_dev = pci_dev;
-	rc = roc_dpi_dev_init(rdpi, offsetof(struct cnxk_dpi_compl_s, wqecs));
+	rc = roc_dpi_dev_init(rdpi, offsetof(struct rte_event_dma_adapter_op, impl_opaque));
 	if (rc < 0)
 		goto err_out_free;
 
diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h
index 610a360ba2..a80db333a0 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.h
+++ b/drivers/dma/cnxk/cnxk_dmadev.h
@@ -37,17 +37,12 @@
 #define CNXK_DPI_MAX_CMD_SZ		    CNXK_DPI_CMD_LEN(CNXK_DPI_MAX_POINTER,		\
 							     CNXK_DPI_MAX_POINTER)
 #define CNXK_DPI_CHUNKS_FROM_DESC(cz, desc) (((desc) / (((cz) / 8) / CNXK_DPI_MAX_CMD_SZ)) + 1)
-
+#define CNXK_DPI_COMPL_OFFSET		    ROC_CACHE_LINE_SZ
 /* Set Completion data to 0xFF when request submitted,
  * upon successful request completion engine reset to completion status
  */
 #define CNXK_DPI_REQ_CDATA 0xFF
 
-/* Set Completion data to 0xDEADBEEF when request submitted for SSO.
- * This helps differentiate if the dequeue is called after cnxk enueue.
- */
-#define CNXK_DPI_REQ_SSO_CDATA    0xDEADBEEF
-
 union cnxk_dpi_instr_cmd {
 	uint64_t u;
 	struct cn9k_dpi_instr_cmd {
@@ -91,24 +86,11 @@ union cnxk_dpi_instr_cmd {
 	} cn10k;
 };
 
-struct cnxk_dpi_compl_s {
-	uint64_t cdata;
-	void *op;
-	uint16_t dev_id;
-	uint16_t vchan;
-	uint32_t wqecs;
-};
-
 struct cnxk_dpi_cdesc_data_s {
-	struct cnxk_dpi_compl_s **compl_ptr;
 	uint16_t max_cnt;
 	uint16_t head;
 	uint16_t tail;
-};
-
-struct cnxk_dma_adapter_info {
-	bool enabled;               /* Set if vchan queue is added to dma adapter. */
-	struct rte_mempool *req_mp; /* DMA inflight request mempool. */
+	uint8_t *compl_ptr;
 };
 
 struct cnxk_dpi_conf {
@@ -119,7 +101,7 @@ struct cnxk_dpi_conf {
 	uint16_t desc_idx;
 	struct rte_dma_stats stats;
 	uint64_t completed_offset;
-	struct cnxk_dma_adapter_info adapter_info;
+	bool adapter_enabled;
 };
 
 struct cnxk_dpi_vf_s {
diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c
index 9f7f9b2eed..38f4524439 100644
--- a/drivers/dma/cnxk/cnxk_dmadev_fp.c
+++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c
@@ -245,14 +245,14 @@ cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t d
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD];
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t *comp_ptr;
 	int rc;
 
 	if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) ==
 		     dpi_conf->c_desc.head))
 		return -ENOSPC;
 
-	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
+	comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
 	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
 
 	cmd[0] = (1UL << 54) | (1UL << 48);
@@ -301,7 +301,7 @@ cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	const struct rte_dma_sge *fptr, *lptr;
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t *comp_ptr;
 	uint64_t hdr[4];
 	int rc;
 
@@ -309,7 +309,7 @@ cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 		     dpi_conf->c_desc.head))
 		return -ENOSPC;
 
-	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
+	comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
 	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
 
 	hdr[1] = dpi_conf->cmd.u | ((flags & RTE_DMA_OP_FLAG_AUTO_FREE) << 37);
@@ -357,14 +357,14 @@ cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD];
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t *comp_ptr;
 	int rc;
 
 	if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) ==
 		     dpi_conf->c_desc.head))
 		return -ENOSPC;
 
-	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
+	comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
 	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
 
 	cmd[0] = dpi_conf->cmd.u | (1U << 6) | 1U;
@@ -403,7 +403,7 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 {
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t *comp_ptr;
 	uint64_t hdr[4];
 	int rc;
 
@@ -411,7 +411,7 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 		     dpi_conf->c_desc.head))
 		return -ENOSPC;
 
-	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
+	comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
 	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
 
 	hdr[0] = dpi_conf->cmd.u | (nb_dst << 6) | nb_src;
@@ -454,7 +454,6 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 {
 	const struct rte_dma_sge *src, *dst;
 	struct rte_event_dma_adapter_op *op;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
 	struct cn10k_sso_hws *work;
@@ -471,20 +470,12 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];
 
-		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
-			return count;
-
-		comp_ptr->op = op;
-		comp_ptr->dev_id = op->dma_dev_id;
-		comp_ptr->vchan = op->vchan;
-		comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
-
 		nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
 		nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
 
 		hdr[0] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 54);
 		hdr[0] |= (nb_dst << 6) | nb_src;
-		hdr[1] = ((uint64_t)comp_ptr);
+		hdr[1] = (uint64_t)op;
 		hdr[2] = cnxk_dma_adapter_format_event(ev[count].event);
 
 		src = &op->src_dst_seg[0];
@@ -524,7 +515,6 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event
 {
 	const struct rte_dma_sge *fptr, *lptr;
 	struct rte_event_dma_adapter_op *op;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cn9k_sso_hws_dual *work;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
@@ -544,16 +534,8 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event
 		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];
 
-		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
-			return count;
-
-		comp_ptr->op = op;
-		comp_ptr->dev_id = op->dma_dev_id;
-		comp_ptr->vchan = op->vchan;
-		comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
-
 		hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36);
-		hdr[2] = (uint64_t)comp_ptr;
+		hdr[2] = (uint64_t)op;
 
 		nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
 		nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
@@ -605,7 +587,6 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 {
 	const struct rte_dma_sge *fptr, *lptr;
 	struct rte_event_dma_adapter_op *op;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
 	struct cn9k_sso_hws *work;
@@ -622,16 +603,8 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];
 
-		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
-			return count;
-
-		comp_ptr->op = op;
-		comp_ptr->dev_id = op->dma_dev_id;
-		comp_ptr->vchan = op->vchan;
-		comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
-
 		hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36);
-		hdr[2] = (uint64_t)comp_ptr;
+		hdr[2] = (uint64_t)op;
 
 		nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
 		nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
@@ -682,38 +655,20 @@ uintptr_t
 cnxk_dma_adapter_dequeue(uintptr_t get_work1)
 {
 	struct rte_event_dma_adapter_op *op;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
-	rte_mcslock_t mcs_lock_me;
-	RTE_ATOMIC(uint8_t) *wqecs;
-
-	comp_ptr = (struct cnxk_dpi_compl_s *)get_work1;
-
-	/* Dequeue can be called without calling cnx_enqueue in case of
-	 * dma_adapter. When its called from adapter, dma op will not be
-	 * embedded in completion pointer. In those cases return op.
-	 */
-	if (comp_ptr->cdata != CNXK_DPI_REQ_SSO_CDATA)
-		return (uintptr_t)comp_ptr;
 
-	dpivf =	rte_dma_fp_objs[comp_ptr->dev_id].dev_private;
-	dpi_conf = &dpivf->conf[comp_ptr->vchan];
+	op = (struct rte_event_dma_adapter_op *)get_work1;
+	dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
+	dpi_conf = &dpivf->conf[op->vchan];
 
-	rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
-	wqecs = (uint8_t __rte_atomic *)&comp_ptr->wqecs;
-	if (rte_atomic_load_explicit(wqecs, rte_memory_order_relaxed) != 0)
-		dpi_conf->stats.errors++;
+	if (rte_atomic_load_explicit(&op->impl_opaque[0], rte_memory_order_relaxed) != 0)
+		rte_atomic_fetch_add_explicit(&dpi_conf->stats.errors, 1, rte_memory_order_relaxed);
 
 	/* Take into account errors also. This is similar to
 	 * cnxk_dmadev_completed_status().
 	 */
-	dpi_conf->stats.completed++;
-	rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
-
-	op = (struct rte_event_dma_adapter_op *)comp_ptr->op;
-
-	rte_mempool_put(dpi_conf->adapter_info.req_mp, comp_ptr);
+	rte_atomic_fetch_add_explicit(&dpi_conf->stats.completed, 1, rte_memory_order_relaxed);
 
 	return (uintptr_t)op;
 }
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index a2a59b16c9..98db11ad61 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -739,31 +739,6 @@ cnxk_crypto_adapter_qp_del(const struct rte_cryptodev *cdev,
 	return 0;
 }
 
-static int
-dma_adapter_vchan_setup(const int16_t dma_dev_id, struct cnxk_dpi_conf *vchan,
-			uint16_t vchan_id)
-{
-	char name[RTE_MEMPOOL_NAMESIZE];
-	uint32_t cache_size, nb_req;
-	unsigned int req_size;
-
-	snprintf(name, RTE_MEMPOOL_NAMESIZE, "cnxk_dma_req_%u:%u", dma_dev_id, vchan_id);
-	req_size = sizeof(struct cnxk_dpi_compl_s);
-
-	nb_req = vchan->c_desc.max_cnt;
-	cache_size = 16;
-	nb_req += (cache_size * rte_lcore_count());
-
-	vchan->adapter_info.req_mp = rte_mempool_create(name, nb_req, req_size, cache_size, 0,
-							NULL, NULL, NULL, NULL, rte_socket_id(), 0);
-	if (vchan->adapter_info.req_mp == NULL)
-		return -ENOMEM;
-
-	vchan->adapter_info.enabled = true;
-
-	return 0;
-}
-
 int
 cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev,
 			   const int16_t dma_dev_id, uint16_t vchan_id)
@@ -772,7 +747,6 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev,
 	uint32_t adptr_xae_cnt = 0;
 	struct cnxk_dpi_vf_s *dpivf;
 	struct cnxk_dpi_conf *vchan;
-	int ret;
 
 	dpivf = rte_dma_fp_objs[dma_dev_id].dev_private;
 	if ((int16_t)vchan_id == -1) {
@@ -780,19 +754,13 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev,
 
 		for (vchan_id = 0; vchan_id < dpivf->num_vchans; vchan_id++) {
 			vchan = &dpivf->conf[vchan_id];
-			ret = dma_adapter_vchan_setup(dma_dev_id, vchan, vchan_id);
-			if (ret) {
-				cnxk_dma_adapter_vchan_del(dma_dev_id, -1);
-				return ret;
-			}
-			adptr_xae_cnt += vchan->adapter_info.req_mp->size;
+			vchan->adapter_enabled = true;
+			adptr_xae_cnt += vchan->c_desc.max_cnt;
 		}
 	} else {
 		vchan = &dpivf->conf[vchan_id];
-		ret = dma_adapter_vchan_setup(dma_dev_id, vchan, vchan_id);
-		if (ret)
-			return ret;
-		adptr_xae_cnt = vchan->adapter_info.req_mp->size;
+		vchan->adapter_enabled = true;
+		adptr_xae_cnt = vchan->c_desc.max_cnt;
 	}
 
 	/* Update dma adapter XAE count */
@@ -805,8 +773,7 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev,
 static int
 dma_adapter_vchan_free(struct cnxk_dpi_conf *vchan)
 {
-	rte_mempool_free(vchan->adapter_info.req_mp);
-	vchan->adapter_info.enabled = false;
+	vchan->adapter_enabled = false;
 
 	return 0;
 }
@@ -823,12 +790,12 @@ cnxk_dma_adapter_vchan_del(const int16_t dma_dev_id, uint16_t vchan_id)
 
 		for (vchan_id = 0; vchan_id < dpivf->num_vchans; vchan_id++) {
 			vchan = &dpivf->conf[vchan_id];
-			if (vchan->adapter_info.enabled)
+			if (vchan->adapter_enabled)
 				dma_adapter_vchan_free(vchan);
 		}
 	} else {
 		vchan = &dpivf->conf[vchan_id];
-		if (vchan->adapter_info.enabled)
+		if (vchan->adapter_enabled)
 			dma_adapter_vchan_free(vchan);
 	}
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 17+ messages in thread
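
For readers following the completion-ring rework in the patch above, here is a minimal standalone sketch of the layout it moves to: one status byte per descriptor, placed at a cache-line stride so that hardware writes to adjacent completion slots never share a line, replacing the old per-descriptor heap allocations. The names (compl_ring_create, COMPL_STRIDE, REQ_PENDING) are illustrative stand-ins, not the driver's identifiers:

#include <stdint.h>
#include <stdlib.h>

#define COMPL_STRIDE 64   /* cache-line size; stands in for CNXK_DPI_COMPL_OFFSET */
#define REQ_PENDING  0xFF /* "request submitted" marker; stands in for CNXK_DPI_REQ_CDATA */

/* One flat allocation holds every status byte, each on its own cache line. */
static uint8_t *
compl_ring_create(uint16_t nb_desc)
{
	uint8_t *ring = calloc(nb_desc, COMPL_STRIDE);
	uint16_t i;

	if (ring == NULL)
		return NULL;
	for (i = 0; i < nb_desc; i++)
		ring[i * COMPL_STRIDE] = REQ_PENDING;
	return ring;
}

/* Poll the slot at 'head': return the status byte and re-arm the slot
 * once the engine has overwritten it with a completion code.
 */
static uint8_t
compl_ring_poll(uint8_t *ring, uint16_t head)
{
	uint8_t status = ring[head * COMPL_STRIDE];

	if (status != REQ_PENDING)
		ring[head * COMPL_STRIDE] = REQ_PENDING;
	return status;
}

A single free() (rte_free() in the driver) then tears the whole ring down, which is what lets cnxk_dmadev_vchan_free() drop its inner per-descriptor loop.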

* [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops
  2024-04-17  5:58 ` [PATCH v2 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula
  2024-04-17  5:58   ` [PATCH v2 2/2] dma/cnxk: remove completion pool pbhagavatula
@ 2024-04-17  8:26   ` pbhagavatula
  2024-04-17  8:26     ` [PATCH v3 2/2] dma/cnxk: remove completion pool pbhagavatula
                       ` (2 more replies)
  1 sibling, 3 replies; 17+ messages in thread
From: pbhagavatula @ 2024-04-17  8:26 UTC (permalink / raw)
  To: jerinj, Amit Prakash Shukla, Vamsi Attunuru; +Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Re-organize event DMA ops structure to allow holding
source and destination pointers without the need for
additional memory, the mempool allocating memory for
rte_event_dma_adapter_ops can size the structure to
accommodate all the needed source and destination
pointers.

Add multiple words for holding user metadata, adapter
implementation specific metadata and event metadata.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 v3 Changes:
 - Fix stdatomic compilation.
 v2 Changes:
 - Fix 32bit compilation

 app/test-eventdev/test_perf_common.c        | 26 ++++--------
 app/test/test_event_dma_adapter.c           | 20 +++------
 doc/guides/prog_guide/event_dma_adapter.rst |  2 +-
 drivers/dma/cnxk/cnxk_dmadev_fp.c           | 39 +++++++----------
 lib/eventdev/rte_event_dma_adapter.c        | 27 ++++--------
 lib/eventdev/rte_event_dma_adapter.h        | 46 +++++++++++++++------
 6 files changed, 72 insertions(+), 88 deletions(-)

diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c
index 93e6132de8..db0f9c1f3b 100644
--- a/app/test-eventdev/test_perf_common.c
+++ b/app/test-eventdev/test_perf_common.c
@@ -1503,7 +1503,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
 		prod = 0;
 		for (; port < perf_nb_event_ports(opt); port++) {
 			struct prod_data *p = &t->prod[port];
-			struct rte_event *response_info;
 			uint32_t flow_id;

 			p->dev_id = opt->dev_id;
@@ -1523,13 +1522,10 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
 			for (flow_id = 0; flow_id < t->nb_flows; flow_id++) {
 				rte_mempool_get(t->da_op_pool, (void **)&op);

-				op->src_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0);
-				op->dst_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0);
-
-				op->src_seg->addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
-				op->dst_seg->addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
-				op->src_seg->length = 1024;
-				op->dst_seg->length = 1024;
+				op->src_dst_seg[0].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
+				op->src_dst_seg[1].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
+				op->src_dst_seg[0].length = 1024;
+				op->src_dst_seg[1].length = 1024;
 				op->nb_src = 1;
 				op->nb_dst = 1;
 				op->flags = RTE_DMA_OP_FLAG_SUBMIT;
@@ -1537,12 +1533,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
 				op->dma_dev_id = dma_dev_id;
 				op->vchan = vchan_id;

-				response_info = (struct rte_event *)((uint8_t *)op +
-						 sizeof(struct rte_event_dma_adapter_op));
-				response_info->queue_id = p->queue_id;
-				response_info->sched_type = RTE_SCHED_TYPE_ATOMIC;
-				response_info->flow_id = flow_id;
-
 				p->da.dma_op[flow_id] = op;
 			}

@@ -2036,7 +2026,7 @@ perf_dmadev_setup(struct evt_test *test, struct evt_options *opt)
 		return -ENODEV;
 	}

-	elt_size = sizeof(struct rte_event_dma_adapter_op) + sizeof(struct rte_event);
+	elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2);
 	t->da_op_pool = rte_mempool_create("dma_op_pool", opt->pool_sz, elt_size, 256,
 					   0, NULL, NULL, NULL, NULL, rte_socket_id(), 0);
 	if (t->da_op_pool == NULL) {
@@ -2085,10 +2075,8 @@ perf_dmadev_destroy(struct evt_test *test, struct evt_options *opt)
 		for (flow_id = 0; flow_id < t->nb_flows; flow_id++) {
 			op = p->da.dma_op[flow_id];

-			rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_seg->addr);
-			rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->dst_seg->addr);
-			rte_free(op->src_seg);
-			rte_free(op->dst_seg);
+			rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_dst_seg[0].addr);
+			rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_dst_seg[1].addr);
 			rte_mempool_put(op->op_mp, op);
 		}

diff --git a/app/test/test_event_dma_adapter.c b/app/test/test_event_dma_adapter.c
index 35b417b69f..d9dff4ff7d 100644
--- a/app/test/test_event_dma_adapter.c
+++ b/app/test/test_event_dma_adapter.c
@@ -235,7 +235,6 @@ test_op_forward_mode(void)
 	struct rte_mbuf *dst_mbuf[TEST_MAX_OP];
 	struct rte_event_dma_adapter_op *op;
 	struct rte_event ev[TEST_MAX_OP];
-	struct rte_event response_info;
 	int ret, i;

 	ret = rte_pktmbuf_alloc_bulk(params.src_mbuf_pool, src_mbuf, TEST_MAX_OP);
@@ -253,14 +252,11 @@ test_op_forward_mode(void)
 		rte_mempool_get(params.op_mpool, (void **)&op);
 		TEST_ASSERT_NOT_NULL(op, "Failed to allocate dma operation struct\n");

-		op->src_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0);
-		op->dst_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0);
-
 		/* Update Op */
-		op->src_seg->addr = rte_pktmbuf_iova(src_mbuf[i]);
-		op->dst_seg->addr = rte_pktmbuf_iova(dst_mbuf[i]);
-		op->src_seg->length = PACKET_LENGTH;
-		op->dst_seg->length = PACKET_LENGTH;
+		op->src_dst_seg[0].addr = rte_pktmbuf_iova(src_mbuf[i]);
+		op->src_dst_seg[1].addr = rte_pktmbuf_iova(dst_mbuf[i]);
+		op->src_dst_seg[0].length = PACKET_LENGTH;
+		op->src_dst_seg[1].length = PACKET_LENGTH;
 		op->nb_src = 1;
 		op->nb_dst = 1;
 		op->flags = RTE_DMA_OP_FLAG_SUBMIT;
@@ -268,10 +264,6 @@ test_op_forward_mode(void)
 		op->dma_dev_id = TEST_DMA_DEV_ID;
 		op->vchan = TEST_DMA_VCHAN_ID;

-		response_info.event = dma_response_info.event;
-		rte_memcpy((uint8_t *)op + sizeof(struct rte_event_dma_adapter_op), &response_info,
-			   sizeof(struct rte_event));
-
 		/* Fill in event info and update event_ptr with rte_event_dma_adapter_op */
 		memset(&ev[i], 0, sizeof(struct rte_event));
 		ev[i].event = 0;
@@ -294,8 +286,6 @@ test_op_forward_mode(void)

 		TEST_ASSERT_EQUAL(ret, 0, "Data mismatch for dma adapter\n");

-		rte_free(op->src_seg);
-		rte_free(op->dst_seg);
 		rte_mempool_put(op->op_mp, op);
 	}

@@ -400,7 +390,7 @@ configure_dmadev(void)
 						       rte_socket_id());
 	RTE_TEST_ASSERT_NOT_NULL(params.dst_mbuf_pool, "Can't create DMA_DST_MBUFPOOL\n");

-	elt_size = sizeof(struct rte_event_dma_adapter_op) + sizeof(struct rte_event);
+	elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2);
 	params.op_mpool = rte_mempool_create("EVENT_DMA_OP_POOL", DMA_OP_POOL_SIZE, elt_size, 0,
 					     0, NULL, NULL, NULL, NULL, rte_socket_id(), 0);
 	RTE_TEST_ASSERT_NOT_NULL(params.op_mpool, "Can't create DMA_OP_POOL\n");
diff --git a/doc/guides/prog_guide/event_dma_adapter.rst b/doc/guides/prog_guide/event_dma_adapter.rst
index 3443b6a803..1fb9b0a07b 100644
--- a/doc/guides/prog_guide/event_dma_adapter.rst
+++ b/doc/guides/prog_guide/event_dma_adapter.rst
@@ -144,7 +144,7 @@ on which it enqueues events towards the DMA adapter using ``rte_event_enqueue_bu
    uint32_t cap;
    int ret;

-   /* Fill in event info and update event_ptr with rte_dma_op */
+   /* Fill in event info and update event_ptr with rte_event_dma_adapter_op */
    memset(&ev, 0, sizeof(ev));
    .
    .
diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c
index f6562b603e..9f7f9b2eed 100644
--- a/drivers/dma/cnxk/cnxk_dmadev_fp.c
+++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c
@@ -457,7 +457,6 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
-	struct rte_event *rsp_info;
 	struct cn10k_sso_hws *work;
 	uint16_t nb_src, nb_dst;
 	rte_mcslock_t mcs_lock_me;
@@ -469,9 +468,7 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)

 	for (count = 0; count < nb_events; count++) {
 		op = ev[count].event_ptr;
-		rsp_info = (struct rte_event *)((uint8_t *)op +
-			     sizeof(struct rte_event_dma_adapter_op));
-		dpivf =	rte_dma_fp_objs[op->dma_dev_id].dev_private;
+		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];

 		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
@@ -488,15 +485,14 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 		hdr[0] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 54);
 		hdr[0] |= (nb_dst << 6) | nb_src;
 		hdr[1] = ((uint64_t)comp_ptr);
-		hdr[2] = cnxk_dma_adapter_format_event(rsp_info->event);
+		hdr[2] = cnxk_dma_adapter_format_event(ev[count].event);

-		src = &op->src_seg[0];
-		dst = &op->dst_seg[0];
+		src = &op->src_dst_seg[0];
+		dst = &op->src_dst_seg[op->nb_src];

 		if (CNXK_TAG_IS_HEAD(work->gw_rdata) ||
 		    ((CNXK_TT_FROM_TAG(work->gw_rdata) == SSO_TT_ORDERED) &&
-		    (rsp_info->sched_type & DPI_HDR_TT_MASK) ==
-			    RTE_SCHED_TYPE_ORDERED))
+		     (ev[count].sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED))
 			roc_sso_hws_head_wait(work->base);

 		rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
@@ -566,12 +562,12 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event
 		 * For all other cases, src pointers are first pointers.
 		 */
 		if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) {
-			fptr = &op->dst_seg[0];
-			lptr = &op->src_seg[0];
+			fptr = &op->src_dst_seg[nb_src];
+			lptr = &op->src_dst_seg[0];
 			RTE_SWAP(nb_src, nb_dst);
 		} else {
-			fptr = &op->src_seg[0];
-			lptr = &op->dst_seg[0];
+			fptr = &op->src_dst_seg[0];
+			lptr = &op->src_dst_seg[nb_src];
 		}

 		hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48;
@@ -612,7 +608,6 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
-	struct rte_event *rsp_info;
 	struct cn9k_sso_hws *work;
 	uint16_t nb_src, nb_dst;
 	rte_mcslock_t mcs_lock_me;
@@ -624,9 +619,7 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)

 	for (count = 0; count < nb_events; count++) {
 		op = ev[count].event_ptr;
-		rsp_info = (struct rte_event *)((uint8_t *)op +
-			    sizeof(struct rte_event_dma_adapter_op));
-		dpivf =	rte_dma_fp_objs[op->dma_dev_id].dev_private;
+		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];

 		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
@@ -647,18 +640,18 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 		 * For all other cases, src pointers are first pointers.
 		 */
 		if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) {
-			fptr = &op->dst_seg[0];
-			lptr = &op->src_seg[0];
+			fptr = &op->src_dst_seg[nb_src];
+			lptr = &op->src_dst_seg[0];
 			RTE_SWAP(nb_src, nb_dst);
 		} else {
-			fptr = &op->src_seg[0];
-			lptr = &op->dst_seg[0];
+			fptr = &op->src_dst_seg[0];
+			lptr = &op->src_dst_seg[nb_src];
 		}

 		hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48;
-		hdr[0] |= cnxk_dma_adapter_format_event(rsp_info->event);
+		hdr[0] |= cnxk_dma_adapter_format_event(ev[count].event);

-		if ((rsp_info->sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)
+		if ((ev[count].sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)
 			roc_sso_hws_head_wait(work->base);

 		rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
diff --git a/lib/eventdev/rte_event_dma_adapter.c b/lib/eventdev/rte_event_dma_adapter.c
index 24dff556db..e52ef46a1b 100644
--- a/lib/eventdev/rte_event_dma_adapter.c
+++ b/lib/eventdev/rte_event_dma_adapter.c
@@ -236,9 +236,9 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter,
 				      uint16_t vchan, uint16_t *nb_ops_flushed)
 {
 	struct rte_event_dma_adapter_op *op;
-	struct dma_vchan_info *tq;
 	uint16_t *head = &bufp->head;
 	uint16_t *tail = &bufp->tail;
+	struct dma_vchan_info *tq;
 	uint16_t n;
 	uint16_t i;
 	int ret;
@@ -257,11 +257,13 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter,
 	for (i = 0; i < n; i++)	{
 		op = bufp->op_buffer[*head];
 		if (op->nb_src == 1 && op->nb_dst == 1)
-			ret = rte_dma_copy(dma_dev_id, vchan, op->src_seg->addr, op->dst_seg->addr,
-					   op->src_seg->length, op->flags);
+			ret = rte_dma_copy(dma_dev_id, vchan, op->src_dst_seg[0].addr,
+					   op->src_dst_seg[1].addr, op->src_dst_seg[0].length,
+					   op->flags);
 		else
-			ret = rte_dma_copy_sg(dma_dev_id, vchan, op->src_seg, op->dst_seg,
-					      op->nb_src, op->nb_dst, op->flags);
+			ret = rte_dma_copy_sg(dma_dev_id, vchan, &op->src_dst_seg[0],
+					      &op->src_dst_seg[op->nb_src], op->nb_src, op->nb_dst,
+					      op->flags);
 		if (ret < 0)
 			break;

@@ -511,8 +513,7 @@ edma_enq_to_dma_dev(struct event_dma_adapter *adapter, struct rte_event *ev, uns
 		if (dma_op == NULL)
 			continue;

-		/* Expected to have response info appended to dma_op. */
-
+		dma_op->impl_opaque[0] = ev[i].event;
 		dma_dev_id = dma_op->dma_dev_id;
 		vchan = dma_op->vchan;
 		vchan_qinfo = &adapter->dma_devs[dma_dev_id].vchanq[vchan];
@@ -647,7 +648,6 @@ edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_a
 	uint8_t event_port_id = adapter->event_port_id;
 	uint8_t event_dev_id = adapter->eventdev_id;
 	struct rte_event events[DMA_BATCH_SIZE];
-	struct rte_event *response_info;
 	uint16_t nb_enqueued, nb_ev;
 	uint8_t retry;
 	uint8_t i;
@@ -659,16 +659,7 @@ edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_a
 	for (i = 0; i < num; i++) {
 		struct rte_event *ev = &events[nb_ev++];

-		/* Expected to have response info appended to dma_op. */
-		response_info = (struct rte_event *)((uint8_t *)ops[i] +
-							  sizeof(struct rte_event_dma_adapter_op));
-		if (unlikely(response_info == NULL)) {
-			if (ops[i] != NULL && ops[i]->op_mp != NULL)
-				rte_mempool_put(ops[i]->op_mp, ops[i]);
-			continue;
-		}
-
-		rte_memcpy(ev, response_info, sizeof(struct rte_event));
+		ev->event = ops[i]->impl_opaque[0];
 		ev->event_ptr = ops[i];
 		ev->event_type = RTE_EVENT_TYPE_DMADEV;
 		if (adapter->implicit_release_disabled)
diff --git a/lib/eventdev/rte_event_dma_adapter.h b/lib/eventdev/rte_event_dma_adapter.h
index e924ab673d..048ddba3f3 100644
--- a/lib/eventdev/rte_event_dma_adapter.h
+++ b/lib/eventdev/rte_event_dma_adapter.h
@@ -157,24 +157,46 @@ extern "C" {
  * instance.
  */
 struct rte_event_dma_adapter_op {
-	struct rte_dma_sge *src_seg;
-	/**< Source segments. */
-	struct rte_dma_sge *dst_seg;
-	/**< Destination segments. */
-	uint16_t nb_src;
-	/**< Number of source segments. */
-	uint16_t nb_dst;
-	/**< Number of destination segments. */
 	uint64_t flags;
 	/**< Flags related to the operation.
 	 * @see RTE_DMA_OP_FLAG_*
 	 */
-	int16_t dma_dev_id;
-	/**< DMA device ID to be used */
-	uint16_t vchan;
-	/**< DMA vchan ID to be used */
 	struct rte_mempool *op_mp;
 	/**< Mempool from which op is allocated. */
+	enum rte_dma_status_code status;
+	/**< Status code for this operation. */
+	uint32_t rsvd;
+	/**< Reserved for future use. */
+	uint64_t impl_opaque[2];
+	/**< Implementation-specific opaque data.
+	 * A DMA device implementation can use this field to hold
+	 * implementation-specific values shared between dequeue and enqueue
+	 * operations.
+	 * The application should not modify this field.
+	 */
+	uint64_t user_meta;
+	/**< Memory to store user-specific metadata.
+	 * The DMA device implementation should not modify this area.
+	 */
+	uint64_t event_meta;
+	/**< Event metadata that defines event attributes when used in OP_NEW mode.
+	 * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_NEW
+	 * @see struct rte_event::event
+	 */
+	int16_t dma_dev_id;
+	/**< DMA device ID to be used with OP_FORWARD mode.
+	 * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD
+	 */
+	uint16_t vchan;
+	/**< DMA vchan ID to be used with OP_FORWARD mode.
+	 * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD
+	 */
+	uint16_t nb_src;
+	/**< Number of source segments. */
+	uint16_t nb_dst;
+	/**< Number of destination segments. */
+	struct rte_dma_sge src_dst_seg[0];
+	/**< Source and destination segments. */
 };

 /**
--
2.25.1


^ permalink raw reply	[flat|nested] 17+ messages in thread
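
For context on the new layout: because src_dst_seg[] is now a flexible array member at the tail of struct rte_event_dma_adapter_op, the application decides at mempool-creation time how many segments each op can carry. A minimal sketch, assuming one source and one destination segment per op as in the test code above (dma_op_pool_create and dma_op_fill are illustrative helper names, not part of the API):

#include <rte_event_dma_adapter.h>
#include <rte_lcore.h>
#include <rte_mempool.h>

static struct rte_mempool *
dma_op_pool_create(unsigned int nb_ops)
{
	/* Op header plus trailing segment array: 1 src + 1 dst here;
	 * scale the multiplier for scatter-gather ops.
	 */
	unsigned int elt_size = sizeof(struct rte_event_dma_adapter_op) +
				(sizeof(struct rte_dma_sge) * 2);

	return rte_mempool_create("dma_op_sketch", nb_ops, elt_size, 256, 0,
				  NULL, NULL, NULL, NULL, rte_socket_id(), 0);
}

static void
dma_op_fill(struct rte_event_dma_adapter_op *op, rte_iova_t src, rte_iova_t dst,
	    uint32_t len, int16_t dma_dev_id, uint16_t vchan)
{
	/* Sources occupy slots [0, nb_src); destinations follow at
	 * [nb_src, nb_src + nb_dst), which is why the adapter passes
	 * &op->src_dst_seg[op->nb_src] as the destination array.
	 */
	op->src_dst_seg[0].addr = src;
	op->src_dst_seg[0].length = len;
	op->src_dst_seg[1].addr = dst;
	op->src_dst_seg[1].length = len;
	op->nb_src = 1;
	op->nb_dst = 1;
	op->dma_dev_id = dma_dev_id;
	op->vchan = vchan;
	op->flags = RTE_DMA_OP_FLAG_SUBMIT;
}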

* [PATCH v3 2/2] dma/cnxk: remove completion pool
  2024-04-17  8:26   ` [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula
@ 2024-04-17  8:26     ` pbhagavatula
  2024-05-16  7:39     ` [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops Amit Prakash Shukla
  2024-05-30 12:44     ` [PATCH v4 " pbhagavatula
  2 siblings, 0 replies; 17+ messages in thread
From: pbhagavatula @ 2024-04-17  8:26 UTC (permalink / raw)
  To: jerinj, Vamsi Attunuru, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Use DMA ops to store metadata and remove the use of the completion pool.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Vamsi Attunuru <vattunuru@marvell.com>
---
 drivers/dma/cnxk/cnxk_dmadev.c           | 53 ++++++---------
 drivers/dma/cnxk/cnxk_dmadev.h           | 24 +------
 drivers/dma/cnxk/cnxk_dmadev_fp.c        | 82 ++++++------------------
 drivers/event/cnxk/cnxk_eventdev_adptr.c | 47 ++------------
 4 files changed, 48 insertions(+), 158 deletions(-)

diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c
index 4ab3cfbdf2..dfd7222713 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.c
+++ b/drivers/dma/cnxk/cnxk_dmadev.c
@@ -2,6 +2,8 @@
  * Copyright (C) 2021 Marvell International Ltd.
  */
 
+#include <rte_event_dma_adapter.h>
+
 #include <cnxk_dmadev.h>
 
 static int cnxk_stats_reset(struct rte_dma_dev *dev, uint16_t vchan);
@@ -30,8 +32,7 @@ cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan)
 {
 	struct cnxk_dpi_conf *dpi_conf;
 	uint16_t num_vchans;
-	uint16_t max_desc;
-	int i, j;
+	int i;
 
 	if (vchan == RTE_DMA_ALL_VCHAN) {
 		num_vchans = dpivf->num_vchans;
@@ -46,12 +47,6 @@ cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan)
 
 	for (; i < num_vchans; i++) {
 		dpi_conf = &dpivf->conf[i];
-		max_desc = dpi_conf->c_desc.max_cnt + 1;
-		if (dpi_conf->c_desc.compl_ptr) {
-			for (j = 0; j < max_desc; j++)
-				rte_free(dpi_conf->c_desc.compl_ptr[j]);
-		}
-
 		rte_free(dpi_conf->c_desc.compl_ptr);
 		dpi_conf->c_desc.compl_ptr = NULL;
 	}
@@ -261,7 +256,7 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 	if (max_desc > CNXK_DPI_MAX_DESC)
 		max_desc = CNXK_DPI_MAX_DESC;
 
-	size = (max_desc * sizeof(struct cnxk_dpi_compl_s *));
+	size = (max_desc * sizeof(uint8_t) * CNXK_DPI_COMPL_OFFSET);
 	dpi_conf->c_desc.compl_ptr = rte_zmalloc(NULL, size, 0);
 
 	if (dpi_conf->c_desc.compl_ptr == NULL) {
@@ -269,16 +264,8 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < max_desc; i++) {
-		dpi_conf->c_desc.compl_ptr[i] =
-			rte_zmalloc(NULL, sizeof(struct cnxk_dpi_compl_s), 0);
-		if (!dpi_conf->c_desc.compl_ptr[i]) {
-			plt_err("Failed to allocate for descriptor memory");
-			return -ENOMEM;
-		}
-
-		dpi_conf->c_desc.compl_ptr[i]->cdata = CNXK_DPI_REQ_CDATA;
-	}
+	for (i = 0; i < max_desc; i++)
+		dpi_conf->c_desc.compl_ptr[i * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
 
 	dpi_conf->c_desc.max_cnt = (max_desc - 1);
 
@@ -301,10 +288,8 @@ cnxk_dmadev_start(struct rte_dma_dev *dev)
 		dpi_conf->pnum_words = 0;
 		dpi_conf->pending = 0;
 		dpi_conf->desc_idx = 0;
-		for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++) {
-			if (dpi_conf->c_desc.compl_ptr[j])
-				dpi_conf->c_desc.compl_ptr[j]->cdata = CNXK_DPI_REQ_CDATA;
-		}
+		for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++)
+			dpi_conf->c_desc.compl_ptr[j * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
 		nb_desc += dpi_conf->c_desc.max_cnt + 1;
 		cnxk_stats_reset(dev, i);
 		dpi_conf->completed_offset = 0;
@@ -382,22 +367,22 @@ cnxk_dmadev_completed(void *dev_private, uint16_t vchan, const uint16_t nb_cpls,
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc;
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t status;
 	int cnt;
 
 	for (cnt = 0; cnt < nb_cpls; cnt++) {
-		comp_ptr = c_desc->compl_ptr[c_desc->head];
-
-		if (comp_ptr->cdata) {
-			if (comp_ptr->cdata == CNXK_DPI_REQ_CDATA)
+		status = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET];
+		if (status) {
+			if (status == CNXK_DPI_REQ_CDATA)
 				break;
 			*has_error = 1;
 			dpi_conf->stats.errors++;
+			c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] =
+				CNXK_DPI_REQ_CDATA;
 			CNXK_DPI_STRM_INC(*c_desc, head);
 			break;
 		}
-
-		comp_ptr->cdata = CNXK_DPI_REQ_CDATA;
+		c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
 		CNXK_DPI_STRM_INC(*c_desc, head);
 	}
 
@@ -414,19 +399,17 @@ cnxk_dmadev_completed_status(void *dev_private, uint16_t vchan, const uint16_t n
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	int cnt;
 
 	for (cnt = 0; cnt < nb_cpls; cnt++) {
-		comp_ptr = c_desc->compl_ptr[c_desc->head];
-		status[cnt] = comp_ptr->cdata;
+		status[cnt] = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET];
 		if (status[cnt]) {
 			if (status[cnt] == CNXK_DPI_REQ_CDATA)
 				break;
 
 			dpi_conf->stats.errors++;
 		}
-		comp_ptr->cdata = CNXK_DPI_REQ_CDATA;
+		c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
 		CNXK_DPI_STRM_INC(*c_desc, head);
 	}
 
@@ -593,7 +576,7 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_de
 	rdpi = &dpivf->rdpi;
 
 	rdpi->pci_dev = pci_dev;
-	rc = roc_dpi_dev_init(rdpi, offsetof(struct cnxk_dpi_compl_s, wqecs));
+	rc = roc_dpi_dev_init(rdpi, offsetof(struct rte_event_dma_adapter_op, impl_opaque));
 	if (rc < 0)
 		goto err_out_free;
 
diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h
index 610a360ba2..a80db333a0 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.h
+++ b/drivers/dma/cnxk/cnxk_dmadev.h
@@ -37,17 +37,12 @@
 #define CNXK_DPI_MAX_CMD_SZ		    CNXK_DPI_CMD_LEN(CNXK_DPI_MAX_POINTER,		\
 							     CNXK_DPI_MAX_POINTER)
 #define CNXK_DPI_CHUNKS_FROM_DESC(cz, desc) (((desc) / (((cz) / 8) / CNXK_DPI_MAX_CMD_SZ)) + 1)
-
+#define CNXK_DPI_COMPL_OFFSET		    ROC_CACHE_LINE_SZ
 /* Set Completion data to 0xFF when request submitted,
  * upon successful request completion engine reset to completion status
  */
 #define CNXK_DPI_REQ_CDATA 0xFF
 
-/* Set Completion data to 0xDEADBEEF when request submitted for SSO.
- * This helps differentiate if the dequeue is called after cnxk enueue.
- */
-#define CNXK_DPI_REQ_SSO_CDATA    0xDEADBEEF
-
 union cnxk_dpi_instr_cmd {
 	uint64_t u;
 	struct cn9k_dpi_instr_cmd {
@@ -91,24 +86,11 @@ union cnxk_dpi_instr_cmd {
 	} cn10k;
 };
 
-struct cnxk_dpi_compl_s {
-	uint64_t cdata;
-	void *op;
-	uint16_t dev_id;
-	uint16_t vchan;
-	uint32_t wqecs;
-};
-
 struct cnxk_dpi_cdesc_data_s {
-	struct cnxk_dpi_compl_s **compl_ptr;
 	uint16_t max_cnt;
 	uint16_t head;
 	uint16_t tail;
-};
-
-struct cnxk_dma_adapter_info {
-	bool enabled;               /* Set if vchan queue is added to dma adapter. */
-	struct rte_mempool *req_mp; /* DMA inflight request mempool. */
+	uint8_t *compl_ptr;
 };
 
 struct cnxk_dpi_conf {
@@ -119,7 +101,7 @@ struct cnxk_dpi_conf {
 	uint16_t desc_idx;
 	struct rte_dma_stats stats;
 	uint64_t completed_offset;
-	struct cnxk_dma_adapter_info adapter_info;
+	bool adapter_enabled;
 };
 
 struct cnxk_dpi_vf_s {
diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c
index 9f7f9b2eed..26591235c6 100644
--- a/drivers/dma/cnxk/cnxk_dmadev_fp.c
+++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c
@@ -245,14 +245,14 @@ cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t d
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD];
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t *comp_ptr;
 	int rc;
 
 	if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) ==
 		     dpi_conf->c_desc.head))
 		return -ENOSPC;
 
-	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
+	comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
 	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
 
 	cmd[0] = (1UL << 54) | (1UL << 48);
@@ -301,7 +301,7 @@ cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	const struct rte_dma_sge *fptr, *lptr;
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t *comp_ptr;
 	uint64_t hdr[4];
 	int rc;
 
@@ -309,7 +309,7 @@ cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 		     dpi_conf->c_desc.head))
 		return -ENOSPC;
 
-	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
+	comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
 	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
 
 	hdr[1] = dpi_conf->cmd.u | ((flags & RTE_DMA_OP_FLAG_AUTO_FREE) << 37);
@@ -357,14 +357,14 @@ cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD];
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t *comp_ptr;
 	int rc;
 
 	if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) ==
 		     dpi_conf->c_desc.head))
 		return -ENOSPC;
 
-	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
+	comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
 	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
 
 	cmd[0] = dpi_conf->cmd.u | (1U << 6) | 1U;
@@ -403,7 +403,7 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 {
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t *comp_ptr;
 	uint64_t hdr[4];
 	int rc;
 
@@ -411,7 +411,7 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 		     dpi_conf->c_desc.head))
 		return -ENOSPC;
 
-	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
+	comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
 	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
 
 	hdr[0] = dpi_conf->cmd.u | (nb_dst << 6) | nb_src;
@@ -454,7 +454,6 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 {
 	const struct rte_dma_sge *src, *dst;
 	struct rte_event_dma_adapter_op *op;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
 	struct cn10k_sso_hws *work;
@@ -471,20 +470,12 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];
 
-		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
-			return count;
-
-		comp_ptr->op = op;
-		comp_ptr->dev_id = op->dma_dev_id;
-		comp_ptr->vchan = op->vchan;
-		comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
-
 		nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
 		nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
 
 		hdr[0] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 54);
 		hdr[0] |= (nb_dst << 6) | nb_src;
-		hdr[1] = ((uint64_t)comp_ptr);
+		hdr[1] = (uint64_t)op;
 		hdr[2] = cnxk_dma_adapter_format_event(ev[count].event);
 
 		src = &op->src_dst_seg[0];
@@ -524,7 +515,6 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event
 {
 	const struct rte_dma_sge *fptr, *lptr;
 	struct rte_event_dma_adapter_op *op;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cn9k_sso_hws_dual *work;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
@@ -544,16 +534,8 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event
 		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];
 
-		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
-			return count;
-
-		comp_ptr->op = op;
-		comp_ptr->dev_id = op->dma_dev_id;
-		comp_ptr->vchan = op->vchan;
-		comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
-
 		hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36);
-		hdr[2] = (uint64_t)comp_ptr;
+		hdr[2] = (uint64_t)op;
 
 		nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
 		nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
@@ -605,7 +587,6 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 {
 	const struct rte_dma_sge *fptr, *lptr;
 	struct rte_event_dma_adapter_op *op;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
 	struct cn9k_sso_hws *work;
@@ -622,16 +603,8 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];
 
-		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
-			return count;
-
-		comp_ptr->op = op;
-		comp_ptr->dev_id = op->dma_dev_id;
-		comp_ptr->vchan = op->vchan;
-		comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
-
 		hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36);
-		hdr[2] = (uint64_t)comp_ptr;
+		hdr[2] = (uint64_t)op;
 
 		nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
 		nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
@@ -682,38 +655,23 @@ uintptr_t
 cnxk_dma_adapter_dequeue(uintptr_t get_work1)
 {
 	struct rte_event_dma_adapter_op *op;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
-	rte_mcslock_t mcs_lock_me;
-	RTE_ATOMIC(uint8_t) *wqecs;
-
-	comp_ptr = (struct cnxk_dpi_compl_s *)get_work1;
-
-	/* Dequeue can be called without calling cnx_enqueue in case of
-	 * dma_adapter. When its called from adapter, dma op will not be
-	 * embedded in completion pointer. In those cases return op.
-	 */
-	if (comp_ptr->cdata != CNXK_DPI_REQ_SSO_CDATA)
-		return (uintptr_t)comp_ptr;
 
-	dpivf =	rte_dma_fp_objs[comp_ptr->dev_id].dev_private;
-	dpi_conf = &dpivf->conf[comp_ptr->vchan];
+	op = (struct rte_event_dma_adapter_op *)get_work1;
+	dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
+	dpi_conf = &dpivf->conf[op->vchan];
 
-	rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
-	wqecs = (uint8_t __rte_atomic *)&comp_ptr->wqecs;
-	if (rte_atomic_load_explicit(wqecs, rte_memory_order_relaxed) != 0)
-		dpi_conf->stats.errors++;
+	if (rte_atomic_load_explicit((RTE_ATOMIC(uint64_t) *)&op->impl_opaque[0],
+				     rte_memory_order_relaxed) != 0)
+		rte_atomic_fetch_add_explicit((RTE_ATOMIC(uint64_t) *)&dpi_conf->stats.errors, 1,
+					      rte_memory_order_relaxed);
 
 	/* Take into account errors also. This is similar to
 	 * cnxk_dmadev_completed_status().
 	 */
-	dpi_conf->stats.completed++;
-	rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
-
-	op = (struct rte_event_dma_adapter_op *)comp_ptr->op;
-
-	rte_mempool_put(dpi_conf->adapter_info.req_mp, comp_ptr);
+	rte_atomic_fetch_add_explicit((RTE_ATOMIC(uint64_t) *)&dpi_conf->stats.completed, 1,
+				      rte_memory_order_relaxed);
 
 	return (uintptr_t)op;
 }
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index a2a59b16c9..98db11ad61 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -739,31 +739,6 @@ cnxk_crypto_adapter_qp_del(const struct rte_cryptodev *cdev,
 	return 0;
 }
 
-static int
-dma_adapter_vchan_setup(const int16_t dma_dev_id, struct cnxk_dpi_conf *vchan,
-			uint16_t vchan_id)
-{
-	char name[RTE_MEMPOOL_NAMESIZE];
-	uint32_t cache_size, nb_req;
-	unsigned int req_size;
-
-	snprintf(name, RTE_MEMPOOL_NAMESIZE, "cnxk_dma_req_%u:%u", dma_dev_id, vchan_id);
-	req_size = sizeof(struct cnxk_dpi_compl_s);
-
-	nb_req = vchan->c_desc.max_cnt;
-	cache_size = 16;
-	nb_req += (cache_size * rte_lcore_count());
-
-	vchan->adapter_info.req_mp = rte_mempool_create(name, nb_req, req_size, cache_size, 0,
-							NULL, NULL, NULL, NULL, rte_socket_id(), 0);
-	if (vchan->adapter_info.req_mp == NULL)
-		return -ENOMEM;
-
-	vchan->adapter_info.enabled = true;
-
-	return 0;
-}
-
 int
 cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev,
 			   const int16_t dma_dev_id, uint16_t vchan_id)
@@ -772,7 +747,6 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev,
 	uint32_t adptr_xae_cnt = 0;
 	struct cnxk_dpi_vf_s *dpivf;
 	struct cnxk_dpi_conf *vchan;
-	int ret;
 
 	dpivf = rte_dma_fp_objs[dma_dev_id].dev_private;
 	if ((int16_t)vchan_id == -1) {
@@ -780,19 +754,13 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev,
 
 		for (vchan_id = 0; vchan_id < dpivf->num_vchans; vchan_id++) {
 			vchan = &dpivf->conf[vchan_id];
-			ret = dma_adapter_vchan_setup(dma_dev_id, vchan, vchan_id);
-			if (ret) {
-				cnxk_dma_adapter_vchan_del(dma_dev_id, -1);
-				return ret;
-			}
-			adptr_xae_cnt += vchan->adapter_info.req_mp->size;
+			vchan->adapter_enabled = true;
+			adptr_xae_cnt += vchan->c_desc.max_cnt;
 		}
 	} else {
 		vchan = &dpivf->conf[vchan_id];
-		ret = dma_adapter_vchan_setup(dma_dev_id, vchan, vchan_id);
-		if (ret)
-			return ret;
-		adptr_xae_cnt = vchan->adapter_info.req_mp->size;
+		vchan->adapter_enabled = true;
+		adptr_xae_cnt = vchan->c_desc.max_cnt;
 	}
 
 	/* Update dma adapter XAE count */
@@ -805,8 +773,7 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev,
 static int
 dma_adapter_vchan_free(struct cnxk_dpi_conf *vchan)
 {
-	rte_mempool_free(vchan->adapter_info.req_mp);
-	vchan->adapter_info.enabled = false;
+	vchan->adapter_enabled = false;
 
 	return 0;
 }
@@ -823,12 +790,12 @@ cnxk_dma_adapter_vchan_del(const int16_t dma_dev_id, uint16_t vchan_id)
 
 		for (vchan_id = 0; vchan_id < dpivf->num_vchans; vchan_id++) {
 			vchan = &dpivf->conf[vchan_id];
-			if (vchan->adapter_info.enabled)
+			if (vchan->adapter_enabled)
 				dma_adapter_vchan_free(vchan);
 		}
 	} else {
 		vchan = &dpivf->conf[vchan_id];
-		if (vchan->adapter_info.enabled)
+		if (vchan->adapter_enabled)
 			dma_adapter_vchan_free(vchan);
 	}
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 17+ messages in thread
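
The reworked cnxk_dma_adapter_dequeue() above replaces the MCS lock around the stats with relaxed atomic increments: each counter update is independent, nothing else needs to be ordered against it, and multiple event ports may complete ops for the same vchan concurrently. A sketch of the pattern under those assumptions (vchan_stats and vchan_stats_update are illustrative names):

#include <stdint.h>
#include <rte_stdatomic.h>

struct vchan_stats {
	uint64_t completed;
	uint64_t errors;
};

static inline void
vchan_stats_update(struct vchan_stats *st, uint64_t status_word)
{
	/* A non-zero status word (impl_opaque[0] in the patch) means the
	 * engine reported an error for this op.
	 */
	if (status_word != 0)
		rte_atomic_fetch_add_explicit((RTE_ATOMIC(uint64_t) *)&st->errors,
					      1, rte_memory_order_relaxed);

	/* Completed counts errored ops too, matching cnxk_dmadev_completed_status(). */
	rte_atomic_fetch_add_explicit((RTE_ATOMIC(uint64_t) *)&st->completed,
				      1, rte_memory_order_relaxed);
}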

* RE: [PATCH 1/2] eventdev/dma: reorganize event DMA ops
  2024-04-06 10:13 [PATCH 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula
  2024-04-06 10:13 ` [PATCH 2/2] dma/cnxk: remove completion pool pbhagavatula
  2024-04-17  5:58 ` [PATCH v2 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula
@ 2024-05-16  7:36 ` Amit Prakash Shukla
  2 siblings, 0 replies; 17+ messages in thread
From: Amit Prakash Shukla @ 2024-05-16  7:36 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula, Jerin Jacob, Vamsi Krishna Attunuru
  Cc: dev, Pavan Nikhilesh Bhagavatula

> -----Original Message-----
> From: pbhagavatula@marvell.com <pbhagavatula@marvell.com>
> Sent: Saturday, April 6, 2024 3:43 PM
> To: Jerin Jacob <jerinj@marvell.com>; Amit Prakash Shukla
> <amitprakashs@marvell.com>; Vamsi Krishna Attunuru
> <vattunuru@marvell.com>
> Cc: dev@dpdk.org; Pavan Nikhilesh Bhagavatula
> <pbhagavatula@marvell.com>
> Subject: [PATCH 1/2] eventdev/dma: reorganize event DMA ops
> 
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> 
> Re-organize event DMA ops structure to allow holding source and destination
> pointers without the need for additional memory, the mempool allocating
> memory for rte_event_dma_adapter_ops can size the structure to
> accommodate all the needed source and destination pointers.
> 
> Add multiple words for holding user metadata, adapter implementation
> specific metadata and event metadata.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>

Acked-by: Amit Prakash Shukla <amitprakashs@marvell.com>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* RE: [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops
  2024-04-17  8:26   ` [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula
  2024-04-17  8:26     ` [PATCH v3 2/2] dma/cnxk: remove completion pool pbhagavatula
@ 2024-05-16  7:39     ` Amit Prakash Shukla
  2024-05-30 12:23       ` Jerin Jacob
  2024-05-30 12:44     ` [PATCH v4 " pbhagavatula
  2 siblings, 1 reply; 17+ messages in thread
From: Amit Prakash Shukla @ 2024-05-16  7:39 UTC (permalink / raw)
  To: Pavan Nikhilesh Bhagavatula, Jerin Jacob, Vamsi Krishna Attunuru
  Cc: dev, Pavan Nikhilesh Bhagavatula

> -----Original Message-----
> From: pbhagavatula@marvell.com <pbhagavatula@marvell.com>
> Sent: Wednesday, April 17, 2024 1:57 PM
> To: Jerin Jacob <jerinj@marvell.com>; Amit Prakash Shukla
> <amitprakashs@marvell.com>; Vamsi Krishna Attunuru
> <vattunuru@marvell.com>
> Cc: dev@dpdk.org; Pavan Nikhilesh Bhagavatula
> <pbhagavatula@marvell.com>
> Subject: [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops
> 
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> 
> Re-organize event DMA ops structure to allow holding source and destination
> pointers without the need for additional memory, the mempool allocating
> memory for rte_event_dma_adapter_ops can size the structure to
> accommodate all the needed source and destination pointers.
> 
> Add multiple words for holding user metadata, adapter implementation
> specific metadata and event metadata.
> 
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
>  v3 Changes:
>  - Fix stdatomic compilation.
>  v2 Changes:
>  - Fix 32bit compilation
> 
>  app/test-eventdev/test_perf_common.c        | 26 ++++--------
>  app/test/test_event_dma_adapter.c           | 20 +++------
>  doc/guides/prog_guide/event_dma_adapter.rst |  2 +-
>  drivers/dma/cnxk/cnxk_dmadev_fp.c           | 39 +++++++----------
>  lib/eventdev/rte_event_dma_adapter.c        | 27 ++++--------
>  lib/eventdev/rte_event_dma_adapter.h        | 46 +++++++++++++++------
>  6 files changed, 72 insertions(+), 88 deletions(-)
> 

Acked-by: Amit Prakash Shukla <amitprakashs@marvell.com>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops
  2024-05-16  7:39     ` [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops Amit Prakash Shukla
@ 2024-05-30 12:23       ` Jerin Jacob
  2024-05-30 17:35         ` [EXTERNAL] " Pavan Nikhilesh Bhagavatula
  0 siblings, 1 reply; 17+ messages in thread
From: Jerin Jacob @ 2024-05-30 12:23 UTC (permalink / raw)
  To: Amit Prakash Shukla
  Cc: Pavan Nikhilesh Bhagavatula, Jerin Jacob, Vamsi Krishna Attunuru, dev

On Thu, May 16, 2024 at 1:09 PM Amit Prakash Shukla
<amitprakashs@marvell.com> wrote:
>
> > -----Original Message-----
> > From: pbhagavatula@marvell.com <pbhagavatula@marvell.com>
> > Sent: Wednesday, April 17, 2024 1:57 PM
> > To: Jerin Jacob <jerinj@marvell.com>; Amit Prakash Shukla
> > <amitprakashs@marvell.com>; Vamsi Krishna Attunuru
> > <vattunuru@marvell.com>
> > Cc: dev@dpdk.org; Pavan Nikhilesh Bhagavatula
> > <pbhagavatula@marvell.com>
> > Subject: [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops
> >
> > From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> >
> > Re-organize event DMA ops structure to allow holding source and destination
> > pointers without the need for additional memory, the mempool allocating
> > memory for rte_event_dma_adapter_ops can size the structure to
> > accommodate all the needed source and destination pointers.
> >
> > Add multiple words for holding user metadata, adapter implementation
> > specific metadata and event metadata.
> >
> > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > ---
> >  v3 Changes:
> >  - Fix stdatomic compilation.
> >  v2 Changes:
> >  - Fix 32bit compilation
> >
> >  app/test-eventdev/test_perf_common.c        | 26 ++++--------
> >  app/test/test_event_dma_adapter.c           | 20 +++------
> >  doc/guides/prog_guide/event_dma_adapter.rst |  2 +-
> >  drivers/dma/cnxk/cnxk_dmadev_fp.c           | 39 +++++++----------

Spotted driver change in library patch. Please split.

> >  lib/eventdev/rte_event_dma_adapter.c        | 27 ++++--------
> >  lib/eventdev/rte_event_dma_adapter.h        | 46 +++++++++++++++------
> >  6 files changed, 72 insertions(+), 88 deletions(-)
> >
>
> Acked-by: Amit Prakash Shukla <amitprakashs@marvell.com>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v4 1/2] eventdev/dma: reorganize event DMA ops
  2024-04-17  8:26   ` [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula
  2024-04-17  8:26     ` [PATCH v3 2/2] dma/cnxk: remove completion pool pbhagavatula
  2024-05-16  7:39     ` [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops Amit Prakash Shukla
@ 2024-05-30 12:44     ` pbhagavatula
  2024-05-30 12:44       ` [PATCH v4 2/2] dma/cnxk: remove completion pool pbhagavatula
                         ` (2 more replies)
  2 siblings, 3 replies; 17+ messages in thread
From: pbhagavatula @ 2024-05-30 12:44 UTC (permalink / raw)
  To: jerinj, Amit Prakash Shukla, Vamsi Attunuru; +Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Re-organize event DMA ops structure to allow holding
source and destination pointers without the need for
additional memory, the mempool allocating memory for
rte_event_dma_adapter_ops can size the structure to
accommodate all the needed source and destination
pointers.

Add multiple words for holding user metadata, adapter
implementation specific metadata and event metadata.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Amit Prakash Shukla <amitprakashs@marvell.com>
---
 v4 Changes:
 - Reduce unrelated driver changes and move them to 2/2.
 v3 Changes:
 - Fix stdatomic compilation.
 v2 Changes:
 - Fix 32bit compilation

 app/test-eventdev/test_perf_common.c        | 26 ++++--------
 app/test/test_event_dma_adapter.c           | 20 +++------
 doc/guides/prog_guide/event_dma_adapter.rst |  2 +-
 drivers/dma/cnxk/cnxk_dmadev_fp.c           | 20 ++++-----
 lib/eventdev/rte_event_dma_adapter.c        | 27 ++++--------
 lib/eventdev/rte_event_dma_adapter.h        | 46 +++++++++++++++------
 6 files changed, 66 insertions(+), 75 deletions(-)

diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c
index 93e6132de8..db0f9c1f3b 100644
--- a/app/test-eventdev/test_perf_common.c
+++ b/app/test-eventdev/test_perf_common.c
@@ -1503,7 +1503,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
 		prod = 0;
 		for (; port < perf_nb_event_ports(opt); port++) {
 			struct prod_data *p = &t->prod[port];
-			struct rte_event *response_info;
 			uint32_t flow_id;

 			p->dev_id = opt->dev_id;
@@ -1523,13 +1522,10 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
 			for (flow_id = 0; flow_id < t->nb_flows; flow_id++) {
 				rte_mempool_get(t->da_op_pool, (void **)&op);

-				op->src_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0);
-				op->dst_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0);
-
-				op->src_seg->addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
-				op->dst_seg->addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
-				op->src_seg->length = 1024;
-				op->dst_seg->length = 1024;
+				op->src_dst_seg[0].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
+				op->src_dst_seg[1].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
+				op->src_dst_seg[0].length = 1024;
+				op->src_dst_seg[1].length = 1024;
 				op->nb_src = 1;
 				op->nb_dst = 1;
 				op->flags = RTE_DMA_OP_FLAG_SUBMIT;
@@ -1537,12 +1533,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
 				op->dma_dev_id = dma_dev_id;
 				op->vchan = vchan_id;

-				response_info = (struct rte_event *)((uint8_t *)op +
-						 sizeof(struct rte_event_dma_adapter_op));
-				response_info->queue_id = p->queue_id;
-				response_info->sched_type = RTE_SCHED_TYPE_ATOMIC;
-				response_info->flow_id = flow_id;
-
 				p->da.dma_op[flow_id] = op;
 			}

@@ -2036,7 +2026,7 @@ perf_dmadev_setup(struct evt_test *test, struct evt_options *opt)
 		return -ENODEV;
 	}

-	elt_size = sizeof(struct rte_event_dma_adapter_op) + sizeof(struct rte_event);
+	elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2);
 	t->da_op_pool = rte_mempool_create("dma_op_pool", opt->pool_sz, elt_size, 256,
 					   0, NULL, NULL, NULL, NULL, rte_socket_id(), 0);
 	if (t->da_op_pool == NULL) {
@@ -2085,10 +2075,8 @@ perf_dmadev_destroy(struct evt_test *test, struct evt_options *opt)
 		for (flow_id = 0; flow_id < t->nb_flows; flow_id++) {
 			op = p->da.dma_op[flow_id];

-			rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_seg->addr);
-			rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->dst_seg->addr);
-			rte_free(op->src_seg);
-			rte_free(op->dst_seg);
+			rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_dst_seg[0].addr);
+			rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_dst_seg[1].addr);
 			rte_mempool_put(op->op_mp, op);
 		}

diff --git a/app/test/test_event_dma_adapter.c b/app/test/test_event_dma_adapter.c
index 35b417b69f..d9dff4ff7d 100644
--- a/app/test/test_event_dma_adapter.c
+++ b/app/test/test_event_dma_adapter.c
@@ -235,7 +235,6 @@ test_op_forward_mode(void)
 	struct rte_mbuf *dst_mbuf[TEST_MAX_OP];
 	struct rte_event_dma_adapter_op *op;
 	struct rte_event ev[TEST_MAX_OP];
-	struct rte_event response_info;
 	int ret, i;

 	ret = rte_pktmbuf_alloc_bulk(params.src_mbuf_pool, src_mbuf, TEST_MAX_OP);
@@ -253,14 +252,11 @@ test_op_forward_mode(void)
 		rte_mempool_get(params.op_mpool, (void **)&op);
 		TEST_ASSERT_NOT_NULL(op, "Failed to allocate dma operation struct\n");

-		op->src_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0);
-		op->dst_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0);
-
 		/* Update Op */
-		op->src_seg->addr = rte_pktmbuf_iova(src_mbuf[i]);
-		op->dst_seg->addr = rte_pktmbuf_iova(dst_mbuf[i]);
-		op->src_seg->length = PACKET_LENGTH;
-		op->dst_seg->length = PACKET_LENGTH;
+		op->src_dst_seg[0].addr = rte_pktmbuf_iova(src_mbuf[i]);
+		op->src_dst_seg[1].addr = rte_pktmbuf_iova(dst_mbuf[i]);
+		op->src_dst_seg[0].length = PACKET_LENGTH;
+		op->src_dst_seg[1].length = PACKET_LENGTH;
 		op->nb_src = 1;
 		op->nb_dst = 1;
 		op->flags = RTE_DMA_OP_FLAG_SUBMIT;
@@ -268,10 +264,6 @@ test_op_forward_mode(void)
 		op->dma_dev_id = TEST_DMA_DEV_ID;
 		op->vchan = TEST_DMA_VCHAN_ID;

-		response_info.event = dma_response_info.event;
-		rte_memcpy((uint8_t *)op + sizeof(struct rte_event_dma_adapter_op), &response_info,
-			   sizeof(struct rte_event));
-
 		/* Fill in event info and update event_ptr with rte_event_dma_adapter_op */
 		memset(&ev[i], 0, sizeof(struct rte_event));
 		ev[i].event = 0;
@@ -294,8 +286,6 @@ test_op_forward_mode(void)

 		TEST_ASSERT_EQUAL(ret, 0, "Data mismatch for dma adapter\n");

-		rte_free(op->src_seg);
-		rte_free(op->dst_seg);
 		rte_mempool_put(op->op_mp, op);
 	}

@@ -400,7 +390,7 @@ configure_dmadev(void)
 						       rte_socket_id());
 	RTE_TEST_ASSERT_NOT_NULL(params.dst_mbuf_pool, "Can't create DMA_DST_MBUFPOOL\n");

-	elt_size = sizeof(struct rte_event_dma_adapter_op) + sizeof(struct rte_event);
+	elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2);
 	params.op_mpool = rte_mempool_create("EVENT_DMA_OP_POOL", DMA_OP_POOL_SIZE, elt_size, 0,
 					     0, NULL, NULL, NULL, NULL, rte_socket_id(), 0);
 	RTE_TEST_ASSERT_NOT_NULL(params.op_mpool, "Can't create DMA_OP_POOL\n");
diff --git a/doc/guides/prog_guide/event_dma_adapter.rst b/doc/guides/prog_guide/event_dma_adapter.rst
index 3443b6a803..1fb9b0a07b 100644
--- a/doc/guides/prog_guide/event_dma_adapter.rst
+++ b/doc/guides/prog_guide/event_dma_adapter.rst
@@ -144,7 +144,7 @@ on which it enqueues events towards the DMA adapter using ``rte_event_enqueue_bu
    uint32_t cap;
    int ret;

-   /* Fill in event info and update event_ptr with rte_dma_op */
+   /* Fill in event info and update event_ptr with rte_event_dma_adapter_op */
    memset(&ev, 0, sizeof(ev));
    .
    .
diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c
index f6562b603e..8a3c0c1008 100644
--- a/drivers/dma/cnxk/cnxk_dmadev_fp.c
+++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c
@@ -490,8 +490,8 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 		hdr[1] = ((uint64_t)comp_ptr);
 		hdr[2] = cnxk_dma_adapter_format_event(rsp_info->event);

-		src = &op->src_seg[0];
-		dst = &op->dst_seg[0];
+		src = &op->src_dst_seg[0];
+		dst = &op->src_dst_seg[op->nb_src];

 		if (CNXK_TAG_IS_HEAD(work->gw_rdata) ||
 		    ((CNXK_TT_FROM_TAG(work->gw_rdata) == SSO_TT_ORDERED) &&
@@ -566,12 +566,12 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event
 		 * For all other cases, src pointers are first pointers.
 		 */
 		if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) {
-			fptr = &op->dst_seg[0];
-			lptr = &op->src_seg[0];
+			fptr = &op->src_dst_seg[nb_src];
+			lptr = &op->src_dst_seg[0];
 			RTE_SWAP(nb_src, nb_dst);
 		} else {
-			fptr = &op->src_seg[0];
-			lptr = &op->dst_seg[0];
+			fptr = &op->src_dst_seg[0];
+			lptr = &op->src_dst_seg[nb_src];
 		}

 		hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48;
@@ -647,12 +647,12 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 		 * For all other cases, src pointers are first pointers.
 		 */
 		if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) {
-			fptr = &op->dst_seg[0];
-			lptr = &op->src_seg[0];
+			fptr = &op->src_dst_seg[nb_src];
+			lptr = &op->src_dst_seg[0];
 			RTE_SWAP(nb_src, nb_dst);
 		} else {
-			fptr = &op->src_seg[0];
-			lptr = &op->dst_seg[0];
+			fptr = &op->src_dst_seg[0];
+			lptr = &op->src_dst_seg[nb_src];
 		}

 		hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48;
diff --git a/lib/eventdev/rte_event_dma_adapter.c b/lib/eventdev/rte_event_dma_adapter.c
index 24dff556db..e52ef46a1b 100644
--- a/lib/eventdev/rte_event_dma_adapter.c
+++ b/lib/eventdev/rte_event_dma_adapter.c
@@ -236,9 +236,9 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter,
 				      uint16_t vchan, uint16_t *nb_ops_flushed)
 {
 	struct rte_event_dma_adapter_op *op;
-	struct dma_vchan_info *tq;
 	uint16_t *head = &bufp->head;
 	uint16_t *tail = &bufp->tail;
+	struct dma_vchan_info *tq;
 	uint16_t n;
 	uint16_t i;
 	int ret;
@@ -257,11 +257,13 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter,
 	for (i = 0; i < n; i++)	{
 		op = bufp->op_buffer[*head];
 		if (op->nb_src == 1 && op->nb_dst == 1)
-			ret = rte_dma_copy(dma_dev_id, vchan, op->src_seg->addr, op->dst_seg->addr,
-					   op->src_seg->length, op->flags);
+			ret = rte_dma_copy(dma_dev_id, vchan, op->src_dst_seg[0].addr,
+					   op->src_dst_seg[1].addr, op->src_dst_seg[0].length,
+					   op->flags);
 		else
-			ret = rte_dma_copy_sg(dma_dev_id, vchan, op->src_seg, op->dst_seg,
-					      op->nb_src, op->nb_dst, op->flags);
+			ret = rte_dma_copy_sg(dma_dev_id, vchan, &op->src_dst_seg[0],
+					      &op->src_dst_seg[op->nb_src], op->nb_src, op->nb_dst,
+					      op->flags);
 		if (ret < 0)
 			break;

@@ -511,8 +513,7 @@ edma_enq_to_dma_dev(struct event_dma_adapter *adapter, struct rte_event *ev, uns
 		if (dma_op == NULL)
 			continue;

-		/* Expected to have response info appended to dma_op. */
-
+		dma_op->impl_opaque[0] = ev[i].event;
 		dma_dev_id = dma_op->dma_dev_id;
 		vchan = dma_op->vchan;
 		vchan_qinfo = &adapter->dma_devs[dma_dev_id].vchanq[vchan];
@@ -647,7 +648,6 @@ edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_a
 	uint8_t event_port_id = adapter->event_port_id;
 	uint8_t event_dev_id = adapter->eventdev_id;
 	struct rte_event events[DMA_BATCH_SIZE];
-	struct rte_event *response_info;
 	uint16_t nb_enqueued, nb_ev;
 	uint8_t retry;
 	uint8_t i;
@@ -659,16 +659,7 @@ edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_a
 	for (i = 0; i < num; i++) {
 		struct rte_event *ev = &events[nb_ev++];

-		/* Expected to have response info appended to dma_op. */
-		response_info = (struct rte_event *)((uint8_t *)ops[i] +
-							  sizeof(struct rte_event_dma_adapter_op));
-		if (unlikely(response_info == NULL)) {
-			if (ops[i] != NULL && ops[i]->op_mp != NULL)
-				rte_mempool_put(ops[i]->op_mp, ops[i]);
-			continue;
-		}
-
-		rte_memcpy(ev, response_info, sizeof(struct rte_event));
+		ev->event = ops[i]->impl_opaque[0];
 		ev->event_ptr = ops[i];
 		ev->event_type = RTE_EVENT_TYPE_DMADEV;
 		if (adapter->implicit_release_disabled)
diff --git a/lib/eventdev/rte_event_dma_adapter.h b/lib/eventdev/rte_event_dma_adapter.h
index e924ab673d..048ddba3f3 100644
--- a/lib/eventdev/rte_event_dma_adapter.h
+++ b/lib/eventdev/rte_event_dma_adapter.h
@@ -157,24 +157,46 @@ extern "C" {
  * instance.
  */
 struct rte_event_dma_adapter_op {
-	struct rte_dma_sge *src_seg;
-	/**< Source segments. */
-	struct rte_dma_sge *dst_seg;
-	/**< Destination segments. */
-	uint16_t nb_src;
-	/**< Number of source segments. */
-	uint16_t nb_dst;
-	/**< Number of destination segments. */
 	uint64_t flags;
 	/**< Flags related to the operation.
 	 * @see RTE_DMA_OP_FLAG_*
 	 */
-	int16_t dma_dev_id;
-	/**< DMA device ID to be used */
-	uint16_t vchan;
-	/**< DMA vchan ID to be used */
 	struct rte_mempool *op_mp;
 	/**< Mempool from which op is allocated. */
+	enum rte_dma_status_code status;
+	/**< Status code for this operation. */
+	uint32_t rsvd;
+	/**< Reserved for future use. */
+	uint64_t impl_opaque[2];
+	/**< Implementation-specific opaque data.
+	 * A DMA device implementation uses this field to hold
+	 * implementation-specific values shared between enqueue and dequeue
+	 * operations.
+	 * The application should not modify this field.
+	 */
+	uint64_t user_meta;
+	/**< Memory to store user-specific metadata.
+	 * The DMA device implementation should not modify this area.
+	 */
+	uint64_t event_meta;
+	/**< Event metadata that defines event attributes when used in OP_NEW mode.
+	 * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_NEW
+	 * @see struct rte_event::event
+	 */
+	int16_t dma_dev_id;
+	/**< DMA device ID to be used with OP_FORWARD mode.
+	 * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD
+	 */
+	uint16_t vchan;
+	/**< DMA vchan ID to be used with OP_FORWARD mode.
+	 * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD
+	 */
+	uint16_t nb_src;
+	/**< Number of source segments. */
+	uint16_t nb_dst;
+	/**< Number of destination segments. */
+	struct rte_dma_sge src_dst_seg[0];
+	/**< Source and destination segments. */
 };

 /**
--
2.25.1


^ permalink raw reply	[flat|nested] 17+ messages in thread
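
For readers tracking the layout change above, here is a minimal sketch (with a
hypothetical pool size and helper names, and a single source and destination
segment) of how an application now sizes the op mempool and fills the trailing
src_dst_seg[] array; destination segments start at index nb_src:

#include <rte_dmadev.h>
#include <rte_event_dma_adapter.h>
#include <rte_lcore.h>
#include <rte_mempool.h>

#define NB_SRC 1
#define NB_DST 1

static struct rte_mempool *
create_op_pool(void)
{
	/* The op now carries its segments inline: the mempool element is the
	 * header plus one rte_dma_sge per source and destination segment.
	 */
	unsigned int elt_size = sizeof(struct rte_event_dma_adapter_op) +
				((NB_SRC + NB_DST) * sizeof(struct rte_dma_sge));

	return rte_mempool_create("dma_op_pool", 8192, elt_size, 256, 0,
				  NULL, NULL, NULL, NULL, rte_socket_id(), 0);
}

static void
fill_op(struct rte_event_dma_adapter_op *op, rte_iova_t src, rte_iova_t dst,
	uint32_t length)
{
	op->src_dst_seg[0].addr = src;       /* sources occupy [0, nb_src) */
	op->src_dst_seg[0].length = length;
	op->src_dst_seg[NB_SRC].addr = dst;  /* destinations follow at nb_src */
	op->src_dst_seg[NB_SRC].length = length;
	op->nb_src = NB_SRC;
	op->nb_dst = NB_DST;
	op->flags = RTE_DMA_OP_FLAG_SUBMIT;
}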

* [PATCH v4 2/2] dma/cnxk: remove completion pool
  2024-05-30 12:44     ` [PATCH v4 " pbhagavatula
@ 2024-05-30 12:44       ` pbhagavatula
  2024-06-07 10:20       ` [PATCH v4 1/2] eventdev/dma: reorganize event DMA ops Jerin Jacob
  2024-06-07 10:36       ` [PATCH v5 " pbhagavatula
  2 siblings, 0 replies; 17+ messages in thread
From: pbhagavatula @ 2024-05-30 12:44 UTC (permalink / raw)
  To: jerinj, Vamsi Attunuru, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Use DMA ops to store metadata and remove the use of the completion pool.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Vamsi Attunuru <vattunuru@marvell.com>
---
 drivers/dma/cnxk/cnxk_dmadev.c           |  53 ++++--------
 drivers/dma/cnxk/cnxk_dmadev.h           |  24 +-----
 drivers/dma/cnxk/cnxk_dmadev_fp.c        | 101 ++++++-----------------
 drivers/event/cnxk/cnxk_eventdev_adptr.c |  47 ++---------
 4 files changed, 54 insertions(+), 171 deletions(-)

diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c
index 4ab3cfbdf2..dfd7222713 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.c
+++ b/drivers/dma/cnxk/cnxk_dmadev.c
@@ -2,6 +2,8 @@
  * Copyright (C) 2021 Marvell International Ltd.
  */
 
+#include <rte_event_dma_adapter.h>
+
 #include <cnxk_dmadev.h>
 
 static int cnxk_stats_reset(struct rte_dma_dev *dev, uint16_t vchan);
@@ -30,8 +32,7 @@ cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan)
 {
 	struct cnxk_dpi_conf *dpi_conf;
 	uint16_t num_vchans;
-	uint16_t max_desc;
-	int i, j;
+	int i;
 
 	if (vchan == RTE_DMA_ALL_VCHAN) {
 		num_vchans = dpivf->num_vchans;
@@ -46,12 +47,6 @@ cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan)
 
 	for (; i < num_vchans; i++) {
 		dpi_conf = &dpivf->conf[i];
-		max_desc = dpi_conf->c_desc.max_cnt + 1;
-		if (dpi_conf->c_desc.compl_ptr) {
-			for (j = 0; j < max_desc; j++)
-				rte_free(dpi_conf->c_desc.compl_ptr[j]);
-		}
-
 		rte_free(dpi_conf->c_desc.compl_ptr);
 		dpi_conf->c_desc.compl_ptr = NULL;
 	}
@@ -261,7 +256,7 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 	if (max_desc > CNXK_DPI_MAX_DESC)
 		max_desc = CNXK_DPI_MAX_DESC;
 
-	size = (max_desc * sizeof(struct cnxk_dpi_compl_s *));
+	size = (max_desc * sizeof(uint8_t) * CNXK_DPI_COMPL_OFFSET);
 	dpi_conf->c_desc.compl_ptr = rte_zmalloc(NULL, size, 0);
 
 	if (dpi_conf->c_desc.compl_ptr == NULL) {
@@ -269,16 +264,8 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < max_desc; i++) {
-		dpi_conf->c_desc.compl_ptr[i] =
-			rte_zmalloc(NULL, sizeof(struct cnxk_dpi_compl_s), 0);
-		if (!dpi_conf->c_desc.compl_ptr[i]) {
-			plt_err("Failed to allocate for descriptor memory");
-			return -ENOMEM;
-		}
-
-		dpi_conf->c_desc.compl_ptr[i]->cdata = CNXK_DPI_REQ_CDATA;
-	}
+	for (i = 0; i < max_desc; i++)
+		dpi_conf->c_desc.compl_ptr[i * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
 
 	dpi_conf->c_desc.max_cnt = (max_desc - 1);
 
@@ -301,10 +288,8 @@ cnxk_dmadev_start(struct rte_dma_dev *dev)
 		dpi_conf->pnum_words = 0;
 		dpi_conf->pending = 0;
 		dpi_conf->desc_idx = 0;
-		for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++) {
-			if (dpi_conf->c_desc.compl_ptr[j])
-				dpi_conf->c_desc.compl_ptr[j]->cdata = CNXK_DPI_REQ_CDATA;
-		}
+		for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++)
+			dpi_conf->c_desc.compl_ptr[j * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
 		nb_desc += dpi_conf->c_desc.max_cnt + 1;
 		cnxk_stats_reset(dev, i);
 		dpi_conf->completed_offset = 0;
@@ -382,22 +367,22 @@ cnxk_dmadev_completed(void *dev_private, uint16_t vchan, const uint16_t nb_cpls,
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc;
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t status;
 	int cnt;
 
 	for (cnt = 0; cnt < nb_cpls; cnt++) {
-		comp_ptr = c_desc->compl_ptr[c_desc->head];
-
-		if (comp_ptr->cdata) {
-			if (comp_ptr->cdata == CNXK_DPI_REQ_CDATA)
+		status = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET];
+		if (status) {
+			if (status == CNXK_DPI_REQ_CDATA)
 				break;
 			*has_error = 1;
 			dpi_conf->stats.errors++;
+			c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] =
+				CNXK_DPI_REQ_CDATA;
 			CNXK_DPI_STRM_INC(*c_desc, head);
 			break;
 		}
-
-		comp_ptr->cdata = CNXK_DPI_REQ_CDATA;
+		c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
 		CNXK_DPI_STRM_INC(*c_desc, head);
 	}
 
@@ -414,19 +399,17 @@ cnxk_dmadev_completed_status(void *dev_private, uint16_t vchan, const uint16_t n
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	int cnt;
 
 	for (cnt = 0; cnt < nb_cpls; cnt++) {
-		comp_ptr = c_desc->compl_ptr[c_desc->head];
-		status[cnt] = comp_ptr->cdata;
+		status[cnt] = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET];
 		if (status[cnt]) {
 			if (status[cnt] == CNXK_DPI_REQ_CDATA)
 				break;
 
 			dpi_conf->stats.errors++;
 		}
-		comp_ptr->cdata = CNXK_DPI_REQ_CDATA;
+		c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
 		CNXK_DPI_STRM_INC(*c_desc, head);
 	}
 
@@ -593,7 +576,7 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_de
 	rdpi = &dpivf->rdpi;
 
 	rdpi->pci_dev = pci_dev;
-	rc = roc_dpi_dev_init(rdpi, offsetof(struct cnxk_dpi_compl_s, wqecs));
+	rc = roc_dpi_dev_init(rdpi, offsetof(struct rte_event_dma_adapter_op, impl_opaque));
 	if (rc < 0)
 		goto err_out_free;
 
diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h
index 610a360ba2..a80db333a0 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.h
+++ b/drivers/dma/cnxk/cnxk_dmadev.h
@@ -37,17 +37,12 @@
 #define CNXK_DPI_MAX_CMD_SZ		    CNXK_DPI_CMD_LEN(CNXK_DPI_MAX_POINTER,		\
 							     CNXK_DPI_MAX_POINTER)
 #define CNXK_DPI_CHUNKS_FROM_DESC(cz, desc) (((desc) / (((cz) / 8) / CNXK_DPI_MAX_CMD_SZ)) + 1)
-
+#define CNXK_DPI_COMPL_OFFSET		    ROC_CACHE_LINE_SZ
 /* Set Completion data to 0xFF when request submitted,
  * upon successful request completion engine reset to completion status
  */
 #define CNXK_DPI_REQ_CDATA 0xFF
 
-/* Set Completion data to 0xDEADBEEF when request submitted for SSO.
- * This helps differentiate if the dequeue is called after cnxk enueue.
- */
-#define CNXK_DPI_REQ_SSO_CDATA    0xDEADBEEF
-
 union cnxk_dpi_instr_cmd {
 	uint64_t u;
 	struct cn9k_dpi_instr_cmd {
@@ -91,24 +86,11 @@ union cnxk_dpi_instr_cmd {
 	} cn10k;
 };
 
-struct cnxk_dpi_compl_s {
-	uint64_t cdata;
-	void *op;
-	uint16_t dev_id;
-	uint16_t vchan;
-	uint32_t wqecs;
-};
-
 struct cnxk_dpi_cdesc_data_s {
-	struct cnxk_dpi_compl_s **compl_ptr;
 	uint16_t max_cnt;
 	uint16_t head;
 	uint16_t tail;
-};
-
-struct cnxk_dma_adapter_info {
-	bool enabled;               /* Set if vchan queue is added to dma adapter. */
-	struct rte_mempool *req_mp; /* DMA inflight request mempool. */
+	uint8_t *compl_ptr;
 };
 
 struct cnxk_dpi_conf {
@@ -119,7 +101,7 @@ struct cnxk_dpi_conf {
 	uint16_t desc_idx;
 	struct rte_dma_stats stats;
 	uint64_t completed_offset;
-	struct cnxk_dma_adapter_info adapter_info;
+	bool adapter_enabled;
 };
 
 struct cnxk_dpi_vf_s {
diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c
index 8a3c0c1008..26591235c6 100644
--- a/drivers/dma/cnxk/cnxk_dmadev_fp.c
+++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c
@@ -245,14 +245,14 @@ cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t d
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD];
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t *comp_ptr;
 	int rc;
 
 	if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) ==
 		     dpi_conf->c_desc.head))
 		return -ENOSPC;
 
-	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
+	comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
 	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
 
 	cmd[0] = (1UL << 54) | (1UL << 48);
@@ -301,7 +301,7 @@ cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	const struct rte_dma_sge *fptr, *lptr;
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t *comp_ptr;
 	uint64_t hdr[4];
 	int rc;
 
@@ -309,7 +309,7 @@ cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 		     dpi_conf->c_desc.head))
 		return -ENOSPC;
 
-	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
+	comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
 	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
 
 	hdr[1] = dpi_conf->cmd.u | ((flags & RTE_DMA_OP_FLAG_AUTO_FREE) << 37);
@@ -357,14 +357,14 @@ cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD];
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t *comp_ptr;
 	int rc;
 
 	if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) ==
 		     dpi_conf->c_desc.head))
 		return -ENOSPC;
 
-	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
+	comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
 	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
 
 	cmd[0] = dpi_conf->cmd.u | (1U << 6) | 1U;
@@ -403,7 +403,7 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 {
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t *comp_ptr;
 	uint64_t hdr[4];
 	int rc;
 
@@ -411,7 +411,7 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 		     dpi_conf->c_desc.head))
 		return -ENOSPC;
 
-	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
+	comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
 	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
 
 	hdr[0] = dpi_conf->cmd.u | (nb_dst << 6) | nb_src;
@@ -454,10 +454,8 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 {
 	const struct rte_dma_sge *src, *dst;
 	struct rte_event_dma_adapter_op *op;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
-	struct rte_event *rsp_info;
 	struct cn10k_sso_hws *work;
 	uint16_t nb_src, nb_dst;
 	rte_mcslock_t mcs_lock_me;
@@ -469,34 +467,23 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 
 	for (count = 0; count < nb_events; count++) {
 		op = ev[count].event_ptr;
-		rsp_info = (struct rte_event *)((uint8_t *)op +
-			     sizeof(struct rte_event_dma_adapter_op));
-		dpivf =	rte_dma_fp_objs[op->dma_dev_id].dev_private;
+		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];
 
-		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
-			return count;
-
-		comp_ptr->op = op;
-		comp_ptr->dev_id = op->dma_dev_id;
-		comp_ptr->vchan = op->vchan;
-		comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
-
 		nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
 		nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
 
 		hdr[0] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 54);
 		hdr[0] |= (nb_dst << 6) | nb_src;
-		hdr[1] = ((uint64_t)comp_ptr);
-		hdr[2] = cnxk_dma_adapter_format_event(rsp_info->event);
+		hdr[1] = (uint64_t)op;
+		hdr[2] = cnxk_dma_adapter_format_event(ev[count].event);
 
 		src = &op->src_dst_seg[0];
 		dst = &op->src_dst_seg[op->nb_src];
 
 		if (CNXK_TAG_IS_HEAD(work->gw_rdata) ||
 		    ((CNXK_TT_FROM_TAG(work->gw_rdata) == SSO_TT_ORDERED) &&
-		    (rsp_info->sched_type & DPI_HDR_TT_MASK) ==
-			    RTE_SCHED_TYPE_ORDERED))
+		     (ev[count].sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED))
 			roc_sso_hws_head_wait(work->base);
 
 		rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
@@ -528,7 +515,6 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event
 {
 	const struct rte_dma_sge *fptr, *lptr;
 	struct rte_event_dma_adapter_op *op;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cn9k_sso_hws_dual *work;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
@@ -548,16 +534,8 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event
 		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];
 
-		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
-			return count;
-
-		comp_ptr->op = op;
-		comp_ptr->dev_id = op->dma_dev_id;
-		comp_ptr->vchan = op->vchan;
-		comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
-
 		hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36);
-		hdr[2] = (uint64_t)comp_ptr;
+		hdr[2] = (uint64_t)op;
 
 		nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
 		nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
@@ -609,10 +587,8 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 {
 	const struct rte_dma_sge *fptr, *lptr;
 	struct rte_event_dma_adapter_op *op;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
-	struct rte_event *rsp_info;
 	struct cn9k_sso_hws *work;
 	uint16_t nb_src, nb_dst;
 	rte_mcslock_t mcs_lock_me;
@@ -624,21 +600,11 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 
 	for (count = 0; count < nb_events; count++) {
 		op = ev[count].event_ptr;
-		rsp_info = (struct rte_event *)((uint8_t *)op +
-			    sizeof(struct rte_event_dma_adapter_op));
-		dpivf =	rte_dma_fp_objs[op->dma_dev_id].dev_private;
+		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];
 
-		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
-			return count;
-
-		comp_ptr->op = op;
-		comp_ptr->dev_id = op->dma_dev_id;
-		comp_ptr->vchan = op->vchan;
-		comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
-
 		hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36);
-		hdr[2] = (uint64_t)comp_ptr;
+		hdr[2] = (uint64_t)op;
 
 		nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
 		nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
@@ -656,9 +622,9 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 		}
 
 		hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48;
-		hdr[0] |= cnxk_dma_adapter_format_event(rsp_info->event);
+		hdr[0] |= cnxk_dma_adapter_format_event(ev[count].event);
 
-		if ((rsp_info->sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)
+		if ((ev[count].sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)
 			roc_sso_hws_head_wait(work->base);
 
 		rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
@@ -689,38 +655,23 @@ uintptr_t
 cnxk_dma_adapter_dequeue(uintptr_t get_work1)
 {
 	struct rte_event_dma_adapter_op *op;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
-	rte_mcslock_t mcs_lock_me;
-	RTE_ATOMIC(uint8_t) *wqecs;
 
-	comp_ptr = (struct cnxk_dpi_compl_s *)get_work1;
+	op = (struct rte_event_dma_adapter_op *)get_work1;
+	dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
+	dpi_conf = &dpivf->conf[op->vchan];
 
-	/* Dequeue can be called without calling cnx_enqueue in case of
-	 * dma_adapter. When its called from adapter, dma op will not be
-	 * embedded in completion pointer. In those cases return op.
-	 */
-	if (comp_ptr->cdata != CNXK_DPI_REQ_SSO_CDATA)
-		return (uintptr_t)comp_ptr;
-
-	dpivf =	rte_dma_fp_objs[comp_ptr->dev_id].dev_private;
-	dpi_conf = &dpivf->conf[comp_ptr->vchan];
-
-	rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
-	wqecs = (uint8_t __rte_atomic *)&comp_ptr->wqecs;
-	if (rte_atomic_load_explicit(wqecs, rte_memory_order_relaxed) != 0)
-		dpi_conf->stats.errors++;
+	if (rte_atomic_load_explicit((RTE_ATOMIC(uint64_t) *)&op->impl_opaque[0],
+				     rte_memory_order_relaxed) != 0)
+		rte_atomic_fetch_add_explicit((RTE_ATOMIC(uint64_t) *)&dpi_conf->stats.errors, 1,
+					      rte_memory_order_relaxed);
 
 	/* Take into account errors also. This is similar to
 	 * cnxk_dmadev_completed_status().
 	 */
-	dpi_conf->stats.completed++;
-	rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
-
-	op = (struct rte_event_dma_adapter_op *)comp_ptr->op;
-
-	rte_mempool_put(dpi_conf->adapter_info.req_mp, comp_ptr);
+	rte_atomic_fetch_add_explicit((RTE_ATOMIC(uint64_t) *)&dpi_conf->stats.completed, 1,
+				      rte_memory_order_relaxed);
 
 	return (uintptr_t)op;
 }
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index a2a59b16c9..98db11ad61 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -739,31 +739,6 @@ cnxk_crypto_adapter_qp_del(const struct rte_cryptodev *cdev,
 	return 0;
 }
 
-static int
-dma_adapter_vchan_setup(const int16_t dma_dev_id, struct cnxk_dpi_conf *vchan,
-			uint16_t vchan_id)
-{
-	char name[RTE_MEMPOOL_NAMESIZE];
-	uint32_t cache_size, nb_req;
-	unsigned int req_size;
-
-	snprintf(name, RTE_MEMPOOL_NAMESIZE, "cnxk_dma_req_%u:%u", dma_dev_id, vchan_id);
-	req_size = sizeof(struct cnxk_dpi_compl_s);
-
-	nb_req = vchan->c_desc.max_cnt;
-	cache_size = 16;
-	nb_req += (cache_size * rte_lcore_count());
-
-	vchan->adapter_info.req_mp = rte_mempool_create(name, nb_req, req_size, cache_size, 0,
-							NULL, NULL, NULL, NULL, rte_socket_id(), 0);
-	if (vchan->adapter_info.req_mp == NULL)
-		return -ENOMEM;
-
-	vchan->adapter_info.enabled = true;
-
-	return 0;
-}
-
 int
 cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev,
 			   const int16_t dma_dev_id, uint16_t vchan_id)
@@ -772,7 +747,6 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev,
 	uint32_t adptr_xae_cnt = 0;
 	struct cnxk_dpi_vf_s *dpivf;
 	struct cnxk_dpi_conf *vchan;
-	int ret;
 
 	dpivf = rte_dma_fp_objs[dma_dev_id].dev_private;
 	if ((int16_t)vchan_id == -1) {
@@ -780,19 +754,13 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev,
 
 		for (vchan_id = 0; vchan_id < dpivf->num_vchans; vchan_id++) {
 			vchan = &dpivf->conf[vchan_id];
-			ret = dma_adapter_vchan_setup(dma_dev_id, vchan, vchan_id);
-			if (ret) {
-				cnxk_dma_adapter_vchan_del(dma_dev_id, -1);
-				return ret;
-			}
-			adptr_xae_cnt += vchan->adapter_info.req_mp->size;
+			vchan->adapter_enabled = true;
+			adptr_xae_cnt += vchan->c_desc.max_cnt;
 		}
 	} else {
 		vchan = &dpivf->conf[vchan_id];
-		ret = dma_adapter_vchan_setup(dma_dev_id, vchan, vchan_id);
-		if (ret)
-			return ret;
-		adptr_xae_cnt = vchan->adapter_info.req_mp->size;
+		vchan->adapter_enabled = true;
+		adptr_xae_cnt = vchan->c_desc.max_cnt;
 	}
 
 	/* Update dma adapter XAE count */
@@ -805,8 +773,7 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev,
 static int
 dma_adapter_vchan_free(struct cnxk_dpi_conf *vchan)
 {
-	rte_mempool_free(vchan->adapter_info.req_mp);
-	vchan->adapter_info.enabled = false;
+	vchan->adapter_enabled = false;
 
 	return 0;
 }
@@ -823,12 +790,12 @@ cnxk_dma_adapter_vchan_del(const int16_t dma_dev_id, uint16_t vchan_id)
 
 		for (vchan_id = 0; vchan_id < dpivf->num_vchans; vchan_id++) {
 			vchan = &dpivf->conf[vchan_id];
-			if (vchan->adapter_info.enabled)
+			if (vchan->adapter_enabled)
 				dma_adapter_vchan_free(vchan);
 		}
 	} else {
 		vchan = &dpivf->conf[vchan_id];
-		if (vchan->adapter_info.enabled)
+		if (vchan->adapter_enabled)
 			dma_adapter_vchan_free(vchan);
 	}
 
-- 
2.25.1


^ permalink raw reply	[flat|nested] 17+ messages in thread
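
A sketch of the completion-ring layout this patch moves to, assuming
CNXK_DPI_COMPL_OFFSET equals the cache-line size as defined above (the helper
names here are illustrative, not the driver's): instead of a mempool of
cnxk_dpi_compl_s objects, each descriptor owns a single status byte, spread
one cache line apart so the engine's completion writes do not false-share:

#include <stdint.h>

#define COMPL_OFFSET 64U  /* stands in for CNXK_DPI_COMPL_OFFSET */
#define REQ_CDATA    0xFF /* "submitted, not yet completed" marker */

/* Arm every slot before use; the DMA engine overwrites the byte with the
 * completion code once the transfer finishes.
 */
static inline void
compl_ring_init(uint8_t *compl_ptr, uint16_t max_desc)
{
	uint16_t i;

	for (i = 0; i < max_desc; i++)
		compl_ptr[i * COMPL_OFFSET] = REQ_CDATA;
}

/* Poll one slot: REQ_CDATA means still in flight, zero means success and
 * anything else is an error code. Re-arm the slot once it is consumed.
 */
static inline uint8_t
compl_ring_pop(uint8_t *compl_ptr, uint16_t head)
{
	uint8_t status = compl_ptr[head * COMPL_OFFSET];

	if (status != REQ_CDATA)
		compl_ptr[head * COMPL_OFFSET] = REQ_CDATA;

	return status;
}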

* RE: [EXTERNAL] Re: [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops
  2024-05-30 12:23       ` Jerin Jacob
@ 2024-05-30 17:35         ` Pavan Nikhilesh Bhagavatula
  0 siblings, 0 replies; 17+ messages in thread
From: Pavan Nikhilesh Bhagavatula @ 2024-05-30 17:35 UTC (permalink / raw)
  To: Jerin Jacob, Amit Prakash Shukla; +Cc: Jerin Jacob, Vamsi Krishna Attunuru, dev

> On Thu, May 16, 2024 at 1:09 PM Amit Prakash Shukla
> <amitprakashs@marvell.com> wrote:
> >
> > > -----Original Message-----
> > > From: pbhagavatula@marvell.com <pbhagavatula@marvell.com>
> > > Sent: Wednesday, April 17, 2024 1:57 PM
> > > To: Jerin Jacob <jerinj@marvell.com>; Amit Prakash Shukla
> > > <amitprakashs@marvell.com>; Vamsi Krishna Attunuru
> > > <vattunuru@marvell.com>
> > > Cc: dev@dpdk.org; Pavan Nikhilesh Bhagavatula
> > > <pbhagavatula@marvell.com>
> > > Subject: [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops
> > >
> > > From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > >
> > > Re-organize the event DMA ops structure to allow holding source and
> > > destination pointers without the need for additional memory; the mempool
> > > allocating memory for rte_event_dma_adapter_op can size the structure to
> > > accommodate all the needed source and destination pointers.
> > >
> > > Add multiple words for holding user metadata, adapter
> > > implementation-specific metadata, and event metadata.
> > >
> > > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > > ---
> > >  v3 Changes:
> > >  - Fix stdatomic compilation.
> > >  v2 Changes:
> > >  - Fix 32bit compilation
> > >
> > >  app/test-eventdev/test_perf_common.c        | 26 ++++--------
> > >  app/test/test_event_dma_adapter.c           | 20 +++------
> > >  doc/guides/prog_guide/event_dma_adapter.rst |  2 +-
> > >  drivers/dma/cnxk/cnxk_dmadev_fp.c           | 39 +++++++----------
> 
> Spotted driver change in library patch. Please split.

Since this change modifies the fastpath structure rte_event_dma_adapter_op,
some driver changes are required; I have moved the unrelated changes to
2/2.

> 
> > >  lib/eventdev/rte_event_dma_adapter.c        | 27 ++++--------
> > >  lib/eventdev/rte_event_dma_adapter.h        | 46 +++++++++++++++------
> > >  6 files changed, 72 insertions(+), 88 deletions(-)
> > >
> >
> > Acked-by: Amit Prakash Shukla <amitprakashs@marvell.com>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v4 1/2] eventdev/dma: reorganize event DMA ops
  2024-05-30 12:44     ` [PATCH v4 " pbhagavatula
  2024-05-30 12:44       ` [PATCH v4 2/2] dma/cnxk: remove completion pool pbhagavatula
@ 2024-06-07 10:20       ` Jerin Jacob
  2024-06-07 10:36       ` [PATCH v5 " pbhagavatula
  2 siblings, 0 replies; 17+ messages in thread
From: Jerin Jacob @ 2024-06-07 10:20 UTC (permalink / raw)
  To: pbhagavatula; +Cc: jerinj, Amit Prakash Shukla, Vamsi Attunuru, dev

On Thu, May 30, 2024 at 6:14 PM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Re-organize the event DMA ops structure to allow holding
> source and destination pointers without the need for
> additional memory; the mempool allocating memory for
> rte_event_dma_adapter_op can size the structure to
> accommodate all the needed source and destination
> pointers.
>
> Add multiple words for holding user metadata, adapter
> implementation-specific metadata, and event metadata.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> Acked-by: Amit Prakash Shukla <amitprakashs@marvell.com>
> ---
>  v4 Changes:
>  - Reduce unrelated driver changes and move them to 2/2.
>  v3 Changes:
>  - Fix stdatomic compilation.
>  v2 Changes:
>  - Fix 32bit compilation
>
>  app/test-eventdev/test_perf_common.c        | 26 ++++--------
>  app/test/test_event_dma_adapter.c           | 20 +++------
>  doc/guides/prog_guide/event_dma_adapter.rst |  2 +-
>  drivers/dma/cnxk/cnxk_dmadev_fp.c           | 20 ++++-----
>  lib/eventdev/rte_event_dma_adapter.c        | 27 ++++--------
>  lib/eventdev/rte_event_dma_adapter.h        | 46 +++++++++++++++------
>  6 files changed, 66 insertions(+), 75 deletions(-)

>   * instance.
>   */
>  struct rte_event_dma_adapter_op {
> -       struct rte_dma_sge *src_seg;


Even though it is experimental, changes to a public structure need to be
documented in the release notes.
Please send the next version.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v5 1/2] eventdev/dma: reorganize event DMA ops
  2024-05-30 12:44     ` [PATCH v4 " pbhagavatula
  2024-05-30 12:44       ` [PATCH v4 2/2] dma/cnxk: remove completion pool pbhagavatula
  2024-06-07 10:20       ` [PATCH v4 1/2] eventdev/dma: reorganize event DMA ops Jerin Jacob
@ 2024-06-07 10:36       ` pbhagavatula
  2024-06-07 10:36         ` [PATCH v5 2/2] dma/cnxk: remove completion pool pbhagavatula
  2024-06-08  6:16         ` [PATCH v5 1/2] eventdev/dma: reorganize event DMA ops Jerin Jacob
  2 siblings, 2 replies; 17+ messages in thread
From: pbhagavatula @ 2024-06-07 10:36 UTC (permalink / raw)
  To: jerinj, Amit Prakash Shukla, Vamsi Attunuru; +Cc: dev, Pavan Nikhilesh

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Re-organize the event DMA ops structure to allow holding
source and destination pointers without the need for
additional memory; the mempool allocating memory for
rte_event_dma_adapter_op can size the structure to
accommodate all the needed source and destination
pointers.

Add multiple words for holding user metadata, adapter
implementation-specific metadata, and event metadata.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Amit Prakash Shukla <amitprakashs@marvell.com>
---
 v5 Changes:
 - Update release notes with Experimental API changes.
 v4 Changes:
 - Reduce unrelated driver changes and move them to 2/2.
 v3 Changes:
 - Fix stdatomic compilation.
 v2 Changes:
 - Fix 32bit compilation

 app/test-eventdev/test_perf_common.c        | 26 ++++--------
 app/test/test_event_dma_adapter.c           | 20 +++------
 doc/guides/prog_guide/event_dma_adapter.rst |  2 +-
 doc/guides/rel_notes/release_24_07.rst      |  3 ++
 drivers/dma/cnxk/cnxk_dmadev_fp.c           | 20 ++++-----
 lib/eventdev/rte_event_dma_adapter.c        | 27 ++++--------
 lib/eventdev/rte_event_dma_adapter.h        | 46 +++++++++++++++------
 7 files changed, 69 insertions(+), 75 deletions(-)

diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c
index 93e6132de8..db0f9c1f3b 100644
--- a/app/test-eventdev/test_perf_common.c
+++ b/app/test-eventdev/test_perf_common.c
@@ -1503,7 +1503,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
 		prod = 0;
 		for (; port < perf_nb_event_ports(opt); port++) {
 			struct prod_data *p = &t->prod[port];
-			struct rte_event *response_info;
 			uint32_t flow_id;

 			p->dev_id = opt->dev_id;
@@ -1523,13 +1522,10 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
 			for (flow_id = 0; flow_id < t->nb_flows; flow_id++) {
 				rte_mempool_get(t->da_op_pool, (void **)&op);

-				op->src_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0);
-				op->dst_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0);
-
-				op->src_seg->addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
-				op->dst_seg->addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
-				op->src_seg->length = 1024;
-				op->dst_seg->length = 1024;
+				op->src_dst_seg[0].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
+				op->src_dst_seg[1].addr = rte_pktmbuf_iova(rte_pktmbuf_alloc(pool));
+				op->src_dst_seg[0].length = 1024;
+				op->src_dst_seg[1].length = 1024;
 				op->nb_src = 1;
 				op->nb_dst = 1;
 				op->flags = RTE_DMA_OP_FLAG_SUBMIT;
@@ -1537,12 +1533,6 @@ perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
 				op->dma_dev_id = dma_dev_id;
 				op->vchan = vchan_id;

-				response_info = (struct rte_event *)((uint8_t *)op +
-						 sizeof(struct rte_event_dma_adapter_op));
-				response_info->queue_id = p->queue_id;
-				response_info->sched_type = RTE_SCHED_TYPE_ATOMIC;
-				response_info->flow_id = flow_id;
-
 				p->da.dma_op[flow_id] = op;
 			}

@@ -2036,7 +2026,7 @@ perf_dmadev_setup(struct evt_test *test, struct evt_options *opt)
 		return -ENODEV;
 	}

-	elt_size = sizeof(struct rte_event_dma_adapter_op) + sizeof(struct rte_event);
+	elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2);
 	t->da_op_pool = rte_mempool_create("dma_op_pool", opt->pool_sz, elt_size, 256,
 					   0, NULL, NULL, NULL, NULL, rte_socket_id(), 0);
 	if (t->da_op_pool == NULL) {
@@ -2085,10 +2075,8 @@ perf_dmadev_destroy(struct evt_test *test, struct evt_options *opt)
 		for (flow_id = 0; flow_id < t->nb_flows; flow_id++) {
 			op = p->da.dma_op[flow_id];

-			rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_seg->addr);
-			rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->dst_seg->addr);
-			rte_free(op->src_seg);
-			rte_free(op->dst_seg);
+			rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_dst_seg[0].addr);
+			rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)op->src_dst_seg[1].addr);
 			rte_mempool_put(op->op_mp, op);
 		}

diff --git a/app/test/test_event_dma_adapter.c b/app/test/test_event_dma_adapter.c
index 35b417b69f..d9dff4ff7d 100644
--- a/app/test/test_event_dma_adapter.c
+++ b/app/test/test_event_dma_adapter.c
@@ -235,7 +235,6 @@ test_op_forward_mode(void)
 	struct rte_mbuf *dst_mbuf[TEST_MAX_OP];
 	struct rte_event_dma_adapter_op *op;
 	struct rte_event ev[TEST_MAX_OP];
-	struct rte_event response_info;
 	int ret, i;

 	ret = rte_pktmbuf_alloc_bulk(params.src_mbuf_pool, src_mbuf, TEST_MAX_OP);
@@ -253,14 +252,11 @@ test_op_forward_mode(void)
 		rte_mempool_get(params.op_mpool, (void **)&op);
 		TEST_ASSERT_NOT_NULL(op, "Failed to allocate dma operation struct\n");

-		op->src_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0);
-		op->dst_seg = rte_malloc(NULL, sizeof(struct rte_dma_sge), 0);
-
 		/* Update Op */
-		op->src_seg->addr = rte_pktmbuf_iova(src_mbuf[i]);
-		op->dst_seg->addr = rte_pktmbuf_iova(dst_mbuf[i]);
-		op->src_seg->length = PACKET_LENGTH;
-		op->dst_seg->length = PACKET_LENGTH;
+		op->src_dst_seg[0].addr = rte_pktmbuf_iova(src_mbuf[i]);
+		op->src_dst_seg[1].addr = rte_pktmbuf_iova(dst_mbuf[i]);
+		op->src_dst_seg[0].length = PACKET_LENGTH;
+		op->src_dst_seg[1].length = PACKET_LENGTH;
 		op->nb_src = 1;
 		op->nb_dst = 1;
 		op->flags = RTE_DMA_OP_FLAG_SUBMIT;
@@ -268,10 +264,6 @@ test_op_forward_mode(void)
 		op->dma_dev_id = TEST_DMA_DEV_ID;
 		op->vchan = TEST_DMA_VCHAN_ID;

-		response_info.event = dma_response_info.event;
-		rte_memcpy((uint8_t *)op + sizeof(struct rte_event_dma_adapter_op), &response_info,
-			   sizeof(struct rte_event));
-
 		/* Fill in event info and update event_ptr with rte_event_dma_adapter_op */
 		memset(&ev[i], 0, sizeof(struct rte_event));
 		ev[i].event = 0;
@@ -294,8 +286,6 @@ test_op_forward_mode(void)

 		TEST_ASSERT_EQUAL(ret, 0, "Data mismatch for dma adapter\n");

-		rte_free(op->src_seg);
-		rte_free(op->dst_seg);
 		rte_mempool_put(op->op_mp, op);
 	}

@@ -400,7 +390,7 @@ configure_dmadev(void)
 						       rte_socket_id());
 	RTE_TEST_ASSERT_NOT_NULL(params.dst_mbuf_pool, "Can't create DMA_DST_MBUFPOOL\n");

-	elt_size = sizeof(struct rte_event_dma_adapter_op) + sizeof(struct rte_event);
+	elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2);
 	params.op_mpool = rte_mempool_create("EVENT_DMA_OP_POOL", DMA_OP_POOL_SIZE, elt_size, 0,
 					     0, NULL, NULL, NULL, NULL, rte_socket_id(), 0);
 	RTE_TEST_ASSERT_NOT_NULL(params.op_mpool, "Can't create DMA_OP_POOL\n");
diff --git a/doc/guides/prog_guide/event_dma_adapter.rst b/doc/guides/prog_guide/event_dma_adapter.rst
index 3443b6a803..1fb9b0a07b 100644
--- a/doc/guides/prog_guide/event_dma_adapter.rst
+++ b/doc/guides/prog_guide/event_dma_adapter.rst
@@ -144,7 +144,7 @@ on which it enqueues events towards the DMA adapter using ``rte_event_enqueue_bu
    uint32_t cap;
    int ret;

-   /* Fill in event info and update event_ptr with rte_dma_op */
+   /* Fill in event info and update event_ptr with rte_event_dma_adapter_op */
    memset(&ev, 0, sizeof(ev));
    .
    .
diff --git a/doc/guides/rel_notes/release_24_07.rst b/doc/guides/rel_notes/release_24_07.rst
index a69f24cf99..7800cb4c31 100644
--- a/doc/guides/rel_notes/release_24_07.rst
+++ b/doc/guides/rel_notes/release_24_07.rst
@@ -84,6 +84,9 @@ API Changes
    Also, make sure to start the actual text at the margin.
    =======================================================

+* eventdev: Reorganize the fastpath structure ``rte_event_dma_adapter_op``
+  to optimize the memory layout and improve performance.
+

 ABI Changes
 -----------
diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c
index f6562b603e..8a3c0c1008 100644
--- a/drivers/dma/cnxk/cnxk_dmadev_fp.c
+++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c
@@ -490,8 +490,8 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 		hdr[1] = ((uint64_t)comp_ptr);
 		hdr[2] = cnxk_dma_adapter_format_event(rsp_info->event);

-		src = &op->src_seg[0];
-		dst = &op->dst_seg[0];
+		src = &op->src_dst_seg[0];
+		dst = &op->src_dst_seg[op->nb_src];

 		if (CNXK_TAG_IS_HEAD(work->gw_rdata) ||
 		    ((CNXK_TT_FROM_TAG(work->gw_rdata) == SSO_TT_ORDERED) &&
@@ -566,12 +566,12 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event
 		 * For all other cases, src pointers are first pointers.
 		 */
 		if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) {
-			fptr = &op->dst_seg[0];
-			lptr = &op->src_seg[0];
+			fptr = &op->src_dst_seg[nb_src];
+			lptr = &op->src_dst_seg[0];
 			RTE_SWAP(nb_src, nb_dst);
 		} else {
-			fptr = &op->src_seg[0];
-			lptr = &op->dst_seg[0];
+			fptr = &op->src_dst_seg[0];
+			lptr = &op->src_dst_seg[nb_src];
 		}

 		hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48;
@@ -647,12 +647,12 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 		 * For all other cases, src pointers are first pointers.
 		 */
 		if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) {
-			fptr = &op->dst_seg[0];
-			lptr = &op->src_seg[0];
+			fptr = &op->src_dst_seg[nb_src];
+			lptr = &op->src_dst_seg[0];
 			RTE_SWAP(nb_src, nb_dst);
 		} else {
-			fptr = &op->src_seg[0];
-			lptr = &op->dst_seg[0];
+			fptr = &op->src_dst_seg[0];
+			lptr = &op->src_dst_seg[nb_src];
 		}

 		hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48;
diff --git a/lib/eventdev/rte_event_dma_adapter.c b/lib/eventdev/rte_event_dma_adapter.c
index 24dff556db..e52ef46a1b 100644
--- a/lib/eventdev/rte_event_dma_adapter.c
+++ b/lib/eventdev/rte_event_dma_adapter.c
@@ -236,9 +236,9 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter,
 				      uint16_t vchan, uint16_t *nb_ops_flushed)
 {
 	struct rte_event_dma_adapter_op *op;
-	struct dma_vchan_info *tq;
 	uint16_t *head = &bufp->head;
 	uint16_t *tail = &bufp->tail;
+	struct dma_vchan_info *tq;
 	uint16_t n;
 	uint16_t i;
 	int ret;
@@ -257,11 +257,13 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter,
 	for (i = 0; i < n; i++)	{
 		op = bufp->op_buffer[*head];
 		if (op->nb_src == 1 && op->nb_dst == 1)
-			ret = rte_dma_copy(dma_dev_id, vchan, op->src_seg->addr, op->dst_seg->addr,
-					   op->src_seg->length, op->flags);
+			ret = rte_dma_copy(dma_dev_id, vchan, op->src_dst_seg[0].addr,
+					   op->src_dst_seg[1].addr, op->src_dst_seg[0].length,
+					   op->flags);
 		else
-			ret = rte_dma_copy_sg(dma_dev_id, vchan, op->src_seg, op->dst_seg,
-					      op->nb_src, op->nb_dst, op->flags);
+			ret = rte_dma_copy_sg(dma_dev_id, vchan, &op->src_dst_seg[0],
+					      &op->src_dst_seg[op->nb_src], op->nb_src, op->nb_dst,
+					      op->flags);
 		if (ret < 0)
 			break;

@@ -511,8 +513,7 @@ edma_enq_to_dma_dev(struct event_dma_adapter *adapter, struct rte_event *ev, uns
 		if (dma_op == NULL)
 			continue;

-		/* Expected to have response info appended to dma_op. */
-
+		dma_op->impl_opaque[0] = ev[i].event;
 		dma_dev_id = dma_op->dma_dev_id;
 		vchan = dma_op->vchan;
 		vchan_qinfo = &adapter->dma_devs[dma_dev_id].vchanq[vchan];
@@ -647,7 +648,6 @@ edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_a
 	uint8_t event_port_id = adapter->event_port_id;
 	uint8_t event_dev_id = adapter->eventdev_id;
 	struct rte_event events[DMA_BATCH_SIZE];
-	struct rte_event *response_info;
 	uint16_t nb_enqueued, nb_ev;
 	uint8_t retry;
 	uint8_t i;
@@ -659,16 +659,7 @@ edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_a
 	for (i = 0; i < num; i++) {
 		struct rte_event *ev = &events[nb_ev++];

-		/* Expected to have response info appended to dma_op. */
-		response_info = (struct rte_event *)((uint8_t *)ops[i] +
-							  sizeof(struct rte_event_dma_adapter_op));
-		if (unlikely(response_info == NULL)) {
-			if (ops[i] != NULL && ops[i]->op_mp != NULL)
-				rte_mempool_put(ops[i]->op_mp, ops[i]);
-			continue;
-		}
-
-		rte_memcpy(ev, response_info, sizeof(struct rte_event));
+		ev->event = ops[i]->impl_opaque[0];
 		ev->event_ptr = ops[i];
 		ev->event_type = RTE_EVENT_TYPE_DMADEV;
 		if (adapter->implicit_release_disabled)
diff --git a/lib/eventdev/rte_event_dma_adapter.h b/lib/eventdev/rte_event_dma_adapter.h
index e924ab673d..048ddba3f3 100644
--- a/lib/eventdev/rte_event_dma_adapter.h
+++ b/lib/eventdev/rte_event_dma_adapter.h
@@ -157,24 +157,46 @@ extern "C" {
  * instance.
  */
 struct rte_event_dma_adapter_op {
-	struct rte_dma_sge *src_seg;
-	/**< Source segments. */
-	struct rte_dma_sge *dst_seg;
-	/**< Destination segments. */
-	uint16_t nb_src;
-	/**< Number of source segments. */
-	uint16_t nb_dst;
-	/**< Number of destination segments. */
 	uint64_t flags;
 	/**< Flags related to the operation.
 	 * @see RTE_DMA_OP_FLAG_*
 	 */
-	int16_t dma_dev_id;
-	/**< DMA device ID to be used */
-	uint16_t vchan;
-	/**< DMA vchan ID to be used */
 	struct rte_mempool *op_mp;
 	/**< Mempool from which op is allocated. */
+	enum rte_dma_status_code status;
+	/**< Status code for this operation. */
+	uint32_t rsvd;
+	/**< Reserved for future use. */
+	uint64_t impl_opaque[2];
+	/**< Implementation-specific opaque data.
+	 * A DMA device implementation uses this field to hold
+	 * implementation-specific values shared between enqueue and dequeue
+	 * operations.
+	 * The application should not modify this field.
+	 */
+	uint64_t user_meta;
+	/**< Memory to store user-specific metadata.
+	 * The DMA device implementation should not modify this area.
+	 */
+	uint64_t event_meta;
+	/**< Event metadata that defines event attributes when used in OP_NEW mode.
+	 * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_NEW
+	 * @see struct rte_event::event
+	 */
+	int16_t dma_dev_id;
+	/**< DMA device ID to be used with OP_FORWARD mode.
+	 * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD
+	 */
+	uint16_t vchan;
+	/**< DMA vchan ID to be used with OP_FORWARD mode.
+	 * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD
+	 */
+	uint16_t nb_src;
+	/**< Number of source segments. */
+	uint16_t nb_dst;
+	/**< Number of destination segments. */
+	struct rte_dma_sge src_dst_seg[0];
+	/**< Source and destination segments. */
 };

 /**
--
2.25.1


^ permalink raw reply	[flat|nested] 17+ messages in thread
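
The impl_opaque[0] hand-off that replaces the appended response event can be
pictured as below. This is a sketch of the adapter's OP_FORWARD flow, not the
library code verbatim: the enqueue side stashes the request's event word in
the op, and the completion side rebuilds the response event from it.

#include <rte_event_dma_adapter.h>
#include <rte_eventdev.h>

/* Enqueue side: remember the request's event metadata in the op itself. */
static void
stash_request_event(struct rte_event_dma_adapter_op *op,
		    const struct rte_event *req)
{
	op->impl_opaque[0] = req->event;
}

/* Completion side: queue_id, sched_type and flow_id come back from the
 * stashed word; the finished op rides along in event_ptr.
 */
static void
build_response_event(struct rte_event_dma_adapter_op *op,
		     struct rte_event *resp)
{
	resp->event = op->impl_opaque[0];
	resp->event_ptr = op;
	resp->event_type = RTE_EVENT_TYPE_DMADEV;
	resp->op = RTE_EVENT_OP_FORWARD;
}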

* [PATCH v5 2/2] dma/cnxk: remove completion pool
  2024-06-07 10:36       ` [PATCH v5 " pbhagavatula
@ 2024-06-07 10:36         ` pbhagavatula
  2024-06-08  6:16         ` [PATCH v5 1/2] eventdev/dma: reorganize event DMA ops Jerin Jacob
  1 sibling, 0 replies; 17+ messages in thread
From: pbhagavatula @ 2024-06-07 10:36 UTC (permalink / raw)
  To: jerinj, Vamsi Attunuru, Pavan Nikhilesh, Shijith Thotton; +Cc: dev

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Use DMA ops to store metadata and remove the use of the completion pool.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Vamsi Attunuru <vattunuru@marvell.com>
---
 drivers/dma/cnxk/cnxk_dmadev.c           |  53 ++++--------
 drivers/dma/cnxk/cnxk_dmadev.h           |  24 +-----
 drivers/dma/cnxk/cnxk_dmadev_fp.c        | 101 ++++++-----------------
 drivers/event/cnxk/cnxk_eventdev_adptr.c |  47 ++---------
 4 files changed, 54 insertions(+), 171 deletions(-)

diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c
index 4ab3cfbdf2..dfd7222713 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.c
+++ b/drivers/dma/cnxk/cnxk_dmadev.c
@@ -2,6 +2,8 @@
  * Copyright (C) 2021 Marvell International Ltd.
  */
 
+#include <rte_event_dma_adapter.h>
+
 #include <cnxk_dmadev.h>
 
 static int cnxk_stats_reset(struct rte_dma_dev *dev, uint16_t vchan);
@@ -30,8 +32,7 @@ cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan)
 {
 	struct cnxk_dpi_conf *dpi_conf;
 	uint16_t num_vchans;
-	uint16_t max_desc;
-	int i, j;
+	int i;
 
 	if (vchan == RTE_DMA_ALL_VCHAN) {
 		num_vchans = dpivf->num_vchans;
@@ -46,12 +47,6 @@ cnxk_dmadev_vchan_free(struct cnxk_dpi_vf_s *dpivf, uint16_t vchan)
 
 	for (; i < num_vchans; i++) {
 		dpi_conf = &dpivf->conf[i];
-		max_desc = dpi_conf->c_desc.max_cnt + 1;
-		if (dpi_conf->c_desc.compl_ptr) {
-			for (j = 0; j < max_desc; j++)
-				rte_free(dpi_conf->c_desc.compl_ptr[j]);
-		}
-
 		rte_free(dpi_conf->c_desc.compl_ptr);
 		dpi_conf->c_desc.compl_ptr = NULL;
 	}
@@ -261,7 +256,7 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 	if (max_desc > CNXK_DPI_MAX_DESC)
 		max_desc = CNXK_DPI_MAX_DESC;
 
-	size = (max_desc * sizeof(struct cnxk_dpi_compl_s *));
+	size = (max_desc * sizeof(uint8_t) * CNXK_DPI_COMPL_OFFSET);
 	dpi_conf->c_desc.compl_ptr = rte_zmalloc(NULL, size, 0);
 
 	if (dpi_conf->c_desc.compl_ptr == NULL) {
@@ -269,16 +264,8 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < max_desc; i++) {
-		dpi_conf->c_desc.compl_ptr[i] =
-			rte_zmalloc(NULL, sizeof(struct cnxk_dpi_compl_s), 0);
-		if (!dpi_conf->c_desc.compl_ptr[i]) {
-			plt_err("Failed to allocate for descriptor memory");
-			return -ENOMEM;
-		}
-
-		dpi_conf->c_desc.compl_ptr[i]->cdata = CNXK_DPI_REQ_CDATA;
-	}
+	for (i = 0; i < max_desc; i++)
+		dpi_conf->c_desc.compl_ptr[i * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
 
 	dpi_conf->c_desc.max_cnt = (max_desc - 1);
 
@@ -301,10 +288,8 @@ cnxk_dmadev_start(struct rte_dma_dev *dev)
 		dpi_conf->pnum_words = 0;
 		dpi_conf->pending = 0;
 		dpi_conf->desc_idx = 0;
-		for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++) {
-			if (dpi_conf->c_desc.compl_ptr[j])
-				dpi_conf->c_desc.compl_ptr[j]->cdata = CNXK_DPI_REQ_CDATA;
-		}
+		for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++)
+			dpi_conf->c_desc.compl_ptr[j * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
 		nb_desc += dpi_conf->c_desc.max_cnt + 1;
 		cnxk_stats_reset(dev, i);
 		dpi_conf->completed_offset = 0;
@@ -382,22 +367,22 @@ cnxk_dmadev_completed(void *dev_private, uint16_t vchan, const uint16_t nb_cpls,
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc;
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t status;
 	int cnt;
 
 	for (cnt = 0; cnt < nb_cpls; cnt++) {
-		comp_ptr = c_desc->compl_ptr[c_desc->head];
-
-		if (comp_ptr->cdata) {
-			if (comp_ptr->cdata == CNXK_DPI_REQ_CDATA)
+		status = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET];
+		if (status) {
+			if (status == CNXK_DPI_REQ_CDATA)
 				break;
 			*has_error = 1;
 			dpi_conf->stats.errors++;
+			c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] =
+				CNXK_DPI_REQ_CDATA;
 			CNXK_DPI_STRM_INC(*c_desc, head);
 			break;
 		}
-
-		comp_ptr->cdata = CNXK_DPI_REQ_CDATA;
+		c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
 		CNXK_DPI_STRM_INC(*c_desc, head);
 	}
 
@@ -414,19 +399,17 @@ cnxk_dmadev_completed_status(void *dev_private, uint16_t vchan, const uint16_t n
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	int cnt;
 
 	for (cnt = 0; cnt < nb_cpls; cnt++) {
-		comp_ptr = c_desc->compl_ptr[c_desc->head];
-		status[cnt] = comp_ptr->cdata;
+		status[cnt] = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET];
 		if (status[cnt]) {
 			if (status[cnt] == CNXK_DPI_REQ_CDATA)
 				break;
 
 			dpi_conf->stats.errors++;
 		}
-		comp_ptr->cdata = CNXK_DPI_REQ_CDATA;
+		c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
 		CNXK_DPI_STRM_INC(*c_desc, head);
 	}
 
@@ -593,7 +576,7 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_de
 	rdpi = &dpivf->rdpi;
 
 	rdpi->pci_dev = pci_dev;
-	rc = roc_dpi_dev_init(rdpi, offsetof(struct cnxk_dpi_compl_s, wqecs));
+	rc = roc_dpi_dev_init(rdpi, offsetof(struct rte_event_dma_adapter_op, impl_opaque));
 	if (rc < 0)
 		goto err_out_free;
 
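The offsetof() change above is the pivotal hand-off: the engine is programmed
with the byte offset, relative to the WQE pointer it later returns, at which to
store completion status, and that offset now lands inside the op itself instead
of the deleted cnxk_dpi_compl_s. A hypothetical illustration; the struct below
is invented for the example and is not the real rte_event_dma_adapter_op layout:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct dma_op {				/* illustrative stand-in only */
	uint64_t flags;
	void *op_mp;
	uint64_t impl_opaque[2];	/* word 0 receives engine status */
};

int main(void)
{
	printf("completion write offset: %zu\n",
	       offsetof(struct dma_op, impl_opaque));
	return 0;
}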
diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h
index 610a360ba2..a80db333a0 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.h
+++ b/drivers/dma/cnxk/cnxk_dmadev.h
@@ -37,17 +37,12 @@
 #define CNXK_DPI_MAX_CMD_SZ		    CNXK_DPI_CMD_LEN(CNXK_DPI_MAX_POINTER,		\
 							     CNXK_DPI_MAX_POINTER)
 #define CNXK_DPI_CHUNKS_FROM_DESC(cz, desc) (((desc) / (((cz) / 8) / CNXK_DPI_MAX_CMD_SZ)) + 1)
-
+#define CNXK_DPI_COMPL_OFFSET		    ROC_CACHE_LINE_SZ
 /* Set Completion data to 0xFF when request submitted,
  * upon successful request completion engine reset to completion status
  */
 #define CNXK_DPI_REQ_CDATA 0xFF
 
-/* Set Completion data to 0xDEADBEEF when request submitted for SSO.
- * This helps differentiate if the dequeue is called after cnxk enueue.
- */
-#define CNXK_DPI_REQ_SSO_CDATA    0xDEADBEEF
-
 union cnxk_dpi_instr_cmd {
 	uint64_t u;
 	struct cn9k_dpi_instr_cmd {
@@ -91,24 +86,11 @@ union cnxk_dpi_instr_cmd {
 	} cn10k;
 };
 
-struct cnxk_dpi_compl_s {
-	uint64_t cdata;
-	void *op;
-	uint16_t dev_id;
-	uint16_t vchan;
-	uint32_t wqecs;
-};
-
 struct cnxk_dpi_cdesc_data_s {
-	struct cnxk_dpi_compl_s **compl_ptr;
 	uint16_t max_cnt;
 	uint16_t head;
 	uint16_t tail;
-};
-
-struct cnxk_dma_adapter_info {
-	bool enabled;               /* Set if vchan queue is added to dma adapter. */
-	struct rte_mempool *req_mp; /* DMA inflight request mempool. */
+	uint8_t *compl_ptr;
 };
 
 struct cnxk_dpi_conf {
@@ -119,7 +101,7 @@ struct cnxk_dpi_conf {
 	uint16_t desc_idx;
 	struct rte_dma_stats stats;
 	uint64_t completed_offset;
-	struct cnxk_dma_adapter_info adapter_info;
+	bool adapter_enabled;
 };
 
 struct cnxk_dpi_vf_s {
diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c
index 8a3c0c1008..26591235c6 100644
--- a/drivers/dma/cnxk/cnxk_dmadev_fp.c
+++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c
@@ -245,14 +245,14 @@ cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t d
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD];
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t *comp_ptr;
 	int rc;
 
 	if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) ==
 		     dpi_conf->c_desc.head))
 		return -ENOSPC;
 
-	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
+	comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
 	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
 
 	cmd[0] = (1UL << 54) | (1UL << 48);
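
All four copy/copy_sg paths now claim their completion slot the same way; a
self-contained sketch of that claim, again with an illustrative stride constant:

#include <stdint.h>

#define COMPL_OFFSET 128	/* illustrative cache-line stride */

/* The ring is power-of-two sized, so "full" is tail + 1 colliding with
 * head under the mask, and the claimed status byte is found by striding
 * into the flat array instead of chasing a pointer table. */
static uint8_t *claim_compl_slot(uint8_t *compl_ptr, uint16_t *tail,
				 uint16_t head, uint16_t mask)
{
	if (((*tail + 1) & mask) == head)
		return NULL;			/* caller returns -ENOSPC */

	uint8_t *slot = &compl_ptr[*tail * COMPL_OFFSET];

	*tail = (*tail + 1) & mask;	/* CNXK_DPI_STRM_INC analogue */
	return slot;	/* already re-armed to REQ_CDATA by the poll side */
}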
@@ -301,7 +301,7 @@ cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	const struct rte_dma_sge *fptr, *lptr;
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t *comp_ptr;
 	uint64_t hdr[4];
 	int rc;
 
@@ -309,7 +309,7 @@ cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 		     dpi_conf->c_desc.head))
 		return -ENOSPC;
 
-	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
+	comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
 	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
 
 	hdr[1] = dpi_conf->cmd.u | ((flags & RTE_DMA_OP_FLAG_AUTO_FREE) << 37);
@@ -357,14 +357,14 @@ cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
 	uint64_t cmd[CNXK_DPI_DW_PER_SINGLE_CMD];
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t *comp_ptr;
 	int rc;
 
 	if (unlikely(((dpi_conf->c_desc.tail + 1) & dpi_conf->c_desc.max_cnt) ==
 		     dpi_conf->c_desc.head))
 		return -ENOSPC;
 
-	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
+	comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
 	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
 
 	cmd[0] = dpi_conf->cmd.u | (1U << 6) | 1U;
@@ -403,7 +403,7 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 {
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
-	struct cnxk_dpi_compl_s *comp_ptr;
+	uint8_t *comp_ptr;
 	uint64_t hdr[4];
 	int rc;
 
@@ -411,7 +411,7 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 		     dpi_conf->c_desc.head))
 		return -ENOSPC;
 
-	comp_ptr = dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail];
+	comp_ptr = &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
 	CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
 
 	hdr[0] = dpi_conf->cmd.u | (nb_dst << 6) | nb_src;
@@ -454,10 +454,8 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 {
 	const struct rte_dma_sge *src, *dst;
 	struct rte_event_dma_adapter_op *op;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
-	struct rte_event *rsp_info;
 	struct cn10k_sso_hws *work;
 	uint16_t nb_src, nb_dst;
 	rte_mcslock_t mcs_lock_me;
@@ -469,34 +467,23 @@ cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 
 	for (count = 0; count < nb_events; count++) {
 		op = ev[count].event_ptr;
-		rsp_info = (struct rte_event *)((uint8_t *)op +
-			     sizeof(struct rte_event_dma_adapter_op));
-		dpivf =	rte_dma_fp_objs[op->dma_dev_id].dev_private;
+		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];
 
-		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
-			return count;
-
-		comp_ptr->op = op;
-		comp_ptr->dev_id = op->dma_dev_id;
-		comp_ptr->vchan = op->vchan;
-		comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
-
 		nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
 		nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
 
 		hdr[0] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 54);
 		hdr[0] |= (nb_dst << 6) | nb_src;
-		hdr[1] = ((uint64_t)comp_ptr);
-		hdr[2] = cnxk_dma_adapter_format_event(rsp_info->event);
+		hdr[1] = (uint64_t)op;
+		hdr[2] = cnxk_dma_adapter_format_event(ev[count].event);
 
 		src = &op->src_dst_seg[0];
 		dst = &op->src_dst_seg[op->nb_src];
 
 		if (CNXK_TAG_IS_HEAD(work->gw_rdata) ||
 		    ((CNXK_TT_FROM_TAG(work->gw_rdata) == SSO_TT_ORDERED) &&
-		    (rsp_info->sched_type & DPI_HDR_TT_MASK) ==
-			    RTE_SCHED_TYPE_ORDERED))
+		     (ev[count].sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED))
 			roc_sso_hws_head_wait(work->base);
 
 		rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
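
With the completion pool gone, the op pointer itself rides in the header word
that the SSO later hands back as get_work1, and the response event template is
taken from the submitted event rather than from an rte_event appended to the op.
A sketch of the cn10k-style header build; HDR_PT_WQP below is an invented value
standing in for the real DPI_HDR_PT_WQP:

#include <stdint.h>

#define HDR_PT_WQP 0x2ULL	/* illustrative "complete via WQE ptr" type */

/* No per-request mempool object means one less enqueue failure path,
 * and dequeue can recover the op with a cast instead of an indirection. */
static inline void build_hdr(uint64_t hdr[4], uint64_t cmd_u,
			     uint64_t ev_word0, const void *op,
			     uint64_t nb_src, uint64_t nb_dst)
{
	hdr[0] = cmd_u | (HDR_PT_WQP << 54) | (nb_dst << 6) | nb_src;
	hdr[1] = (uint64_t)(uintptr_t)op;	/* comes back as get_work1 */
	hdr[2] = ev_word0;	/* response queue/flow/sched_type word */
}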
@@ -528,7 +515,6 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event
 {
 	const struct rte_dma_sge *fptr, *lptr;
 	struct rte_event_dma_adapter_op *op;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cn9k_sso_hws_dual *work;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
@@ -548,16 +534,8 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event
 		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];
 
-		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
-			return count;
-
-		comp_ptr->op = op;
-		comp_ptr->dev_id = op->dma_dev_id;
-		comp_ptr->vchan = op->vchan;
-		comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
-
 		hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36);
-		hdr[2] = (uint64_t)comp_ptr;
+		hdr[2] = (uint64_t)op;
 
 		nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
 		nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
@@ -609,10 +587,8 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 {
 	const struct rte_dma_sge *fptr, *lptr;
 	struct rte_event_dma_adapter_op *op;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
-	struct rte_event *rsp_info;
 	struct cn9k_sso_hws *work;
 	uint16_t nb_src, nb_dst;
 	rte_mcslock_t mcs_lock_me;
@@ -624,21 +600,11 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 
 	for (count = 0; count < nb_events; count++) {
 		op = ev[count].event_ptr;
-		rsp_info = (struct rte_event *)((uint8_t *)op +
-			    sizeof(struct rte_event_dma_adapter_op));
-		dpivf =	rte_dma_fp_objs[op->dma_dev_id].dev_private;
+		dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
 		dpi_conf = &dpivf->conf[op->vchan];
 
-		if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
-			return count;
-
-		comp_ptr->op = op;
-		comp_ptr->dev_id = op->dma_dev_id;
-		comp_ptr->vchan = op->vchan;
-		comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
-
 		hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36);
-		hdr[2] = (uint64_t)comp_ptr;
+		hdr[2] = (uint64_t)op;
 
 		nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
 		nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
@@ -656,9 +622,9 @@ cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 		}
 
 		hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48;
-		hdr[0] |= cnxk_dma_adapter_format_event(rsp_info->event);
+		hdr[0] |= cnxk_dma_adapter_format_event(ev[count].event);
 
-		if ((rsp_info->sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)
+		if ((ev[count].sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)
 			roc_sso_hws_head_wait(work->base);
 
 		rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
@@ -689,38 +655,23 @@ uintptr_t
 cnxk_dma_adapter_dequeue(uintptr_t get_work1)
 {
 	struct rte_event_dma_adapter_op *op;
-	struct cnxk_dpi_compl_s *comp_ptr;
 	struct cnxk_dpi_conf *dpi_conf;
 	struct cnxk_dpi_vf_s *dpivf;
-	rte_mcslock_t mcs_lock_me;
-	RTE_ATOMIC(uint8_t) *wqecs;
 
-	comp_ptr = (struct cnxk_dpi_compl_s *)get_work1;
+	op = (struct rte_event_dma_adapter_op *)get_work1;
+	dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
+	dpi_conf = &dpivf->conf[op->vchan];
 
-	/* Dequeue can be called without calling cnx_enqueue in case of
-	 * dma_adapter. When its called from adapter, dma op will not be
-	 * embedded in completion pointer. In those cases return op.
-	 */
-	if (comp_ptr->cdata != CNXK_DPI_REQ_SSO_CDATA)
-		return (uintptr_t)comp_ptr;
-
-	dpivf =	rte_dma_fp_objs[comp_ptr->dev_id].dev_private;
-	dpi_conf = &dpivf->conf[comp_ptr->vchan];
-
-	rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
-	wqecs = (uint8_t __rte_atomic *)&comp_ptr->wqecs;
-	if (rte_atomic_load_explicit(wqecs, rte_memory_order_relaxed) != 0)
-		dpi_conf->stats.errors++;
+	if (rte_atomic_load_explicit((RTE_ATOMIC(uint64_t) *)&op->impl_opaque[0],
+				     rte_memory_order_relaxed) != 0)
+		rte_atomic_fetch_add_explicit((RTE_ATOMIC(uint64_t) *)&dpi_conf->stats.errors, 1,
+					      rte_memory_order_relaxed);
 
 	/* Take into account errors also. This is similar to
 	 * cnxk_dmadev_completed_status().
 	 */
-	dpi_conf->stats.completed++;
-	rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
-
-	op = (struct rte_event_dma_adapter_op *)comp_ptr->op;
-
-	rte_mempool_put(dpi_conf->adapter_info.req_mp, comp_ptr);
+	rte_atomic_fetch_add_explicit((RTE_ATOMIC(uint64_t) *)&dpi_conf->stats.completed, 1,
+				      rte_memory_order_relaxed);
 
 	return (uintptr_t)op;
 }
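
A minimal model of the reworked dequeue, using C11 atomics where the driver
uses DPDK's RTE_ATOMIC()/rte_atomic_* wrappers over plain fields. The MCS lock
disappears from this path: concurrent dequeuing workers account statistics with
relaxed atomic adds, which is sufficient because the counters synchronize
nothing else:

#include <stdatomic.h>
#include <stdint.h>

struct op_view { _Atomic uint64_t impl_opaque[2]; };	/* sketch type */
struct vchan_stats { _Atomic uint64_t completed, errors; };

static uintptr_t adapter_dequeue(uintptr_t get_work1, struct vchan_stats *st)
{
	struct op_view *op = (struct op_view *)get_work1;

	/* a non-zero status left in impl_opaque[0] counts as an error */
	if (atomic_load_explicit(&op->impl_opaque[0],
				 memory_order_relaxed) != 0)
		atomic_fetch_add_explicit(&st->errors, 1,
					  memory_order_relaxed);

	/* completed counts errors too, mirroring completed_status() */
	atomic_fetch_add_explicit(&st->completed, 1, memory_order_relaxed);
	return (uintptr_t)op;
}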
diff --git a/drivers/event/cnxk/cnxk_eventdev_adptr.c b/drivers/event/cnxk/cnxk_eventdev_adptr.c
index a2a59b16c9..98db11ad61 100644
--- a/drivers/event/cnxk/cnxk_eventdev_adptr.c
+++ b/drivers/event/cnxk/cnxk_eventdev_adptr.c
@@ -739,31 +739,6 @@ cnxk_crypto_adapter_qp_del(const struct rte_cryptodev *cdev,
 	return 0;
 }
 
-static int
-dma_adapter_vchan_setup(const int16_t dma_dev_id, struct cnxk_dpi_conf *vchan,
-			uint16_t vchan_id)
-{
-	char name[RTE_MEMPOOL_NAMESIZE];
-	uint32_t cache_size, nb_req;
-	unsigned int req_size;
-
-	snprintf(name, RTE_MEMPOOL_NAMESIZE, "cnxk_dma_req_%u:%u", dma_dev_id, vchan_id);
-	req_size = sizeof(struct cnxk_dpi_compl_s);
-
-	nb_req = vchan->c_desc.max_cnt;
-	cache_size = 16;
-	nb_req += (cache_size * rte_lcore_count());
-
-	vchan->adapter_info.req_mp = rte_mempool_create(name, nb_req, req_size, cache_size, 0,
-							NULL, NULL, NULL, NULL, rte_socket_id(), 0);
-	if (vchan->adapter_info.req_mp == NULL)
-		return -ENOMEM;
-
-	vchan->adapter_info.enabled = true;
-
-	return 0;
-}
-
 int
 cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev,
 			   const int16_t dma_dev_id, uint16_t vchan_id)
@@ -772,7 +747,6 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev,
 	uint32_t adptr_xae_cnt = 0;
 	struct cnxk_dpi_vf_s *dpivf;
 	struct cnxk_dpi_conf *vchan;
-	int ret;
 
 	dpivf = rte_dma_fp_objs[dma_dev_id].dev_private;
 	if ((int16_t)vchan_id == -1) {
@@ -780,19 +754,13 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev,
 
 		for (vchan_id = 0; vchan_id < dpivf->num_vchans; vchan_id++) {
 			vchan = &dpivf->conf[vchan_id];
-			ret = dma_adapter_vchan_setup(dma_dev_id, vchan, vchan_id);
-			if (ret) {
-				cnxk_dma_adapter_vchan_del(dma_dev_id, -1);
-				return ret;
-			}
-			adptr_xae_cnt += vchan->adapter_info.req_mp->size;
+			vchan->adapter_enabled = true;
+			adptr_xae_cnt += vchan->c_desc.max_cnt;
 		}
 	} else {
 		vchan = &dpivf->conf[vchan_id];
-		ret = dma_adapter_vchan_setup(dma_dev_id, vchan, vchan_id);
-		if (ret)
-			return ret;
-		adptr_xae_cnt = vchan->adapter_info.req_mp->size;
+		vchan->adapter_enabled = true;
+		adptr_xae_cnt = vchan->c_desc.max_cnt;
 	}
 
 	/* Update dma adapter XAE count */
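
With the request mempool removed, the adapter's XAE provisioning is derived
straight from ring capacity; a hypothetical helper showing the accounting over
an array of per-vchan ring sizes:

#include <stdint.h>

static uint32_t adapter_xae_budget(const uint16_t *ring_max_cnt,
				   uint16_t num_vchans)
{
	uint32_t cnt = 0;

	/* one potential in-flight event per ring entry, per vchan */
	for (uint16_t i = 0; i < num_vchans; i++)
		cnt += ring_max_cnt[i];
	return cnt;
}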
@@ -805,8 +773,7 @@ cnxk_dma_adapter_vchan_add(const struct rte_eventdev *event_dev,
 static int
 dma_adapter_vchan_free(struct cnxk_dpi_conf *vchan)
 {
-	rte_mempool_free(vchan->adapter_info.req_mp);
-	vchan->adapter_info.enabled = false;
+	vchan->adapter_enabled = false;
 
 	return 0;
 }
@@ -823,12 +790,12 @@ cnxk_dma_adapter_vchan_del(const int16_t dma_dev_id, uint16_t vchan_id)
 
 		for (vchan_id = 0; vchan_id < dpivf->num_vchans; vchan_id++) {
 			vchan = &dpivf->conf[vchan_id];
-			if (vchan->adapter_info.enabled)
+			if (vchan->adapter_enabled)
 				dma_adapter_vchan_free(vchan);
 		}
 	} else {
 		vchan = &dpivf->conf[vchan_id];
-		if (vchan->adapter_info.enabled)
+		if (vchan->adapter_enabled)
 			dma_adapter_vchan_free(vchan);
 	}
 
-- 
2.25.1



* Re: [PATCH v5 1/2] eventdev/dma: reorganize event DMA ops
  2024-06-07 10:36       ` [PATCH v5 " pbhagavatula
  2024-06-07 10:36         ` [PATCH v5 2/2] dma/cnxk: remove completion pool pbhagavatula
@ 2024-06-08  6:16         ` Jerin Jacob
  1 sibling, 0 replies; 17+ messages in thread
From: Jerin Jacob @ 2024-06-08  6:16 UTC (permalink / raw)
  To: pbhagavatula; +Cc: jerinj, Amit Prakash Shukla, Vamsi Attunuru, dev

On Fri, Jun 7, 2024 at 11:53 PM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Re-organize event DMA ops structure to allow holding
> source and destination pointers without the need for
> additional memory, the mempool allocating memory for
> rte_event_dma_adapter_ops can size the structure to
> accommodate all the needed source and destination
> pointers.
>
> Add multiple words for holding user metadata, adapter
> implementation specific metadata and event metadata.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> Acked-by: Amit Prakash Shukla <amitprakashs@marvell.com>
> ---
>  v5 Changes:
>  - Update release notes with Experimental API changes.
>  v4 Changes:
>  - Reduce unreleated driver changes and move to 2/2.
>  v3 Changes:
>  - Fix stdatomic compilation.
>  v2 Changes:
>  - Fix 32bit compilation
>

>     .
> diff --git a/doc/guides/rel_notes/release_24_07.rst b/doc/guides/rel_notes/release_24_07.rst
> index a69f24cf99..7800cb4c31 100644
> --- a/doc/guides/rel_notes/release_24_07.rst
> +++ b/doc/guides/rel_notes/release_24_07.rst
> @@ -84,6 +84,9 @@ API Changes

It is not an API change. Applied the following diff and applied the series to
dpdk-next-eventdev/for-main. Thanks


[for-main][dpdk-next-eventdev] $ git diff
diff --git a/doc/guides/rel_notes/release_24_07.rst
b/doc/guides/rel_notes/release_24_07.rst
index 09e58dddf2..14bd5d37b1 100644
--- a/doc/guides/rel_notes/release_24_07.rst
+++ b/doc/guides/rel_notes/release_24_07.rst
@@ -91,9 +91,6 @@ API Changes
    Also, make sure to start the actual text at the margin.
    =======================================================

-* eventdev: Reorganize the fastpath structure ``rte_event_dma_adapter_op``
-  to optimize the memory layout and improve performance.
-

 ABI Changes
 -----------
@@ -112,6 +109,9 @@ ABI Changes

 * No ABI change that would break compatibility with 23.11.

+* eventdev/dma: Reorganize the experimental fastpath structure ``rte_event_dma_adapter_op``
+  to optimize the memory layout and improve performance.
+

>     Also, make sure to start the actual text at the margin.
>     =======================================================
>
> +* eventdev: Reorganize the fastpath structure ``rte_event_dma_adapter_op``
> +  to optimize the memory layout and improve performance.
> +
>
>  ABI Changes


end of thread

Thread overview: 17+ messages
2024-04-06 10:13 [PATCH 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula
2024-04-06 10:13 ` [PATCH 2/2] dma/cnxk: remove completion pool pbhagavatula
2024-04-16  8:56   ` Vamsi Krishna Attunuru
2024-04-17  5:58 ` [PATCH v2 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula
2024-04-17  5:58   ` [PATCH v2 2/2] dma/cnxk: remove completion pool pbhagavatula
2024-04-17  8:26   ` [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops pbhagavatula
2024-04-17  8:26     ` [PATCH v3 2/2] dma/cnxk: remove completion pool pbhagavatula
2024-05-16  7:39     ` [PATCH v3 1/2] eventdev/dma: reorganize event DMA ops Amit Prakash Shukla
2024-05-30 12:23       ` Jerin Jacob
2024-05-30 17:35         ` [EXTERNAL] " Pavan Nikhilesh Bhagavatula
2024-05-30 12:44     ` [PATCH v4 " pbhagavatula
2024-05-30 12:44       ` [PATCH v4 2/2] dma/cnxk: remove completion pool pbhagavatula
2024-06-07 10:20       ` [PATCH v4 1/2] eventdev/dma: reorganize event DMA ops Jerin Jacob
2024-06-07 10:36       ` [PATCH v5 " pbhagavatula
2024-06-07 10:36         ` [PATCH v5 2/2] dma/cnxk: remove completion pool pbhagavatula
2024-06-08  6:16         ` [PATCH v5 1/2] eventdev/dma: reorganize event DMA ops Jerin Jacob
2024-05-16  7:36 ` [PATCH " Amit Prakash Shukla
