* [25.11 PATCH 1/3] dmadev: add enqueue dequeue operations
From: pbhagavatula @ 2025-04-16 10:09 UTC (permalink / raw)
To: jerinj, Chengwen Feng, Kevin Laatz, Bruce Richardson,
Gagandeep Singh, Sachin Saxena, Hemant Agrawal
Cc: vattunuru, conor.walsh, gmuthukrishn, vvelumuri, anatoly.burakov,
dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add enqueue/dequeue operations that use struct rte_dma_op
to communicate with the DMA device.
These operations must be enabled at DMA device configuration
time by setting the RTE_DMA_CFG_FLAG_ENQ_DEQ flag in
rte_dma_conf::flags, and are only valid when the device reports
the RTE_DMA_CAPA_OPS_ENQ_DEQ capability.
The enqueue/dequeue operations are not compatible with the
rte_dma_copy, rte_dma_copy_sg, rte_dma_fill, rte_dma_submit,
rte_dma_completed and rte_dma_completed_status family of APIs.
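For illustration, a minimal configuration and transfer flow in this
mode could look as follows (a sketch based on the unit test added
below; error handling elided):

    struct rte_dma_conf conf = {.nb_vchans = 1, .flags = RTE_DMA_CFG_FLAG_ENQ_DEQ};
    struct rte_dma_vchan_conf qconf = {.direction = RTE_DMA_DIR_MEM_TO_MEM};
    struct rte_dma_info info;
    struct rte_dma_op *op;

    rte_dma_info_get(dev_id, &info);
    if (!(info.dev_capa & RTE_DMA_CAPA_OPS_ENQ_DEQ))
        return -ENOTSUP; /* fall back to the rte_dma_copy() family */

    qconf.nb_desc = info.max_desc;
    rte_dma_configure(dev_id, &conf);
    rte_dma_vchan_setup(dev_id, 0, &qconf);
    rte_dma_start(dev_id);

    /* op points to a populated struct rte_dma_op; enqueue submits it
     * immediately, a later dequeue returns it with op->status set. */
    rte_dma_enqueue_ops(dev_id, 0, &op, 1);
    /* ... poll ... */
    rte_dma_dequeue_ops(dev_id, 0, &op, 1);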
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
app/test/test_dmadev.c | 160 +++++++++++++++++++++++++++
app/test/test_dmadev_api.c | 78 +++++++++++--
doc/guides/prog_guide/dmadev.rst | 34 ++++++
drivers/dma/dpaa/dpaa_qdma.c | 2 +-
drivers/dma/dpaa2/dpaa2_qdma.c | 2 +-
lib/dmadev/rte_dmadev.c | 30 ++++-
lib/dmadev/rte_dmadev.h | 155 ++++++++++++++++++++++++--
lib/dmadev/rte_dmadev_core.h | 10 ++
lib/dmadev/rte_dmadev_trace.h | 2 +-
lib/dmadev/rte_dmadev_trace_fp.h | 20 ++++
lib/dmadev/rte_dmadev_trace_points.c | 6 +
11 files changed, 477 insertions(+), 22 deletions(-)
diff --git a/app/test/test_dmadev.c b/app/test/test_dmadev.c
index 9cbb9a6552..e9a62a0ddf 100644
--- a/app/test/test_dmadev.c
+++ b/app/test/test_dmadev.c
@@ -1052,6 +1052,147 @@ prepare_m2d_auto_free(int16_t dev_id, uint16_t vchan)
return 0;
}
+static int
+test_enq_deq_ops(int16_t dev_id, uint16_t vchan)
+{
+#define BURST_SIZE 16
+#define ROUNDS 2E7
+#define CPY_LEN 64
+ struct rte_mempool *ops_pool, *pkt_pool;
+ struct rte_mbuf *mbufs[BURST_SIZE * 2];
+ struct rte_dma_op *ops[BURST_SIZE];
+ uint64_t enq_lat, deq_lat, start;
+ int ret, i, j, enq, deq, n, max;
+ struct rte_dma_sge ssg, dsg;
+ struct rte_dma_info info;
+ uint64_t tenq, tdeq;
+
+ memset(&info, 0, sizeof(info));
+ ret = rte_dma_info_get(dev_id, &info);
+ if (ret != 0)
+ ERR_RETURN("Error with rte_dma_info_get()\n");
+
+ pkt_pool = rte_pktmbuf_pool_create("pkt_pool", info.max_desc * 2, 0, 0,
+ CPY_LEN + RTE_PKTMBUF_HEADROOM, rte_socket_id());
+ if (pkt_pool == NULL)
+ ERR_RETURN("Error creating pkt pool\n");
+
+ ops_pool = rte_mempool_create("ops_pool", info.max_desc,
+ sizeof(struct rte_dma_op) + (sizeof(struct rte_dma_sge) * 2),
+ 0, 0, NULL, NULL, NULL, NULL, rte_socket_id(), 0);
+ if (ops_pool == NULL)
+ ERR_RETURN("Error creating ops pool\n");
+
+ max = info.max_desc - BURST_SIZE;
+ tenq = 0;
+ tdeq = 0;
+ enq_lat = 0;
+ deq_lat = 0;
+
+ for (i = 0; i < ROUNDS / max; i++) {
+ n = 0;
+ while (n != max) {
+ if (rte_mempool_get_bulk(ops_pool, (void **)ops, BURST_SIZE) != 0)
+ continue;
+
+ if (rte_pktmbuf_alloc_bulk(pkt_pool, mbufs, BURST_SIZE * 2) != 0)
+ ERR_RETURN("Error allocating mbufs %d\n", n);
+
+ for (j = 0; j < BURST_SIZE; j++) {
+ ops[j]->src_dst_seg[0].addr = rte_pktmbuf_iova(mbufs[j]);
+ ops[j]->src_dst_seg[1].addr =
+ rte_pktmbuf_iova(mbufs[j + BURST_SIZE]);
+ ops[j]->src_dst_seg[0].length = CPY_LEN;
+ ops[j]->src_dst_seg[1].length = CPY_LEN;
+
+ ops[j]->nb_src = 1;
+ ops[j]->nb_dst = 1;
+ ops[j]->user_meta = (uint64_t)mbufs[j];
+ ops[j]->event_meta = (uint64_t)mbufs[j + BURST_SIZE];
+
+ memset((void *)(uintptr_t)ops[j]->src_dst_seg[0].addr,
+ rte_rand() & 0xFF, CPY_LEN);
+ memset((void *)(uintptr_t)ops[j]->src_dst_seg[1].addr, 0, CPY_LEN);
+ }
+
+ start = rte_rdtsc_precise();
+ enq = rte_dma_enqueue_ops(dev_id, vchan, ops, BURST_SIZE);
+ while (enq != BURST_SIZE) {
+ enq += rte_dma_enqueue_ops(dev_id, vchan, ops + enq,
+ BURST_SIZE - enq);
+ }
+
+ enq_lat += rte_rdtsc_precise() - start;
+ n += enq;
+ }
+ tenq += n;
+
+ memset(ops, 0, sizeof(ops));
+ n = 0;
+ while (n != max) {
+ start = rte_rdtsc_precise();
+ deq = rte_dma_dequeue_ops(dev_id, vchan, ops, BURST_SIZE);
+ while (deq != BURST_SIZE) {
+ deq += rte_dma_dequeue_ops(dev_id, vchan, ops + deq,
+ BURST_SIZE - deq);
+ }
+ n += deq;
+ deq_lat += rte_rdtsc_precise() - start;
+
+ for (j = 0; j < deq; j++) {
+ /* check the data is correct */
+ ssg = ops[j]->src_dst_seg[0];
+ dsg = ops[j]->src_dst_seg[1];
+ if (memcmp((void *)(uintptr_t)ssg.addr, (void *)(uintptr_t)dsg.addr,
+ ssg.length) != 0)
+ ERR_RETURN("Error with copy operation\n");
+ rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)ops[j]->user_meta);
+ rte_pktmbuf_free((struct rte_mbuf *)(uintptr_t)ops[j]->event_meta);
+ }
+ rte_mempool_put_bulk(ops_pool, (void **)ops, BURST_SIZE);
+ }
+ tdeq += n;
+
+ printf("\rEnqueued %" PRIu64 " Latency %.3f Dequeued %" PRIu64 " Latency %.3f",
+ tenq, (double)enq_lat / tenq, tdeq, (double)deq_lat / tdeq);
+ }
+ printf("\n");
+
+ rte_mempool_free(pkt_pool);
+ rte_mempool_free(ops_pool);
+
+ return 0;
+}
+
+static int
+prepare_enq_deq_ops(int16_t dev_id, uint16_t vchan)
+{
+ const struct rte_dma_conf conf = {.nb_vchans = 1, .flags = RTE_DMA_CFG_FLAG_ENQ_DEQ};
+ struct rte_dma_vchan_conf qconf;
+ struct rte_dma_info info;
+
+ memset(&qconf, 0, sizeof(qconf));
+ memset(&info, 0, sizeof(info));
+
+ int ret = rte_dma_info_get(dev_id, &info);
+ if (ret != 0)
+ ERR_RETURN("Error with rte_dma_info_get()\n");
+
+ qconf.direction = RTE_DMA_DIR_MEM_TO_MEM;
+ qconf.nb_desc = info.max_desc;
+
+ if (rte_dma_stop(dev_id) < 0)
+ ERR_RETURN("Error stopping device %u\n", dev_id);
+ if (rte_dma_configure(dev_id, &conf) != 0)
+ ERR_RETURN("Error with rte_dma_configure()\n");
+ if (rte_dma_vchan_setup(dev_id, vchan, &qconf) < 0)
+ ERR_RETURN("Error with queue configuration\n");
+ if (rte_dma_start(dev_id) != 0)
+ ERR_RETURN("Error with rte_dma_start()\n");
+
+ return 0;
+}
+
static int
test_dmadev_sg_copy_setup(void)
{
@@ -1129,6 +1270,20 @@ test_dmadev_autofree_setup(void)
return ret;
}
+static int
+test_dmadev_enq_deq_setup(void)
+{
+ int ret = TEST_SKIPPED;
+
+ if ((info.dev_capa & RTE_DMA_CAPA_OPS_ENQ_DEQ)) {
+ if (prepare_enq_deq_ops(test_dev_id, vchan) != 0)
+ return ret;
+ ret = TEST_SUCCESS;
+ }
+
+ return ret;
+}
+
static int
test_dmadev_setup(void)
{
@@ -1210,6 +1365,7 @@ test_dmadev_instance(int16_t dev_id)
TEST_ERR,
TEST_FILL,
TEST_M2D,
+ TEST_ENQ_DEQ,
TEST_END
};
@@ -1221,6 +1377,7 @@ test_dmadev_instance(int16_t dev_id)
{"error_handling", test_completion_handling, 1},
{"fill", test_enqueue_fill, 1},
{"m2d_auto_free", test_m2d_auto_free, 128},
+ {"dma_enq_deq", test_enq_deq_ops, 1},
};
static struct unit_test_suite ts = {
@@ -1249,6 +1406,9 @@ test_dmadev_instance(int16_t dev_id)
TEST_CASE_NAMED_WITH_DATA("m2d_autofree",
test_dmadev_autofree_setup, NULL,
runtest, &param[TEST_M2D]),
+ TEST_CASE_NAMED_WITH_DATA("dma_enq_deq",
+ test_dmadev_enq_deq_setup, NULL,
+ runtest, &param[TEST_ENQ_DEQ]),
TEST_CASES_END()
}
};
diff --git a/app/test/test_dmadev_api.c b/app/test/test_dmadev_api.c
index fb49fcb56b..1ba053696b 100644
--- a/app/test/test_dmadev_api.c
+++ b/app/test/test_dmadev_api.c
@@ -159,7 +159,7 @@ test_dma_configure(void)
/* Check enable silent mode */
memset(&conf, 0, sizeof(conf));
conf.nb_vchans = info.max_vchans;
- conf.enable_silent = true;
+ conf.flags = RTE_DMA_CFG_FLAG_SILENT;
ret = rte_dma_configure(test_dev_id, &conf);
RTE_TEST_ASSERT(ret == -EINVAL, "Expected -EINVAL, %d", ret);
@@ -289,7 +289,7 @@ test_dma_vchan_setup(void)
}
static int
-setup_vchan(int nb_vchans)
+setup_vchan(int nb_vchans, bool ena_enq_deq)
{
struct rte_dma_vchan_conf vchan_conf = { 0 };
struct rte_dma_info dev_info = { 0 };
@@ -299,6 +299,7 @@ setup_vchan(int nb_vchans)
ret = rte_dma_info_get(test_dev_id, &dev_info);
RTE_TEST_ASSERT_SUCCESS(ret, "Failed to obtain device info, %d", ret);
dev_conf.nb_vchans = nb_vchans;
+ dev_conf.flags = ena_enq_deq ? RTE_DMA_CFG_FLAG_ENQ_DEQ : 0;
ret = rte_dma_configure(test_dev_id, &dev_conf);
RTE_TEST_ASSERT_SUCCESS(ret, "Failed to configure, %d", ret);
vchan_conf.direction = RTE_DMA_DIR_MEM_TO_MEM;
@@ -325,7 +326,7 @@ test_dma_start_stop(void)
RTE_TEST_ASSERT(ret == -EINVAL, "Expected -EINVAL, %d", ret);
/* Setup one vchan for later test */
- ret = setup_vchan(1);
+ ret = setup_vchan(1, 0);
RTE_TEST_ASSERT_SUCCESS(ret, "Failed to setup one vchan, %d", ret);
ret = rte_dma_start(test_dev_id);
@@ -359,7 +360,7 @@ test_dma_reconfigure(void)
return TEST_SKIPPED;
/* Setup one vchan for later test */
- ret = setup_vchan(1);
+ ret = setup_vchan(1, 0);
RTE_TEST_ASSERT_SUCCESS(ret, "Failed to setup one vchan, %d", ret);
ret = rte_dma_start(test_dev_id);
@@ -371,7 +372,7 @@ test_dma_reconfigure(void)
/* Check reconfigure and vchan setup after device stopped */
cfg_vchans = dev_conf.nb_vchans = (dev_info.max_vchans - 1);
- ret = setup_vchan(cfg_vchans);
+ ret = setup_vchan(cfg_vchans, 0);
RTE_TEST_ASSERT_SUCCESS(ret, "Failed to setup one vchan, %d", ret);
ret = rte_dma_start(test_dev_id);
@@ -403,7 +404,7 @@ test_dma_stats(void)
RTE_TEST_ASSERT(ret == -EINVAL, "Expected -EINVAL, %d", ret);
/* Setup one vchan for later test */
- ret = setup_vchan(1);
+ ret = setup_vchan(1, 0);
RTE_TEST_ASSERT_SUCCESS(ret, "Failed to setup one vchan, %d", ret);
/* Check for invalid vchan */
@@ -506,7 +507,7 @@ test_dma_completed(void)
int ret;
/* Setup one vchan for later test */
- ret = setup_vchan(1);
+ ret = setup_vchan(1, 0);
RTE_TEST_ASSERT_SUCCESS(ret, "Failed to setup one vchan, %d", ret);
ret = rte_dma_start(test_dev_id);
@@ -569,7 +570,7 @@ test_dma_completed_status(void)
int ret;
/* Setup one vchan for later test */
- ret = setup_vchan(1);
+ ret = setup_vchan(1, 0);
RTE_TEST_ASSERT_SUCCESS(ret, "Failed to setup one vchan, %d", ret);
ret = rte_dma_start(test_dev_id);
@@ -637,7 +638,7 @@ test_dma_sg(void)
n_sge = RTE_MIN(dev_info.max_sges, TEST_SG_MAX);
- ret = setup_vchan(1);
+ ret = setup_vchan(1, 0);
RTE_TEST_ASSERT_SUCCESS(ret, "Failed to setup one vchan, %d", ret);
ret = rte_dma_start(test_dev_id);
@@ -699,6 +700,64 @@ test_dma_sg(void)
return TEST_SUCCESS;
}
+static int
+test_dma_ops_enq_deq(void)
+{
+ struct rte_dma_info dev_info = {0};
+ struct rte_dma_op *ops;
+ int n_sge, i, ret;
+
+ ret = rte_dma_info_get(test_dev_id, &dev_info);
+ RTE_TEST_ASSERT_SUCCESS(ret, "Failed to obtain device info, %d", ret);
+ if ((dev_info.dev_capa & RTE_DMA_CAPA_OPS_ENQ_DEQ) == 0)
+ return TEST_SKIPPED;
+
+ n_sge = RTE_MIN(dev_info.max_sges, TEST_SG_MAX);
+
+ ret = setup_vchan(1, 1);
+ RTE_TEST_ASSERT_SUCCESS(ret, "Failed to setup one vchan, %d", ret);
+
+ ret = rte_dma_start(test_dev_id);
+ RTE_TEST_ASSERT_SUCCESS(ret, "Failed to start, %d", ret);
+
+ ops = rte_zmalloc(
+ "ops", sizeof(struct rte_dma_op) + ((2 * n_sge) * sizeof(struct rte_dma_sge)), 0);
+ RTE_TEST_ASSERT(ops != NULL, "Failed to allocate ops");
+ for (i = 0; i < n_sge; i++) {
+ ops->src_dst_seg[i].addr = rte_malloc_virt2iova(src_sg[i]);
+ ops->src_dst_seg[i].length = TEST_MEMCPY_SIZE;
+ ops->src_dst_seg[n_sge + i].addr = rte_malloc_virt2iova(dst_sg[i]);
+ ops->src_dst_seg[n_sge + i].length = TEST_MEMCPY_SIZE;
+ }
+
+ ops->nb_src = n_sge;
+ ops->nb_dst = n_sge;
+ sg_memory_setup(n_sge);
+
+ /* Enqueue operations */
+ ret = rte_dma_enqueue_ops(test_dev_id, 0, &ops, 1);
+ RTE_TEST_ASSERT(ret == 1, "Failed to enqueue DMA operations, %d", ret);
+
+ rte_delay_us_sleep(TEST_WAIT_US_VAL);
+
+ ops = NULL;
+ /* Dequeue operations */
+ ret = rte_dma_dequeue_ops(test_dev_id, 0, &ops, 1);
+ RTE_TEST_ASSERT(ret == 1, "Failed to dequeue DMA operations, %d", ret);
+ RTE_TEST_ASSERT(ops != NULL, "Failed to dequeue DMA operations %p", ops);
+ /* Free allocated memory for ops */
+ rte_free(ops);
+
+ ret = sg_memory_verify(n_sge);
+ RTE_TEST_ASSERT_SUCCESS(ret, "Failed to verify memory");
+
+ /* Stop dmadev to return it to a known state */
+ ret = rte_dma_stop(test_dev_id);
+ RTE_TEST_ASSERT_SUCCESS(ret, "Failed to stop, %d", ret);
+
+ return TEST_SUCCESS;
+}
+
static struct unit_test_suite dma_api_testsuite = {
.suite_name = "DMA API Test Suite",
.setup = testsuite_setup,
@@ -717,6 +776,7 @@ static struct unit_test_suite dma_api_testsuite = {
TEST_CASE(test_dma_completed),
TEST_CASE(test_dma_completed_status),
TEST_CASE(test_dma_sg),
+ TEST_CASE(test_dma_ops_enq_deq),
TEST_CASES_END()
}
};
diff --git a/doc/guides/prog_guide/dmadev.rst b/doc/guides/prog_guide/dmadev.rst
index 67a62ff420..11b20cc3d6 100644
--- a/doc/guides/prog_guide/dmadev.rst
+++ b/doc/guides/prog_guide/dmadev.rst
@@ -108,6 +108,40 @@ completed operations along with the status of each operation (filled into the
completed operation's ``ring_idx`` which could help user track operations within
their own application-defined rings.
+Alternatively, if the DMA device supports enqueue and dequeue operations, as
+indicated by the ``RTE_DMA_CAPA_OPS_ENQ_DEQ`` capability in ``rte_dma_info::dev_capa``,
+the application can use the ``rte_dma_enqueue_ops`` and ``rte_dma_dequeue_ops``
+APIs.
+To enable this, the DMA device must be configured in operations mode by setting
+the ``RTE_DMA_CFG_FLAG_ENQ_DEQ`` flag in ``rte_dma_conf::flags``.
+
+The following example demonstrates the usage of enqueue and dequeue operations:
+
+.. code-block:: C
+
+ struct rte_dma_op *op;
+
+ op = rte_zmalloc(NULL, sizeof(struct rte_dma_op) + (sizeof(struct rte_dma_sge) * 2), 0);
+
+ op->src_dst_seg[0].addr = src_addr;
+ op->src_dst_seg[0].length = src_len;
+ op->src_dst_seg[1].addr = dst_addr;
+ op->src_dst_seg[1].length = dst_len;
+
+
+ ret = rte_dma_enqueue_ops(dev_id, vchan, &op, 1);
+ if (ret != 1) {
+ PRINT_ERR("Failed to enqueue DMA op\n");
+ return -1;
+ }
+
+ op = NULL;
+ ret = rte_dma_dequeue_ops(dev_id, vchan, &op, 1);
+ if (ret != 1) {
+ PRINT_ERR("Failed to dequeue DMA op\n");
+ return -1;
+ }
+
Querying Device Statistics
~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/drivers/dma/dpaa/dpaa_qdma.c b/drivers/dma/dpaa/dpaa_qdma.c
index a541398e48..74e23d2ee5 100644
--- a/drivers/dma/dpaa/dpaa_qdma.c
+++ b/drivers/dma/dpaa/dpaa_qdma.c
@@ -954,7 +954,7 @@ dpaa_qdma_configure(struct rte_dma_dev *dmadev,
{
struct fsl_qdma_engine *fsl_qdma = dmadev->data->dev_private;
- fsl_qdma->is_silent = dev_conf->enable_silent;
+ fsl_qdma->is_silent = dev_conf->flags & RTE_DMA_CFG_FLAG_SILENT;
return 0;
}
diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c
index 3c9a7b5485..ca18fe89c5 100644
--- a/drivers/dma/dpaa2/dpaa2_qdma.c
+++ b/drivers/dma/dpaa2/dpaa2_qdma.c
@@ -1277,7 +1277,7 @@ dpaa2_qdma_configure(struct rte_dma_dev *dev,
}
qdma_dev->num_vqs = dev_conf->nb_vchans;
- qdma_dev->is_silent = dev_conf->enable_silent;
+ qdma_dev->is_silent = dev_conf->flags & RTE_DMA_CFG_FLAG_SILENT;
return 0;
diff --git a/lib/dmadev/rte_dmadev.c b/lib/dmadev/rte_dmadev.c
index 17ee0808a9..73d24f8ff3 100644
--- a/lib/dmadev/rte_dmadev.c
+++ b/lib/dmadev/rte_dmadev.c
@@ -509,7 +509,7 @@ rte_dma_configure(int16_t dev_id, const struct rte_dma_conf *dev_conf)
"Device %d configure too many vchans", dev_id);
return -EINVAL;
}
- if (dev_conf->enable_silent &&
+ if ((dev_conf->flags & RTE_DMA_CFG_FLAG_SILENT) &&
!(dev_info.dev_capa & RTE_DMA_CAPA_SILENT)) {
RTE_DMA_LOG(ERR, "Device %d don't support silent", dev_id);
return -EINVAL;
@@ -521,6 +521,12 @@ rte_dma_configure(int16_t dev_id, const struct rte_dma_conf *dev_conf)
return -EINVAL;
}
+ if ((dev_conf->flags & RTE_DMA_CFG_FLAG_ENQ_DEQ) &&
+ !(dev_info.dev_capa & RTE_DMA_CAPA_OPS_ENQ_DEQ)) {
+ RTE_DMA_LOG(ERR, "Device %d don't support enqueue/dequeue", dev_id);
+ return -EINVAL;
+ }
+
if (dev->dev_ops->dev_configure == NULL)
return -ENOTSUP;
ret = dev->dev_ops->dev_configure(dev, dev_conf, sizeof(struct rte_dma_conf));
@@ -863,7 +869,9 @@ rte_dma_dump(int16_t dev_id, FILE *f)
(void)fprintf(f, " max_vchans_supported: %u\n", dev_info.max_vchans);
(void)fprintf(f, " nb_vchans_configured: %u\n", dev_info.nb_vchans);
(void)fprintf(f, " silent_mode: %s\n",
- dev->data->dev_conf.enable_silent ? "on" : "off");
+ dev->data->dev_conf.flags & RTE_DMA_CFG_FLAG_SILENT ? "on" : "off");
+ (void)fprintf(f, " ops_mode: %s\n",
+ dev->data->dev_conf.flags & RTE_DMA_CFG_FLAG_ENQ_DEQ ? "on" : "off");
if (dev->dev_ops->dev_dump != NULL)
ret = dev->dev_ops->dev_dump(dev, f);
@@ -937,6 +945,22 @@ dummy_burst_capacity(__rte_unused const void *dev_private,
return 0;
}
+static uint16_t
+dummy_enqueue(__rte_unused void *dev_private, __rte_unused uint16_t vchan,
+ __rte_unused struct rte_dma_op **ops, __rte_unused uint16_t nb_ops)
+{
+ RTE_DMA_LOG(ERR, "Enqueue not configured or not supported.");
+ return 0;
+}
+
+static uint16_t
+dummy_dequeue(__rte_unused void *dev_private, __rte_unused uint16_t vchan,
+ __rte_unused struct rte_dma_op **ops, __rte_unused uint16_t nb_ops)
+{
+ RTE_DMA_LOG(ERR, "Enqueue not configured or not supported.");
+ return 0;
+}
+
static void
dma_fp_object_dummy(struct rte_dma_fp_object *obj)
{
@@ -948,6 +972,8 @@ dma_fp_object_dummy(struct rte_dma_fp_object *obj)
obj->completed = dummy_completed;
obj->completed_status = dummy_completed_status;
obj->burst_capacity = dummy_burst_capacity;
+ obj->enqueue = dummy_enqueue;
+ obj->dequeue = dummy_dequeue;
}
static int
diff --git a/lib/dmadev/rte_dmadev.h b/lib/dmadev/rte_dmadev.h
index 550dbfbf75..d88424d699 100644
--- a/lib/dmadev/rte_dmadev.h
+++ b/lib/dmadev/rte_dmadev.h
@@ -275,8 +275,22 @@ int16_t rte_dma_next_dev(int16_t start_dev_id);
#define RTE_DMA_CAPA_OPS_COPY_SG RTE_BIT64(33)
/** Support fill operation. */
#define RTE_DMA_CAPA_OPS_FILL RTE_BIT64(34)
+/** Support enqueue and dequeue operations. */
+#define RTE_DMA_CAPA_OPS_ENQ_DEQ RTE_BIT64(35)
/**@}*/
+/** DMA device configuration flags.
+ * @see struct rte_dma_conf::flags
+ */
+/** Operate in silent mode
+ * @see RTE_DMA_CAPA_SILENT
+ */
+#define RTE_DMA_CFG_FLAG_SILENT RTE_BIT64(0)
+/** Enable enqueue and dequeue operations
+ * @see RTE_DMA_CAPA_OPS_ENQ_DEQ
+ */
+#define RTE_DMA_CFG_FLAG_ENQ_DEQ RTE_BIT64(1)
+
/**
* A structure used to retrieve the information of a DMA device.
*
@@ -335,14 +349,6 @@ struct rte_dma_conf {
* rte_dma_info which get from rte_dma_info_get().
*/
uint16_t nb_vchans;
- /** Indicates whether to enable silent mode.
- * false-default mode, true-silent mode.
- * This value can be set to true only when the SILENT capability is
- * supported.
- *
- * @see RTE_DMA_CAPA_SILENT
- */
- bool enable_silent;
/* The priority of the DMA device.
* This value should be lower than the field 'nb_priorities' of struct
* rte_dma_info which get from rte_dma_info_get(). If the DMA device
@@ -351,6 +357,8 @@ struct rte_dma_conf {
* Lowest value indicates higher priority and vice-versa.
*/
uint16_t priority;
+ /** DMA device configuration flags defined as RTE_DMA_CFG_FLAG_*. */
+ uint64_t flags;
};
/**
@@ -794,6 +802,63 @@ struct rte_dma_sge {
uint32_t length; /**< The DMA operation length. */
};
+/**
+ * A structure used to hold a DMA operation entry. All the information
+ * required for a DMA transfer shall be populated in a "struct rte_dma_op"
+ * instance.
+ */
+struct rte_dma_op {
+ uint64_t flags;
+ /**< Flags related to the operation.
+ * @see RTE_DMA_OP_FLAG_*
+ */
+ struct rte_mempool *op_mp;
+ /**< Mempool from which op is allocated. */
+ enum rte_dma_status_code status;
+ /**< Status code for this operation. */
+ uint32_t rsvd;
+ /**< Reserved for future use. */
+ uint64_t impl_opaque[2];
+ /**< Implementation-specific opaque data.
+ * A DMA device implementation may use this field to hold
+ * implementation-specific values shared between the enqueue and
+ * dequeue operations.
+ * The application should not modify this field.
+ */
+ uint64_t user_meta;
+ /**< Memory to store user specific metadata.
+ * The dma device implementation should not modify this area.
+ */
+ uint64_t event_meta;
+ /**< Event metadata of DMA completion event.
+ * Used when RTE_EVENT_DMA_ADAPTER_CAP_INTERNAL_PORT_VCHAN_EV_BIND is not
+ * supported in OP_NEW mode.
+ * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_NEW
+ * @see RTE_EVENT_DMA_ADAPTER_CAP_INTERNAL_PORT_VCHAN_EV_BIND
+ *
+ * Used when RTE_EVENT_DMA_ADAPTER_CAP_INTERNAL_PORT_OP_FWD is not
+ * supported in OP_FWD mode.
+ * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD
+ * @see RTE_EVENT_DMA_ADAPTER_CAP_INTERNAL_PORT_OP_FWD
+ *
+ * @see struct rte_event::event
+ */
+ int16_t dma_dev_id;
+ /**< DMA device ID to be used with OP_FORWARD mode.
+ * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD
+ */
+ uint16_t vchan;
+ /**< DMA vchan ID to be used with OP_FORWARD mode
+ * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD
+ */
+ uint16_t nb_src;
+ /**< Number of source segments. */
+ uint16_t nb_dst;
+ /**< Number of destination segments. */
+ struct rte_dma_sge src_dst_seg[];
+ /**< Source and destination segments. */
+};
+
#ifdef __cplusplus
}
#endif
@@ -1153,6 +1218,80 @@ rte_dma_burst_capacity(int16_t dev_id, uint16_t vchan)
return ret;
}
+/**
+ * Enqueue rte_dma_op pointers to the DMA device. This API can only be used
+ * if the device supports RTE_DMA_CAPA_OPS_ENQ_DEQ and the
+ * RTE_DMA_CFG_FLAG_ENQ_DEQ flag was set in rte_dma_conf::flags when calling
+ * rte_dma_configure(). The ops enqueued are immediately submitted.
+ * Enqueue must be coupled with dequeue to retrieve completed ops; calls to
+ * rte_dma_submit(), rte_dma_completed() and rte_dma_completed_status() are
+ * not valid.
+ *
+ * @param dev_id
+ * The identifier of the device.
+ * @param vchan
+ * The identifier of virtual DMA channel.
+ * @param ops
+ * Pointer to rte_dma_op array.
+ * @param nb_ops
+ * Number of rte_dma_op entries in the ops array.
+ * @return
+ * - Number of successfully submitted ops.
+ */
+static inline uint16_t
+rte_dma_enqueue_ops(int16_t dev_id, uint16_t vchan, struct rte_dma_op **ops, uint16_t nb_ops)
+{
+ struct rte_dma_fp_object *obj = &rte_dma_fp_objs[dev_id];
+ uint16_t ret;
+
+#ifdef RTE_DMADEV_DEBUG
+ if (!rte_dma_is_valid(dev_id))
+ return 0;
+ if (*obj->enqueue == NULL)
+ return 0;
+#endif
+
+ ret = (*obj->enqueue)(obj->dev_private, vchan, ops, nb_ops);
+ rte_dma_trace_enqueue_ops(dev_id, vchan, (void **)ops, nb_ops);
+
+ return ret;
+}
+
+/**
+ * Dequeue completed rte_dma_op pointers from the DMA device. This API can
+ * only be used if the device supports RTE_DMA_CAPA_OPS_ENQ_DEQ and the
+ * RTE_DMA_CFG_FLAG_ENQ_DEQ flag was set when calling rte_dma_configure().
+ *
+ * @param dev_id
+ * The identifier of the device.
+ * @param vchan
+ * The identifier of virtual DMA channel.
+ * @param ops
+ * Pointer to rte_dma_op array.
+ * @param nb_ops
+ * Size of rte_dma_op array.
+ * @return
+ * - Number of successfully completed ops. Should be less than or equal to nb_ops.
+ */
+static inline uint16_t
+rte_dma_dequeue_ops(int16_t dev_id, uint16_t vchan, struct rte_dma_op **ops, uint16_t nb_ops)
+{
+ struct rte_dma_fp_object *obj = &rte_dma_fp_objs[dev_id];
+ uint16_t ret;
+
+#ifdef RTE_DMADEV_DEBUG
+ if (!rte_dma_is_valid(dev_id))
+ return 0;
+ if (*obj->dequeue == NULL)
+ return 0;
+#endif
+
+ ret = (*obj->dequeue)(obj->dev_private, vchan, ops, nb_ops);
+ rte_dma_trace_dequeue_ops(dev_id, vchan, (void **)ops, nb_ops);
+
+ return ret;
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/dmadev/rte_dmadev_core.h b/lib/dmadev/rte_dmadev_core.h
index 29f52514d7..20a467178f 100644
--- a/lib/dmadev/rte_dmadev_core.h
+++ b/lib/dmadev/rte_dmadev_core.h
@@ -50,6 +50,14 @@ typedef uint16_t (*rte_dma_completed_status_t)(void *dev_private,
/** @internal Used to check the remaining space in descriptor ring. */
typedef uint16_t (*rte_dma_burst_capacity_t)(const void *dev_private, uint16_t vchan);
+/** @internal Used to enqueue a rte_dma_op to the dma engine. */
+typedef uint16_t (*rte_dma_enqueue_ops_t)(void *dev_private, uint16_t vchan,
+ struct rte_dma_op **ops, uint16_t nb_ops);
+
+/** @internal Used to dequeue rte_dma_op from the dma engine. */
+typedef uint16_t (*rte_dma_dequeue_ops_t)(void *dev_private, uint16_t vchan,
+ struct rte_dma_op **ops, uint16_t nb_ops);
+
/**
* @internal
* Fast-path dmadev functions and related data are hold in a flat array.
@@ -73,6 +81,8 @@ struct __rte_cache_aligned rte_dma_fp_object {
rte_dma_completed_t completed;
rte_dma_completed_status_t completed_status;
rte_dma_burst_capacity_t burst_capacity;
+ rte_dma_enqueue_ops_t enqueue;
+ rte_dma_dequeue_ops_t dequeue;
};
extern struct rte_dma_fp_object *rte_dma_fp_objs;
diff --git a/lib/dmadev/rte_dmadev_trace.h b/lib/dmadev/rte_dmadev_trace.h
index 1de92655f2..04d9a2741b 100644
--- a/lib/dmadev/rte_dmadev_trace.h
+++ b/lib/dmadev/rte_dmadev_trace.h
@@ -41,7 +41,7 @@ RTE_TRACE_POINT(
rte_trace_point_emit_i16(dev_id);
rte_trace_point_emit_u16(dev_conf->nb_vchans);
rte_trace_point_emit_u16(dev_conf->priority);
- rte_trace_point_emit_u8(dev_conf->enable_silent);
+ rte_trace_point_emit_u64(dev_conf->flags);
rte_trace_point_emit_int(ret);
)
diff --git a/lib/dmadev/rte_dmadev_trace_fp.h b/lib/dmadev/rte_dmadev_trace_fp.h
index a1374e78b7..3db655fa65 100644
--- a/lib/dmadev/rte_dmadev_trace_fp.h
+++ b/lib/dmadev/rte_dmadev_trace_fp.h
@@ -125,6 +125,26 @@ RTE_TRACE_POINT_FP(
rte_trace_point_emit_u16(ret);
)
+RTE_TRACE_POINT_FP(
+ rte_dma_trace_enqueue_ops,
+ RTE_TRACE_POINT_ARGS(int16_t dev_id, uint16_t vchan, void **ops,
+ uint16_t nb_ops),
+ rte_trace_point_emit_i16(dev_id);
+ rte_trace_point_emit_u16(vchan);
+ rte_trace_point_emit_ptr(ops);
+ rte_trace_point_emit_u16(nb_ops);
+)
+
+RTE_TRACE_POINT_FP(
+ rte_dma_trace_dequeue_ops,
+ RTE_TRACE_POINT_ARGS(int16_t dev_id, uint16_t vchan, void **ops,
+ uint16_t nb_ops),
+ rte_trace_point_emit_i16(dev_id);
+ rte_trace_point_emit_u16(vchan);
+ rte_trace_point_emit_ptr(ops);
+ rte_trace_point_emit_u16(nb_ops);
+)
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/dmadev/rte_dmadev_trace_points.c b/lib/dmadev/rte_dmadev_trace_points.c
index 1c8998fb98..9a97a44a9c 100644
--- a/lib/dmadev/rte_dmadev_trace_points.c
+++ b/lib/dmadev/rte_dmadev_trace_points.c
@@ -64,3 +64,9 @@ RTE_TRACE_POINT_REGISTER(rte_dma_trace_completed_status,
RTE_EXPORT_EXPERIMENTAL_SYMBOL(__rte_dma_trace_burst_capacity, 24.03)
RTE_TRACE_POINT_REGISTER(rte_dma_trace_burst_capacity,
lib.dmadev.burst_capacity)
+
+RTE_TRACE_POINT_REGISTER(rte_dma_trace_enqueue_ops,
+ lib.dmadev.enqueue_ops)
+
+RTE_TRACE_POINT_REGISTER(rte_dma_trace_dequeue_ops,
+ lib.dmadev.dequeue_ops)
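Note on allocation: struct rte_dma_op ends in a flexible src_dst_seg[]
array, so op pools must size their elements for the worst-case segment
count, as the unit test above does. A minimal sketch (pool name and
sizes illustrative):

    /* Room for one source + one destination entry per op. */
    struct rte_mempool *ops_pool =
        rte_mempool_create("dma_ops", nb_desc,
                           sizeof(struct rte_dma_op) + 2 * sizeof(struct rte_dma_sge),
                           0, 0, NULL, NULL, NULL, NULL, rte_socket_id(), 0);
    struct rte_dma_op *op;

    if (rte_mempool_get(ops_pool, (void **)&op) == 0) {
        op->op_mp = ops_pool; /* lets the completion path free the op */
        op->nb_src = 1;
        op->nb_dst = 1;
        /* src_dst_seg[0] is the source, src_dst_seg[1] the destination */
    }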
--
2.43.0
* [25.11 PATCH 2/3] dma/cnxk: implement enqueue dequeue ops
From: pbhagavatula @ 2025-04-16 10:09 UTC (permalink / raw)
To: jerinj, Vamsi Attunuru
Cc: g.singh, sachin.saxena, hemant.agrawal, fengchengwen,
bruce.richardson, kevin.laatz, conor.walsh, gmuthukrishn,
vvelumuri, anatoly.burakov, dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Implement DMA enqueue/dequeue operations, selected when the
application enables them at device configuration time.
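The enqueue paths below compute free descriptor-ring space with
power-of-two mask arithmetic; a worked sketch of that computation
(assuming max_cnt is a 2^n - 1 mask, as the head/tail handling implies):

    /* head = consumer index, tail = producer index,
     * max_cnt = ring size minus one (ring size is a power of two). */
    static inline uint16_t
    ring_free_space(uint16_t head, uint16_t tail, uint16_t max_cnt)
    {
        /* E.g. head = 3, tail = 5, max_cnt = 7 (ring of 8):
         * used = (tail - head) & 7 = 2, free = (7 + 3 - 5) & 7 = 5;
         * one slot stays unused to tell a full ring from an empty one. */
        return (max_cnt + head - tail) & max_cnt;
    }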
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
drivers/dma/cnxk/cnxk_dmadev.c | 58 ++++++++++---
drivers/dma/cnxk/cnxk_dmadev.h | 7 ++
drivers/dma/cnxk/cnxk_dmadev_fp.c | 140 ++++++++++++++++++++++++++++++
3 files changed, 191 insertions(+), 14 deletions(-)
diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c
index 90bb69011f..3e45ef7d02 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.c
+++ b/drivers/dma/cnxk/cnxk_dmadev.c
@@ -7,6 +7,7 @@
#include <cnxk_dmadev.h>
static int cnxk_stats_reset(struct rte_dma_dev *dev, uint16_t vchan);
+static void cnxk_set_fp_ops(struct rte_dma_dev *dev, uint8_t enable_enq_deq);
static int
cnxk_dmadev_info_get(const struct rte_dma_dev *dev, struct rte_dma_info *dev_info, uint32_t size)
@@ -19,7 +20,7 @@ cnxk_dmadev_info_get(const struct rte_dma_dev *dev, struct rte_dma_info *dev_inf
dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM | RTE_DMA_CAPA_MEM_TO_DEV |
RTE_DMA_CAPA_DEV_TO_MEM | RTE_DMA_CAPA_DEV_TO_DEV |
RTE_DMA_CAPA_OPS_COPY | RTE_DMA_CAPA_OPS_COPY_SG |
- RTE_DMA_CAPA_M2D_AUTO_FREE;
+ RTE_DMA_CAPA_M2D_AUTO_FREE | RTE_DMA_CAPA_OPS_ENQ_DEQ;
if (roc_feature_dpi_has_priority()) {
dev_info->dev_capa |= RTE_DMA_CAPA_PRI_POLICY_SP;
dev_info->nb_priorities = CN10K_DPI_MAX_PRI;
@@ -114,6 +115,8 @@ cnxk_dmadev_configure(struct rte_dma_dev *dev, const struct rte_dma_conf *conf,
if (roc_feature_dpi_has_priority())
dpivf->rdpi.priority = conf->priority;
+ cnxk_set_fp_ops(dev, conf->flags & RTE_DMA_CFG_FLAG_ENQ_DEQ);
+
return 0;
}
@@ -270,6 +273,14 @@ cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
return -ENOMEM;
}
+ size = (max_desc * sizeof(struct rte_dma_op *));
+ dpi_conf->c_desc.ops = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE);
+ if (dpi_conf->c_desc.ops == NULL) {
+ plt_err("Failed to allocate for ops array");
+ rte_free(dpi_conf->c_desc.compl_ptr);
+ return -ENOMEM;
+ }
+
for (i = 0; i < max_desc; i++)
dpi_conf->c_desc.compl_ptr[i * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
@@ -528,6 +539,37 @@ cnxk_stats_reset(struct rte_dma_dev *dev, uint16_t vchan)
return 0;
}
+static void
+cnxk_set_fp_ops(struct rte_dma_dev *dev, uint8_t ena_enq_deq)
+{
+
+ dev->fp_obj->copy = cnxk_dmadev_copy;
+ dev->fp_obj->copy_sg = cnxk_dmadev_copy_sg;
+ dev->fp_obj->submit = cnxk_dmadev_submit;
+ dev->fp_obj->completed = cnxk_dmadev_completed;
+ dev->fp_obj->completed_status = cnxk_dmadev_completed_status;
+ dev->fp_obj->burst_capacity = cnxk_damdev_burst_capacity;
+
+ if (roc_model_is_cn10k()) {
+ dev->fp_obj->copy = cn10k_dmadev_copy;
+ dev->fp_obj->copy_sg = cn10k_dmadev_copy_sg;
+ }
+
+ if (ena_enq_deq) {
+ dev->fp_obj->copy = NULL;
+ dev->fp_obj->copy_sg = NULL;
+ dev->fp_obj->submit = NULL;
+ dev->fp_obj->completed = NULL;
+ dev->fp_obj->completed_status = NULL;
+
+ dev->fp_obj->enqueue = cnxk_dma_ops_enqueue;
+ dev->fp_obj->dequeue = cnxk_dma_ops_dequeue;
+
+ if (roc_model_is_cn10k())
+ dev->fp_obj->enqueue = cn10k_dma_ops_enqueue;
+ }
+}
+
static const struct rte_dma_dev_ops cnxk_dmadev_ops = {
.dev_close = cnxk_dmadev_close,
.dev_configure = cnxk_dmadev_configure,
@@ -571,19 +613,7 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_de
dmadev->fp_obj->dev_private = dpivf;
dmadev->dev_ops = &cnxk_dmadev_ops;
- dmadev->fp_obj->copy = cnxk_dmadev_copy;
- dmadev->fp_obj->copy_sg = cnxk_dmadev_copy_sg;
- dmadev->fp_obj->submit = cnxk_dmadev_submit;
- dmadev->fp_obj->completed = cnxk_dmadev_completed;
- dmadev->fp_obj->completed_status = cnxk_dmadev_completed_status;
- dmadev->fp_obj->burst_capacity = cnxk_damdev_burst_capacity;
-
- if (roc_model_is_cn10k()) {
- dpivf->is_cn10k = true;
- dmadev->fp_obj->copy = cn10k_dmadev_copy;
- dmadev->fp_obj->copy_sg = cn10k_dmadev_copy_sg;
- }
-
+ dpivf->is_cn10k = roc_model_is_cn10k();
dpivf->mcs_lock = NULL;
rdpi = &dpivf->rdpi;
diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h
index 9a232a5464..18039e43fb 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.h
+++ b/drivers/dma/cnxk/cnxk_dmadev.h
@@ -93,6 +93,7 @@ struct cnxk_dpi_cdesc_data_s {
uint16_t head;
uint16_t tail;
uint8_t *compl_ptr;
+ struct rte_dma_op **ops;
};
struct cnxk_dpi_conf {
@@ -131,5 +132,11 @@ int cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iov
int cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src,
const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst,
uint64_t flags);
+uint16_t cnxk_dma_ops_enqueue(void *dev_private, uint16_t vchan, struct rte_dma_op **ops,
+ uint16_t nb_ops);
+uint16_t cn10k_dma_ops_enqueue(void *dev_private, uint16_t vchan, struct rte_dma_op **ops,
+ uint16_t nb_ops);
+uint16_t cnxk_dma_ops_dequeue(void *dev_private, uint16_t vchan, struct rte_dma_op **ops,
+ uint16_t nb_ops);
#endif
diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c
index dea73c5b41..1e998af3d1 100644
--- a/drivers/dma/cnxk/cnxk_dmadev_fp.c
+++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c
@@ -670,3 +670,143 @@ cnxk_dma_adapter_dequeue(uintptr_t get_work1)
return (uintptr_t)op;
}
+
+uint16_t
+cnxk_dma_ops_enqueue(void *dev_private, uint16_t vchan, struct rte_dma_op **ops, uint16_t nb_ops)
+{
+ struct cnxk_dpi_vf_s *dpivf = dev_private;
+ struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
+ const struct rte_dma_sge *fptr, *lptr;
+ uint16_t src, dst, nwords = 0;
+ struct rte_dma_op *op;
+ uint16_t space, i;
+ uint8_t *comp_ptr;
+ uint64_t hdr[4];
+ int rc;
+
+ space = ((dpi_conf->c_desc.max_cnt + dpi_conf->c_desc.head - dpi_conf->c_desc.tail) &
+ dpi_conf->c_desc.max_cnt);
+ space = RTE_MIN(space, nb_ops);
+
+ for (i = 0; i < space; i++) {
+ op = ops[i];
+ comp_ptr =
+ &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
+ dpi_conf->c_desc.ops[dpi_conf->c_desc.tail] = op;
+ CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
+
+ hdr[1] = dpi_conf->cmd.u | ((op->flags & RTE_DMA_OP_FLAG_AUTO_FREE) << 37);
+ hdr[2] = (uint64_t)comp_ptr;
+
+ src = op->nb_src;
+ dst = op->nb_dst;
+ /*
+ * For inbound case, src pointers are last pointers.
+ * For all other cases, src pointers are first pointers.
+ */
+ if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) {
+ fptr = &op->src_dst_seg[src];
+ lptr = &op->src_dst_seg[0];
+ RTE_SWAP(src, dst);
+ } else {
+ fptr = &op->src_dst_seg[0];
+ lptr = &op->src_dst_seg[src];
+ }
+ hdr[0] = ((uint64_t)dst << 54) | (uint64_t)src << 48;
+
+ rc = __dpi_queue_write_sg(dpivf, hdr, fptr, lptr, src, dst);
+ if (rc) {
+ CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail);
+ goto done;
+ }
+ nwords += CNXK_DPI_CMD_LEN(src, dst);
+ }
+
+done:
+ if (nwords) {
+ rte_wmb();
+ plt_write64(nwords, dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+ dpi_conf->stats.submitted += i;
+ }
+
+ return i;
+}
+
+uint16_t
+cn10k_dma_ops_enqueue(void *dev_private, uint16_t vchan, struct rte_dma_op **ops, uint16_t nb_ops)
+{
+ struct cnxk_dpi_vf_s *dpivf = dev_private;
+ struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
+ uint16_t space, i, nwords = 0;
+ struct rte_dma_op *op;
+ uint16_t src, dst;
+ uint8_t *comp_ptr;
+ uint64_t hdr[4];
+ int rc;
+
+ space = (dpi_conf->c_desc.max_cnt + dpi_conf->c_desc.head - dpi_conf->c_desc.tail) &
+ dpi_conf->c_desc.max_cnt;
+ space = RTE_MIN(space, nb_ops);
+
+ for (i = 0; i < space; i++) {
+ op = ops[i];
+ src = op->nb_src;
+ dst = op->nb_dst;
+ comp_ptr =
+ &dpi_conf->c_desc.compl_ptr[dpi_conf->c_desc.tail * CNXK_DPI_COMPL_OFFSET];
+ dpi_conf->c_desc.ops[dpi_conf->c_desc.tail] = op;
+ CNXK_DPI_STRM_INC(dpi_conf->c_desc, tail);
+
+ hdr[0] = dpi_conf->cmd.u | (dst << 6) | src;
+ hdr[1] = (uint64_t)comp_ptr;
+ hdr[2] = (1UL << 47) | ((op->flags & RTE_DMA_OP_FLAG_AUTO_FREE) << 43);
+
+ rc = __dpi_queue_write_sg(dpivf, hdr, &op->src_dst_seg[0], &op->src_dst_seg[src],
+ src, dst);
+ if (rc) {
+ CNXK_DPI_STRM_DEC(dpi_conf->c_desc, tail);
+ goto done;
+ }
+ nwords += CNXK_DPI_CMD_LEN(src, dst);
+ }
+
+done:
+ if (nwords) {
+ rte_wmb();
+ plt_write64(nwords, dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+ dpi_conf->stats.submitted += i;
+ }
+
+ return i;
+}
+
+uint16_t
+cnxk_dma_ops_dequeue(void *dev_private, uint16_t vchan, struct rte_dma_op **ops, uint16_t nb_ops)
+{
+ struct cnxk_dpi_vf_s *dpivf = dev_private;
+ struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
+ struct cnxk_dpi_cdesc_data_s *c_desc = &dpi_conf->c_desc;
+ struct rte_dma_op *op;
+ uint16_t space, cnt;
+ uint8_t status;
+
+ space = (c_desc->tail - c_desc->head) & c_desc->max_cnt;
+ space = RTE_MIN(nb_ops, space);
+ for (cnt = 0; cnt < space; cnt++) {
+ status = c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET];
+ op = c_desc->ops[c_desc->head];
+ op->status = status;
+ ops[cnt] = op;
+ if (status) {
+ if (status == CNXK_DPI_REQ_CDATA)
+ break;
+ dpi_conf->stats.errors++;
+ }
+ c_desc->compl_ptr[c_desc->head * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
+ CNXK_DPI_STRM_INC(*c_desc, head);
+ }
+
+ dpi_conf->stats.completed += cnt;
+
+ return cnt;
+}
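On the application side, each dequeued op carries its completion code in
op->status; a minimal sketch of draining completions (handle_error() is
a hypothetical helper, BURST_SIZE an assumed constant):

    struct rte_dma_op *done[BURST_SIZE];
    uint16_t i, n;

    n = rte_dma_dequeue_ops(dev_id, vchan, done, BURST_SIZE);
    for (i = 0; i < n; i++) {
        if (done[i]->status != RTE_DMA_STATUS_SUCCESSFUL)
            handle_error(done[i]); /* hypothetical error path */
        rte_mempool_put(done[i]->op_mp, done[i]);
    }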
--
2.43.0
* [25.11 PATCH 3/3] eventdev: refactor DMA adapter ops
From: pbhagavatula @ 2025-04-16 10:09 UTC (permalink / raw)
To: jerinj, Amit Prakash Shukla, Vamsi Attunuru
Cc: g.singh, sachin.saxena, hemant.agrawal, fengchengwen,
bruce.richardson, kevin.laatz, conor.walsh, gmuthukrishn,
vvelumuri, anatoly.burakov, dev, Pavan Nikhilesh
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Migrate all users of the rte_event_dma_adapter_op structure to
rte_dma_op and drop the duplicated definition from eventdev.
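After the migration, an OP_FORWARD mode application fills the same
fields as before, now on struct rte_dma_op; a sketch mirroring
app/test/test_event_dma_adapter.c (pool and event setup elided,
identifiers illustrative):

    struct rte_dma_op *op;
    struct rte_event ev = {0};

    rte_mempool_get(op_mpool, (void **)&op);
    op->dma_dev_id = dma_dev_id;
    op->vchan = TEST_DMA_VCHAN_ID;
    op->nb_src = 1;
    op->nb_dst = 1;
    op->src_dst_seg[0].addr = rte_pktmbuf_iova(src_mbuf);
    op->src_dst_seg[0].length = len;
    op->src_dst_seg[1].addr = rte_pktmbuf_iova(dst_mbuf);
    op->src_dst_seg[1].length = len;
    op->event_meta = response_event; /* completion event metadata */

    ev.op = RTE_EVENT_OP_NEW;
    ev.event_ptr = op;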
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Amit Prakash Shukla <amitprakashs@marvell.com>
---
app/test-eventdev/test_perf_common.c | 6 +-
app/test-eventdev/test_perf_common.h | 4 +-
app/test/test_event_dma_adapter.c | 6 +-
.../prog_guide/eventdev/event_dma_adapter.rst | 6 +-
drivers/dma/cnxk/cnxk_dmadev.c | 2 +-
drivers/dma/cnxk/cnxk_dmadev_fp.c | 12 ++--
lib/eventdev/rte_event_dma_adapter.c | 18 +++---
lib/eventdev/rte_event_dma_adapter.h | 57 -------------------
8 files changed, 27 insertions(+), 84 deletions(-)
diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c
index 627f07caa1..4e0109db52 100644
--- a/app/test-eventdev/test_perf_common.c
+++ b/app/test-eventdev/test_perf_common.c
@@ -562,11 +562,11 @@ crypto_adapter_enq_op_fwd(struct prod_data *p)
static inline void
dma_adapter_enq_op_fwd(struct prod_data *p)
{
- struct rte_event_dma_adapter_op *ops[BURST_SIZE] = {NULL};
+ struct rte_dma_op *ops[BURST_SIZE] = {NULL};
struct test_perf *t = p->t;
const uint32_t nb_flows = t->nb_flows;
const uint64_t nb_pkts = t->nb_pkts;
- struct rte_event_dma_adapter_op op;
+ struct rte_dma_op op;
struct rte_event evts[BURST_SIZE];
const uint8_t dev_id = p->dev_id;
struct evt_options *opt = t->opt;
@@ -2114,7 +2114,7 @@ perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
} else if (opt->prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
t->pool = rte_mempool_create(test->name, /* mempool name */
opt->pool_sz, /* number of elements*/
- sizeof(struct rte_event_dma_adapter_op) +
+ sizeof(struct rte_dma_op) +
(sizeof(struct rte_dma_sge) * 2),
cache_sz, /* cache size*/
0, NULL, NULL, NULL, /* obj constructor */
diff --git a/app/test-eventdev/test_perf_common.h b/app/test-eventdev/test_perf_common.h
index d7333ad390..63078b0ee2 100644
--- a/app/test-eventdev/test_perf_common.h
+++ b/app/test-eventdev/test_perf_common.h
@@ -139,7 +139,7 @@ perf_mark_fwd_latency(enum evt_prod_type prod_type, struct rte_event *const ev)
}
pe->timestamp = rte_get_timer_cycles();
} else if (prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
- struct rte_event_dma_adapter_op *op = ev->event_ptr;
+ struct rte_dma_op *op = ev->event_ptr;
op->user_meta = rte_get_timer_cycles();
} else {
@@ -297,7 +297,7 @@ perf_process_last_stage_latency(struct rte_mempool *const pool, enum evt_prod_ty
tstamp = pe->timestamp;
rte_crypto_op_free(op);
} else if (prod_type == EVT_PROD_TYPE_EVENT_DMA_ADPTR) {
- struct rte_event_dma_adapter_op *op = ev->event_ptr;
+ struct rte_dma_op *op = ev->event_ptr;
to_free_in_bulk = op;
tstamp = op->user_meta;
diff --git a/app/test/test_event_dma_adapter.c b/app/test/test_event_dma_adapter.c
index 9988d4fc7b..7f72a4e81d 100644
--- a/app/test/test_event_dma_adapter.c
+++ b/app/test/test_event_dma_adapter.c
@@ -234,7 +234,7 @@ test_op_forward_mode(void)
{
struct rte_mbuf *src_mbuf[TEST_MAX_OP];
struct rte_mbuf *dst_mbuf[TEST_MAX_OP];
- struct rte_event_dma_adapter_op *op;
+ struct rte_dma_op *op;
struct rte_event ev[TEST_MAX_OP];
int ret, i;
@@ -266,7 +266,7 @@ test_op_forward_mode(void)
op->vchan = TEST_DMA_VCHAN_ID;
op->event_meta = dma_response_info.event;
- /* Fill in event info and update event_ptr with rte_event_dma_adapter_op */
+ /* Fill in event info and update event_ptr with rte_dma_op */
memset(&ev[i], 0, sizeof(struct rte_event));
ev[i].event = 0;
ev[i].op = RTE_EVENT_OP_NEW;
@@ -396,7 +396,7 @@ configure_dmadev(void)
rte_socket_id());
RTE_TEST_ASSERT_NOT_NULL(params.dst_mbuf_pool, "Can't create DMA_DST_MBUFPOOL\n");
- elt_size = sizeof(struct rte_event_dma_adapter_op) + (sizeof(struct rte_dma_sge) * 2);
+ elt_size = sizeof(struct rte_dma_op) + (sizeof(struct rte_dma_sge) * 2);
params.op_mpool = rte_mempool_create("EVENT_DMA_OP_POOL", DMA_OP_POOL_SIZE, elt_size, 0,
0, NULL, NULL, NULL, NULL, rte_socket_id(), 0);
RTE_TEST_ASSERT_NOT_NULL(params.op_mpool, "Can't create DMA_OP_POOL\n");
diff --git a/doc/guides/prog_guide/eventdev/event_dma_adapter.rst b/doc/guides/prog_guide/eventdev/event_dma_adapter.rst
index e040d89e8b..e8437a3297 100644
--- a/doc/guides/prog_guide/eventdev/event_dma_adapter.rst
+++ b/doc/guides/prog_guide/eventdev/event_dma_adapter.rst
@@ -144,7 +144,7 @@ on which it enqueues events towards the DMA adapter using ``rte_event_enqueue_bu
uint32_t cap;
int ret;
- /* Fill in event info and update event_ptr with rte_event_dma_adapter_op */
+ /* Fill in event info and update event_ptr with rte_dma_op */
memset(&ev, 0, sizeof(ev));
.
.
@@ -244,11 +244,11 @@ Set event response information
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In the ``RTE_EVENT_DMA_ADAPTER_OP_FORWARD`` / ``RTE_EVENT_DMA_ADAPTER_OP_NEW`` mode,
-the application specifies the dmadev ID and vchan ID in ``struct rte_event_dma_adapter_op``
+the application specifies the dmadev ID and vchan ID in ``struct rte_dma_op``
and the event information (response information)
needed to enqueue an event after the DMA operation has completed.
The response information is specified in ``struct rte_event``
-and appended to the ``struct rte_event_dma_adapter_op``.
+and appended to the ``struct rte_dma_op``.
Start the adapter instance
diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c
index 3e45ef7d02..666514bac0 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.c
+++ b/drivers/dma/cnxk/cnxk_dmadev.c
@@ -618,7 +618,7 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_de
rdpi = &dpivf->rdpi;
rdpi->pci_dev = pci_dev;
- rc = roc_dpi_dev_init(rdpi, offsetof(struct rte_event_dma_adapter_op, impl_opaque));
+ rc = roc_dpi_dev_init(rdpi, offsetof(struct rte_dma_op, impl_opaque));
if (rc < 0)
goto err_out_free;
diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c
index 1e998af3d1..4655924555 100644
--- a/drivers/dma/cnxk/cnxk_dmadev_fp.c
+++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c
@@ -451,7 +451,7 @@ uint16_t
cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
{
const struct rte_dma_sge *src, *dst;
- struct rte_event_dma_adapter_op *op;
+ struct rte_dma_op *op;
struct cnxk_dpi_conf *dpi_conf;
struct cnxk_dpi_vf_s *dpivf;
struct cn10k_sso_hws *work;
@@ -511,7 +511,7 @@ uint16_t
cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
{
const struct rte_dma_sge *fptr, *lptr;
- struct rte_event_dma_adapter_op *op;
+ struct rte_dma_op *op;
struct cn9k_sso_hws_dual *work;
struct cnxk_dpi_conf *dpi_conf;
struct cnxk_dpi_vf_s *dpivf;
@@ -527,7 +527,7 @@ cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event
for (count = 0; count < nb_events; count++) {
op = ev[count].event_ptr;
rsp_info = (struct rte_event *)((uint8_t *)op +
- sizeof(struct rte_event_dma_adapter_op));
+ sizeof(struct rte_dma_op));
dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
dpi_conf = &dpivf->conf[op->vchan];
@@ -582,7 +582,7 @@ uint16_t
cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
{
const struct rte_dma_sge *fptr, *lptr;
- struct rte_event_dma_adapter_op *op;
+ struct rte_dma_op *op;
struct cnxk_dpi_conf *dpi_conf;
struct cnxk_dpi_vf_s *dpivf;
struct cn9k_sso_hws *work;
@@ -649,11 +649,11 @@ RTE_EXPORT_INTERNAL_SYMBOL(cnxk_dma_adapter_dequeue)
uintptr_t
cnxk_dma_adapter_dequeue(uintptr_t get_work1)
{
- struct rte_event_dma_adapter_op *op;
+ struct rte_dma_op *op;
struct cnxk_dpi_conf *dpi_conf;
struct cnxk_dpi_vf_s *dpivf;
- op = (struct rte_event_dma_adapter_op *)get_work1;
+ op = (struct rte_dma_op *)get_work1;
dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
dpi_conf = &dpivf->conf[op->vchan];
diff --git a/lib/eventdev/rte_event_dma_adapter.c b/lib/eventdev/rte_event_dma_adapter.c
index 9ac6a4a205..484cd50011 100644
--- a/lib/eventdev/rte_event_dma_adapter.c
+++ b/lib/eventdev/rte_event_dma_adapter.c
@@ -40,8 +40,8 @@ struct __rte_cache_aligned dma_ops_circular_buffer {
/* Size of circular buffer */
uint16_t size;
- /* Pointer to hold rte_event_dma_adapter_op for processing */
- struct rte_event_dma_adapter_op **op_buffer;
+ /* Pointer to hold rte_dma_op for processing */
+ struct rte_dma_op **op_buffer;
};
/* Vchan information */
@@ -202,7 +202,7 @@ edma_circular_buffer_space_for_batch(struct dma_ops_circular_buffer *bufp)
static inline int
edma_circular_buffer_init(const char *name, struct dma_ops_circular_buffer *buf, uint16_t sz)
{
- buf->op_buffer = rte_zmalloc(name, sizeof(struct rte_event_dma_adapter_op *) * sz, 0);
+ buf->op_buffer = rte_zmalloc(name, sizeof(struct rte_dma_op *) * sz, 0);
if (buf->op_buffer == NULL)
return -ENOMEM;
@@ -218,7 +218,7 @@ edma_circular_buffer_free(struct dma_ops_circular_buffer *buf)
}
static inline int
-edma_circular_buffer_add(struct dma_ops_circular_buffer *bufp, struct rte_event_dma_adapter_op *op)
+edma_circular_buffer_add(struct dma_ops_circular_buffer *bufp, struct rte_dma_op *op)
{
uint16_t *tail = &bufp->tail;
@@ -236,7 +236,7 @@ edma_circular_buffer_flush_to_dma_dev(struct event_dma_adapter *adapter,
struct dma_ops_circular_buffer *bufp, uint8_t dma_dev_id,
uint16_t vchan, uint16_t *nb_ops_flushed)
{
- struct rte_event_dma_adapter_op *op;
+ struct rte_dma_op *op;
uint16_t *head = &bufp->head;
uint16_t *tail = &bufp->tail;
struct dma_vchan_info *tq;
@@ -503,7 +503,7 @@ edma_enq_to_dma_dev(struct event_dma_adapter *adapter, struct rte_event *ev, uns
{
struct rte_event_dma_adapter_stats *stats = &adapter->dma_stats;
struct dma_vchan_info *vchan_qinfo = NULL;
- struct rte_event_dma_adapter_op *dma_op;
+ struct rte_dma_op *dma_op;
uint16_t vchan, nb_enqueued = 0;
int16_t dma_dev_id;
unsigned int i, n;
@@ -646,7 +646,7 @@ edma_adapter_enq_run(struct event_dma_adapter *adapter, unsigned int max_enq)
#define DMA_ADAPTER_MAX_EV_ENQ_RETRIES 100
static inline uint16_t
-edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_event_dma_adapter_op **ops,
+edma_ops_enqueue_burst(struct event_dma_adapter *adapter, struct rte_dma_op **ops,
uint16_t num)
{
struct rte_event_dma_adapter_stats *stats = &adapter->dma_stats;
@@ -692,7 +692,7 @@ edma_circular_buffer_flush_to_evdev(struct event_dma_adapter *adapter,
struct dma_ops_circular_buffer *bufp,
uint16_t *enqueue_count)
{
- struct rte_event_dma_adapter_op **ops = bufp->op_buffer;
+ struct rte_dma_op **ops = bufp->op_buffer;
uint16_t n = 0, nb_ops_flushed;
uint16_t *head = &bufp->head;
uint16_t *tail = &bufp->tail;
@@ -741,7 +741,7 @@ edma_adapter_deq_run(struct event_dma_adapter *adapter, unsigned int max_deq)
struct rte_event_dma_adapter_stats *stats = &adapter->dma_stats;
struct dma_vchan_info *vchan_info;
struct dma_ops_circular_buffer *tq_buf;
- struct rte_event_dma_adapter_op *ops;
+ struct rte_dma_op *ops;
uint16_t n, nb_deq, nb_enqueued, i;
struct dma_device_info *dev_info;
uint16_t vchan, num_vchan;
diff --git a/lib/eventdev/rte_event_dma_adapter.h b/lib/eventdev/rte_event_dma_adapter.h
index 5c480b82ff..453754d13b 100644
--- a/lib/eventdev/rte_event_dma_adapter.h
+++ b/lib/eventdev/rte_event_dma_adapter.h
@@ -151,63 +151,6 @@
extern "C" {
#endif
-/**
- * A structure used to hold event based DMA operation entry. All the information
- * required for a DMA transfer shall be populated in "struct rte_event_dma_adapter_op"
- * instance.
- */
-struct rte_event_dma_adapter_op {
- uint64_t flags;
- /**< Flags related to the operation.
- * @see RTE_DMA_OP_FLAG_*
- */
- struct rte_mempool *op_mp;
- /**< Mempool from which op is allocated. */
- enum rte_dma_status_code status;
- /**< Status code for this operation. */
- uint32_t rsvd;
- /**< Reserved for future use. */
- uint64_t impl_opaque[2];
- /**< Implementation-specific opaque data.
- * An dma device implementation use this field to hold
- * implementation specific values to share between dequeue and enqueue
- * operations.
- * The application should not modify this field.
- */
- uint64_t user_meta;
- /**< Memory to store user specific metadata.
- * The dma device implementation should not modify this area.
- */
- uint64_t event_meta;
- /**< Event metadata of DMA completion event.
- * Used when RTE_EVENT_DMA_ADAPTER_CAP_INTERNAL_PORT_VCHAN_EV_BIND is not
- * supported in OP_NEW mode.
- * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_NEW
- * @see RTE_EVENT_DMA_ADAPTER_CAP_INTERNAL_PORT_VCHAN_EV_BIND
- *
- * Used when RTE_EVENT_DMA_ADAPTER_CAP_INTERNAL_PORT_OP_FWD is not
- * supported in OP_FWD mode.
- * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD
- * @see RTE_EVENT_DMA_ADAPTER_CAP_INTERNAL_PORT_OP_FWD
- *
- * @see struct rte_event::event
- */
- int16_t dma_dev_id;
- /**< DMA device ID to be used with OP_FORWARD mode.
- * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD
- */
- uint16_t vchan;
- /**< DMA vchan ID to be used with OP_FORWARD mode
- * @see rte_event_dma_adapter_mode::RTE_EVENT_DMA_ADAPTER_OP_FORWARD
- */
- uint16_t nb_src;
- /**< Number of source segments. */
- uint16_t nb_dst;
- /**< Number of destination segments. */
- struct rte_dma_sge src_dst_seg[];
- /**< Source and destination segments. */
-};
-
/**
* DMA event adapter mode
*/
--
2.43.0