DPDK patches and discussions
 help / color / mirror / Atom feed
From: <pbhagavatula@marvell.com>
To: <jerinj@marvell.com>, Cheng Jiang <honest.jiang@foxmail.com>,
	"Chengwen Feng" <fengchengwen@huawei.com>
Cc: <vattunuru@marvell.com>, <g.singh@nxp.com>,
	<sachin.saxena@nxp.com>, <hemant.agrawal@nxp.com>,
	<bruce.richardson@intel.com>, <kevin.laatz@intel.com>,
	<conor.walsh@intel.com>, <gmuthukrishn@marvell.com>,
	<vvelumuri@marvell.com>, <anatoly.burakov@intel.com>,
	<dev@dpdk.org>, "Pavan Nikhilesh" <pbhagavatula@marvell.com>
Subject: [25.11 PATCH v2 3/5] app/dma-perf: add option to measure enq deq ops
Date: Tue, 20 May 2025 00:26:02 +0530	[thread overview]
Message-ID: <20250519185604.5584-4-pbhagavatula@marvell.com> (raw)
In-Reply-To: <20250519185604.5584-1-pbhagavatula@marvell.com>

From: Pavan Nikhilesh <pbhagavatula@marvell.com>

This patch adds an option to measure the performance of
enq/deq operations in the benchmark app.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 app/test-dma-perf/benchmark.c | 137 +++++++++++++++++++++++++++++++---
 app/test-dma-perf/config.ini  |   3 +
 app/test-dma-perf/main.c      |  13 +++-
 app/test-dma-perf/main.h      |   1 +
 doc/guides/tools/dmaperf.rst  |   5 ++
 5 files changed, 148 insertions(+), 11 deletions(-)

diff --git a/app/test-dma-perf/benchmark.c b/app/test-dma-perf/benchmark.c
index 6d617ea200..4425fc97cf 100644
--- a/app/test-dma-perf/benchmark.c
+++ b/app/test-dma-perf/benchmark.c
@@ -54,6 +54,7 @@ struct lcore_params {
 	struct rte_mbuf **srcs;
 	struct rte_mbuf **dsts;
 	struct sge_info sge;
+	struct rte_dma_op **dma_ops;
 	volatile struct worker_info worker_info;
 };
 
@@ -198,6 +199,16 @@ configure_dmadev_queue(uint32_t dev_id, struct test_configure *cfg, uint8_t sges
 	if (vchan_data_populate(dev_id, &qconf, cfg, dev_num) != 0)
 		rte_exit(EXIT_FAILURE, "Error with vchan data populate.\n");
 
+	if (rte_dma_info_get(dev_id, &info) != 0)
+		rte_exit(EXIT_FAILURE, "Error with getting device info.\n");
+
+	if (cfg->use_ops && !(info.dev_capa & RTE_DMA_CAPA_OPS_ENQ_DEQ))
+		rte_exit(EXIT_FAILURE, "Error with device %s not support enq_deq ops.\n",
+			 info.dev_name);
+
+	if (cfg->use_ops)
+		dev_config.flags = RTE_DMA_CFG_FLAG_ENQ_DEQ;
+
 	if (rte_dma_configure(dev_id, &dev_config) != 0)
 		rte_exit(EXIT_FAILURE, "Error with dma configure.\n");
 
@@ -395,6 +406,61 @@ do_dma_sg_mem_copy(void *p)
 	return 0;
 }
 
+static inline int
+do_dma_enq_deq_mem_copy(void *p)
+{
+#define DEQ_SZ 64
+	struct lcore_params *para = (struct lcore_params *)p;
+	volatile struct worker_info *worker_info = &(para->worker_info);
+	struct rte_dma_op **dma_ops = para->dma_ops;
+	uint16_t kick_batch = para->kick_batch, sz;
+	uint16_t enq, deq, poll_cnt;
+	uint64_t tenq, tdeq;
+	const uint16_t dev_id = para->dev_id;
+	uint32_t nr_buf = para->nr_buf;
+	struct rte_dma_op *op[DEQ_SZ];
+	uint32_t i;
+
+	worker_info->stop_flag = false;
+	worker_info->ready_flag = true;
+
+	while (!worker_info->start_flag)
+		;
+
+	if (kick_batch > nr_buf)
+		kick_batch = nr_buf;
+
+	tenq = 0;
+	tdeq = 0;
+	while (1) {
+		for (i = 0; i < nr_buf; i += kick_batch) {
+			sz = RTE_MIN(nr_buf - i, kick_batch);
+			enq = rte_dma_enqueue_ops(dev_id, 0, &dma_ops[i], sz);
+			while (enq < sz) {
+				do {
+					deq = rte_dma_dequeue_ops(dev_id, 0, op, DEQ_SZ);
+					tdeq += deq;
+				} while (deq);
+				enq += rte_dma_enqueue_ops(dev_id, 0, &dma_ops[i + enq], sz - enq);
+				if (worker_info->stop_flag)
+					break;
+			}
+			tenq += enq;
+
+			worker_info->total_cpl += enq;
+		}
+
+		if (worker_info->stop_flag)
+			break;
+	}
+
+	poll_cnt = 0;
+	while ((tenq != tdeq) && (poll_cnt++ < POLL_MAX))
+		tdeq += rte_dma_dequeue_ops(dev_id, 0, op, DEQ_SZ);
+
+	return 0;
+}
+
 static inline int
 do_cpu_mem_copy(void *p)
 {
@@ -436,16 +502,17 @@ dummy_free_ext_buf(void *addr, void *opaque)
 }
 
 static int
-setup_memory_env(struct test_configure *cfg,
-			 struct rte_mbuf ***srcs, struct rte_mbuf ***dsts,
-			 struct rte_dma_sge **src_sges, struct rte_dma_sge **dst_sges)
+setup_memory_env(struct test_configure *cfg, struct rte_mbuf ***srcs, struct rte_mbuf ***dsts,
+		 struct rte_dma_sge **src_sges, struct rte_dma_sge **dst_sges,
+		 struct rte_dma_op ***dma_ops)
 {
 	unsigned int cur_buf_size = cfg->buf_size.cur;
 	unsigned int buf_size = cur_buf_size + RTE_PKTMBUF_HEADROOM;
-	unsigned int nr_sockets;
+	bool is_src_numa_incorrect, is_dst_numa_incorrect;
 	uint32_t nr_buf = cfg->nr_buf;
+	unsigned int nr_sockets;
+	uintptr_t ops;
 	uint32_t i;
-	bool is_src_numa_incorrect, is_dst_numa_incorrect;
 
 	nr_sockets = rte_socket_count();
 	is_src_numa_incorrect = (cfg->src_numa_node >= nr_sockets);
@@ -540,6 +607,34 @@ setup_memory_env(struct test_configure *cfg,
 			if (!((i+1) % nb_dst_sges))
 				(*dst_sges)[i].length += (cur_buf_size % nb_dst_sges);
 		}
+
+		if (cfg->use_ops) {
+
+			nr_buf /= RTE_MAX(nb_src_sges, nb_dst_sges);
+			*dma_ops = rte_zmalloc(NULL, nr_buf * (sizeof(struct rte_dma_op *)),
+					       RTE_CACHE_LINE_SIZE);
+			if (*dma_ops == NULL) {
+				printf("Error: dma_ops container malloc failed.\n");
+				return -1;
+			}
+
+			ops = (uintptr_t)rte_zmalloc(
+				NULL,
+				nr_buf * (sizeof(struct rte_dma_op) + ((nb_src_sges + nb_dst_sges) *
+								       sizeof(struct rte_dma_sge))),
+				RTE_CACHE_LINE_SIZE);
+			if (ops == 0) {
+				printf("Error: dma_ops malloc failed.\n");
+				return -1;
+			}
+
+			for (i = 0; i < nr_buf; i++)
+				(*dma_ops)[i] =
+					(struct rte_dma_op *)(ops +
+							      (i * (sizeof(struct rte_dma_op) +
+								    ((nb_src_sges + nb_dst_sges) *
+								     sizeof(struct rte_dma_sge)))));
+		}
 	}
 
 	return 0;
@@ -582,8 +677,12 @@ get_work_function(struct test_configure *cfg)
 	if (cfg->is_dma) {
 		if (!cfg->is_sg)
 			fn = do_dma_plain_mem_copy;
-		else
-			fn = do_dma_sg_mem_copy;
+		else {
+			if (cfg->use_ops)
+				fn = do_dma_enq_deq_mem_copy;
+			else
+				fn = do_dma_sg_mem_copy;
+		}
 	} else {
 		fn = do_cpu_mem_copy;
 	}
@@ -680,6 +779,7 @@ mem_copy_benchmark(struct test_configure *cfg)
 	struct rte_dma_sge *src_sges = NULL, *dst_sges = NULL;
 	struct vchan_dev_config *vchan_dev = NULL;
 	struct lcore_dma_map_t *lcore_dma_map = NULL;
+	struct rte_dma_op **dma_ops = NULL;
 	unsigned int buf_size = cfg->buf_size.cur;
 	uint16_t kick_batch = cfg->kick_batch.cur;
 	uint16_t nb_workers = cfg->num_worker;
@@ -690,13 +790,13 @@ mem_copy_benchmark(struct test_configure *cfg)
 	float mops, mops_total;
 	float bandwidth, bandwidth_total;
 	uint32_t nr_sgsrc = 0, nr_sgdst = 0;
-	uint32_t nr_buf;
+	uint32_t nr_buf, nr_ops;
 	int ret = 0;
 
 	nr_buf = align_buffer_count(cfg, &nr_sgsrc, &nr_sgdst);
 	cfg->nr_buf = nr_buf;
 
-	if (setup_memory_env(cfg, &srcs, &dsts, &src_sges, &dst_sges) < 0)
+	if (setup_memory_env(cfg, &srcs, &dsts, &src_sges, &dst_sges, &dma_ops) < 0)
 		goto out;
 
 	if (cfg->is_dma)
@@ -751,6 +851,25 @@ mem_copy_benchmark(struct test_configure *cfg)
 				goto out;
 		}
 
+		if (cfg->is_sg && cfg->use_ops) {
+			nr_ops = nr_buf / RTE_MAX(cfg->nb_src_sges, cfg->nb_dst_sges);
+			lcores[i]->nr_buf = nr_ops / nb_workers;
+			lcores[i]->dma_ops = dma_ops + (nr_ops / nb_workers * i);
+			for (j = 0; j < (nr_ops / nb_workers); j++) {
+				for (k = 0; k < cfg->nb_src_sges; k++)
+					lcores[i]->dma_ops[j]->src_dst_seg[k] =
+						lcores[i]->sge.srcs[(j * cfg->nb_src_sges) + k];
+
+				for (k = 0; k < cfg->nb_dst_sges; k++)
+					lcores[i]->dma_ops[j]->src_dst_seg[k + cfg->nb_src_sges] =
+						lcores[i]->sge.dsts[(j * cfg->nb_dst_sges) + k];
+
+				lcores[i]->dma_ops[j]->nb_src = cfg->nb_src_sges;
+				lcores[i]->dma_ops[j]->nb_dst = cfg->nb_dst_sges;
+				lcores[i]->dma_ops[j]->vchan = 0;
+			}
+		}
+
 		rte_eal_remote_launch(get_work_function(cfg), (void *)(lcores[i]), lcore_id);
 	}
 
diff --git a/app/test-dma-perf/config.ini b/app/test-dma-perf/config.ini
index 61e49dbae5..fa59f6b140 100644
--- a/app/test-dma-perf/config.ini
+++ b/app/test-dma-perf/config.ini
@@ -52,6 +52,8 @@
 ;
 ; For DMA scatter-gather memory copy, the parameters need to be configured
 ; and they are valid only when type is DMA_MEM_COPY.
+;
+; To use Enqueue Dequeue operations, set ``use_enq_deq_ops=1`` in the configuration.
 
 ; To specify a configuration file, use the "--config" flag followed by the path to the file.
 
@@ -88,6 +90,7 @@ test_seconds=2
 lcore_dma0=lcore=10,dev=0000:00:04.1,dir=mem2mem
 lcore_dma1=lcore=11,dev=0000:00:04.2,dir=mem2mem
 eal_args=--in-memory --file-prefix=test
+use_enq_deq_ops=0
 
 [case3]
 skip=1
diff --git a/app/test-dma-perf/main.c b/app/test-dma-perf/main.c
index 0586b3e1d0..cb4aee878f 100644
--- a/app/test-dma-perf/main.c
+++ b/app/test-dma-perf/main.c
@@ -297,8 +297,8 @@ load_configs(const char *path)
 	char section_name[CFG_NAME_LEN];
 	const char *case_type;
 	const char *lcore_dma;
-	const char *mem_size_str, *buf_size_str, *ring_size_str, *kick_batch_str,
-		*src_sges_str, *dst_sges_str;
+	const char *mem_size_str, *buf_size_str, *ring_size_str, *kick_batch_str, *src_sges_str,
+		*dst_sges_str, *use_dma_ops;
 	const char *skip;
 	struct rte_kvargs *kvlist;
 	int args_nr, nb_vp;
@@ -349,6 +349,15 @@ load_configs(const char *path)
 			continue;
 		}
 
+		if (is_dma) {
+			use_dma_ops =
+				rte_cfgfile_get_entry(cfgfile, section_name, "use_enq_deq_ops");
+			if (use_dma_ops != NULL && (atoi(use_dma_ops) == 1))
+				test_case->use_ops = true;
+			else
+				test_case->use_ops = false;
+		}
+
 		test_case->is_dma = is_dma;
 		test_case->src_numa_node = (int)atoi(rte_cfgfile_get_entry(cfgfile,
 								section_name, "src_numa_node"));
diff --git a/app/test-dma-perf/main.h b/app/test-dma-perf/main.h
index 59eb648b3d..d6cc613250 100644
--- a/app/test-dma-perf/main.h
+++ b/app/test-dma-perf/main.h
@@ -58,6 +58,7 @@ struct test_configure {
 	uint16_t opcode;
 	bool is_dma;
 	bool is_sg;
+	bool use_ops;
 	struct lcore_dma_config dma_config[MAX_WORKER_NB];
 	struct test_configure_entry mem_size;
 	struct test_configure_entry buf_size;
diff --git a/doc/guides/tools/dmaperf.rst b/doc/guides/tools/dmaperf.rst
index b7ff41065f..7abbbf9260 100644
--- a/doc/guides/tools/dmaperf.rst
+++ b/doc/guides/tools/dmaperf.rst
@@ -69,6 +69,7 @@ along with the application to demonstrate all the parameters.
    lcore_dma1=lcore=11,dev=0000:00:04.2,dir=dev2mem,raddr=0x200000000,coreid=1,pfid=2,vfid=3
    lcore_dma2=lcore=12,dev=0000:00:04.3,dir=mem2dev,raddr=0x200000000,coreid=1,pfid=2,vfid=3
    eal_args=--in-memory --file-prefix=test
+   use_enq_deq_ops=0
 
 The configuration file is divided into multiple sections, each section represents a test case.
 The four mandatory variables ``mem_size``, ``buf_size``, ``dma_ring_size``, and ``kick_batch``
@@ -83,6 +84,7 @@ The variables for mem2dev and dev2mem copy are
 and can vary for each device.
 
 For scatter-gather copy test ``dma_src_sge``, ``dma_dst_sge`` must be configured.
+Enqueue and dequeue operations can be enabled by setting ``use_enq_deq_ops=1``.
 
 Each case can only have one variable change,
 and each change will generate a scenario, so each case can have multiple scenarios.
@@ -170,6 +172,9 @@ Configuration Parameters
 ``eal_args``
   Specifies the EAL arguments.
 
+  ``use_enq_deq_ops``
+  Specifies whether to use enqueue/dequeue operations.
+  ``0`` indicates to not use and ``1`` to use.
 
 Running the Application
 -----------------------
-- 
2.43.0


  parent reply	other threads:[~2025-05-19 18:56 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-16 10:09 [25.11 PATCH 0/3] Introduce DMA enqueue/dequeue operations pbhagavatula
2025-04-16 10:09 ` [25.11 PATCH 1/3] dmadev: add enqueue dequeue operations pbhagavatula
2025-04-24  7:01   ` fengchengwen
2025-05-02 10:38     ` [EXTERNAL] " Pavan Nikhilesh Bhagavatula
2025-04-16 10:09 ` [25.11 PATCH 2/3] dma/cnxk: implement enqueue dequeue ops pbhagavatula
2025-04-16 10:09 ` [25.11 PATCH 3/3] eventdev: refactor DMA adapter ops pbhagavatula
2025-05-19 18:55 ` [25.11 PATCH v2 0/5] Introduce DMA enqueue/dequeue operations pbhagavatula
2025-05-19 18:56   ` [25.11 PATCH v2 1/5] dmadev: add enqueue dequeue operations pbhagavatula
2025-05-19 18:56   ` [25.11 PATCH v2 2/5] test/dma: " pbhagavatula
2025-05-19 18:56   ` pbhagavatula [this message]
2025-05-19 18:56   ` [25.11 PATCH v2 4/5] dma/cnxk: implement enqueue dequeue ops pbhagavatula
2025-05-19 18:56   ` [25.11 PATCH v2 5/5] eventdev: refactor DMA adapter ops pbhagavatula

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250519185604.5584-4-pbhagavatula@marvell.com \
    --to=pbhagavatula@marvell.com \
    --cc=anatoly.burakov@intel.com \
    --cc=bruce.richardson@intel.com \
    --cc=conor.walsh@intel.com \
    --cc=dev@dpdk.org \
    --cc=fengchengwen@huawei.com \
    --cc=g.singh@nxp.com \
    --cc=gmuthukrishn@marvell.com \
    --cc=hemant.agrawal@nxp.com \
    --cc=honest.jiang@foxmail.com \
    --cc=jerinj@marvell.com \
    --cc=kevin.laatz@intel.com \
    --cc=sachin.saxena@nxp.com \
    --cc=vattunuru@marvell.com \
    --cc=vvelumuri@marvell.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).