From: Gagandeep Singh <g.singh@nxp.com>
To: dev@dpdk.org, Hemant Agrawal <hemant.agrawal@nxp.com>,
	Anatoly Burakov <anatoly.burakov@intel.com>
Cc: Jun Yang <jun.yang@nxp.com>
Subject: [v7 02/15] dma/dpaa2: refactor driver code
Date: Tue, 15 Oct 2024 12:43:50 +0530
Message-ID: <20241015071403.1940833-3-g.singh@nxp.com>
In-Reply-To: <20241015071403.1940833-1-g.singh@nxp.com>

From: Jun Yang <jun.yang@nxp.com>

Refactor the driver code with changes covering:
- multiple HW queues
- DMA single copy and SG copy
- silent mode

Signed-off-by: Jun Yang <jun.yang@nxp.com>
---
 doc/guides/dmadevs/dpaa2.rst           |    8 +
 drivers/dma/dpaa2/dpaa2_qdma.c         | 2200 ++++++++++++------------
 drivers/dma/dpaa2/dpaa2_qdma.h         |  148 +-
 drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h |  130 +-
 drivers/dma/dpaa2/version.map          |   13 -
 5 files changed, 1150 insertions(+), 1349 deletions(-)
 delete mode 100644 drivers/dma/dpaa2/version.map
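
A minimal usage sketch of the three items above, written against the
generic dmadev API only (not part of the patch): dev_id/vchan are assumed
to be set up via rte_dma_configure()/rte_dma_vchan_setup(), and the helper
name is a placeholder.

#include <errno.h>
#include <stdbool.h>
#include <rte_dmadev.h>

/* Sketch only: enqueue one flat copy and one 2-entry SG copy on a vchan
 * (each vchan maps to its own HW queue after this refactor), then poll
 * for completions.
 */
static int
qdma_usage_sketch(int16_t dev_id, uint16_t vchan,
		  struct rte_dma_sge *src, struct rte_dma_sge *dst,
		  rte_iova_t s_iova, rte_iova_t d_iova, uint32_t len)
{
	uint16_t last_idx = 0;
	bool error = false;
	int ret;

	/* Flat copy; RTE_DMA_OP_FLAG_SUBMIT rings the doorbell right away */
	ret = rte_dma_copy(dev_id, vchan, s_iova, d_iova, len,
			   RTE_DMA_OP_FLAG_SUBMIT);
	if (ret < 0)
		return ret;

	/* SG copy; this driver requires nb_src == nb_dst and matching
	 * per-entry lengths, and batches FDs until an explicit submit.
	 */
	ret = rte_dma_copy_sg(dev_id, vchan, src, dst, 2, 2, 0);
	if (ret < 0)
		return ret;
	ret = rte_dma_submit(dev_id, vchan);
	if (ret < 0)
		return ret;

	/* Completion poll; skipped entirely when the vchan runs in
	 * silent mode (RTE_DMA_CAPA_SILENT), where the driver recycles
	 * descriptors internally.
	 */
	while (rte_dma_completed(dev_id, vchan, 1, &last_idx, &error) == 0)
		;

	return error ? -EIO : 0;
}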

diff --git a/doc/guides/dmadevs/dpaa2.rst b/doc/guides/dmadevs/dpaa2.rst
index d2c26231e2..eeeb5d52a8 100644
--- a/doc/guides/dmadevs/dpaa2.rst
+++ b/doc/guides/dmadevs/dpaa2.rst
@@ -73,3 +73,11 @@ Platform Requirement
 
 DPAA2 drivers for DPDK can only work on NXP SoCs as listed in the
 ``Supported DPAA2 SoCs``.
+
+Device Arguments
+----------------
+* Use the dev arg option ``fle_pre_populate=1`` to pre-populate all
+  DMA descriptors with initialized values.
+  Usage example: ``fslmc:dpdmai.1,fle_pre_populate=1``
+* Use the dev arg option ``desc_debug=1`` to enable descriptor debug logs.
+  Usage example: ``fslmc:dpdmai.1,desc_debug=1``
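
Usage note (outside the patch): both dev args ride on the EAL allow option
for the fslmc bus and can be combined on one device; the application name
below is a placeholder:

  ./dpdk-app -a fslmc:dpdmai.1,fle_pre_populate=1,desc_debug=1
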
diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c
index 5d4749eae3..e364cc8f3d 100644
--- a/drivers/dma/dpaa2/dpaa2_qdma.c
+++ b/drivers/dma/dpaa2/dpaa2_qdma.c
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018-2022 NXP
+ * Copyright 2018-2024 NXP
  */
 
 #include <rte_eal.h>
@@ -14,220 +14,370 @@
 #include "dpaa2_qdma.h"
 #include "dpaa2_qdma_logs.h"
 
-#define DPAA2_QDMA_PREFETCH "prefetch"
+#define DPAA2_QDMA_FLE_PRE_POPULATE "fle_pre_populate"
+#define DPAA2_QDMA_DESC_DEBUG "desc_debug"
 
-uint32_t dpaa2_coherent_no_alloc_cache;
-uint32_t dpaa2_coherent_alloc_cache;
+static uint32_t dpaa2_coherent_no_alloc_cache;
+static uint32_t dpaa2_coherent_alloc_cache;
 
-static inline int
-qdma_populate_fd_pci(phys_addr_t src, phys_addr_t dest,
-		     uint32_t len, struct qbman_fd *fd,
-		     struct dpaa2_qdma_rbp *rbp, int ser)
+static struct fsl_mc_io s_proc_mc_reg;
+
+static int
+check_devargs_handler(__rte_unused const char *key, const char *value,
+		      __rte_unused void *opaque)
 {
-	fd->simple_pci.saddr_lo = lower_32_bits((uint64_t) (src));
-	fd->simple_pci.saddr_hi = upper_32_bits((uint64_t) (src));
+	if (strcmp(value, "1"))
+		return -1;
 
-	fd->simple_pci.len_sl = len;
+	return 0;
+}
 
-	fd->simple_pci.bmt = 1;
-	fd->simple_pci.fmt = 3;
-	fd->simple_pci.sl = 1;
-	fd->simple_pci.ser = ser;
+static int
+dpaa2_qdma_get_devargs(struct rte_devargs *devargs, const char *key)
+{
+	struct rte_kvargs *kvlist;
 
-	fd->simple_pci.sportid = rbp->sportid;	/*pcie 3 */
-	fd->simple_pci.srbp = rbp->srbp;
-	if (rbp->srbp)
-		fd->simple_pci.rdttype = 0;
-	else
-		fd->simple_pci.rdttype = dpaa2_coherent_alloc_cache;
+	if (!devargs)
+		return 0;
 
-	/*dest is pcie memory */
-	fd->simple_pci.dportid = rbp->dportid;	/*pcie 3 */
-	fd->simple_pci.drbp = rbp->drbp;
-	if (rbp->drbp)
-		fd->simple_pci.wrttype = 0;
-	else
-		fd->simple_pci.wrttype = dpaa2_coherent_no_alloc_cache;
+	kvlist = rte_kvargs_parse(devargs->args, NULL);
+	if (!kvlist)
+		return 0;
 
-	fd->simple_pci.daddr_lo = lower_32_bits((uint64_t) (dest));
-	fd->simple_pci.daddr_hi = upper_32_bits((uint64_t) (dest));
+	if (!rte_kvargs_count(kvlist, key)) {
+		rte_kvargs_free(kvlist);
+		return 0;
+	}
 
-	return 0;
+	if (rte_kvargs_process(kvlist, key,
+			       check_devargs_handler, NULL) < 0) {
+		rte_kvargs_free(kvlist);
+		return 0;
+	}
+	rte_kvargs_free(kvlist);
+
+	return 1;
 }
 
 static inline int
-qdma_populate_fd_ddr(phys_addr_t src, phys_addr_t dest,
-		     uint32_t len, struct qbman_fd *fd, int ser)
+qdma_cntx_idx_ring_eq(struct qdma_cntx_idx_ring *ring,
+	const uint16_t *elem, uint16_t nb,
+	uint16_t *free_space)
 {
-	fd->simple_ddr.saddr_lo = lower_32_bits((uint64_t) (src));
-	fd->simple_ddr.saddr_hi = upper_32_bits((uint64_t) (src));
-
-	fd->simple_ddr.len = len;
-
-	fd->simple_ddr.bmt = 1;
-	fd->simple_ddr.fmt = 3;
-	fd->simple_ddr.sl = 1;
-	fd->simple_ddr.ser = ser;
-	/**
-	 * src If RBP=0 {NS,RDTTYPE[3:0]}: 0_1011
-	 * Coherent copy of cacheable memory,
-	* lookup in downstream cache, no allocate
-	 * on miss
-	 */
-	fd->simple_ddr.rns = 0;
-	fd->simple_ddr.rdttype = dpaa2_coherent_alloc_cache;
-	/**
-	 * dest If RBP=0 {NS,WRTTYPE[3:0]}: 0_0111
-	 * Coherent write of cacheable memory,
-	 * lookup in downstream cache, no allocate on miss
-	 */
-	fd->simple_ddr.wns = 0;
-	fd->simple_ddr.wrttype = dpaa2_coherent_no_alloc_cache;
+	uint16_t i;
 
-	fd->simple_ddr.daddr_lo = lower_32_bits((uint64_t) (dest));
-	fd->simple_ddr.daddr_hi = upper_32_bits((uint64_t) (dest));
+	if (unlikely(nb > ring->free_space))
+		return 0;
 
-	return 0;
+	for (i = 0; i < nb; i++) {
+		ring->cntx_idx_ring[ring->tail] = elem[i];
+		ring->tail = (ring->tail + 1) &
+			(DPAA2_QDMA_MAX_DESC - 1);
+	}
+	ring->free_space -= nb;
+	ring->nb_in_ring += nb;
+
+	if (free_space)
+		*free_space = ring->free_space;
+
+	return nb;
 }
 
-static void
-dpaa2_qdma_populate_fle(struct qbman_fle *fle,
-			uint64_t fle_iova,
-			struct dpaa2_qdma_rbp *rbp,
-			uint64_t src, uint64_t dest,
-			size_t len, uint32_t flags, uint32_t fmt)
+static inline int
+qdma_cntx_idx_ring_dq(struct qdma_cntx_idx_ring *ring,
+	uint16_t *elem, uint16_t max)
 {
-	struct qdma_sdd *sdd;
-	uint64_t sdd_iova;
+	int ret = ring->nb_in_ring > max ? max : ring->nb_in_ring;
+
+	if (!ret)
+		return 0;
+
+	if ((ring->start + ret) < DPAA2_QDMA_MAX_DESC) {
+		rte_memcpy(elem,
+			&ring->cntx_idx_ring[ring->start],
+			ret * sizeof(uint16_t));
+		ring->start += ret;
+	} else {
+		rte_memcpy(elem,
+			&ring->cntx_idx_ring[ring->start],
+			(DPAA2_QDMA_MAX_DESC - ring->start) *
+			sizeof(uint16_t));
+		rte_memcpy(&elem[DPAA2_QDMA_MAX_DESC - ring->start],
+			&ring->cntx_idx_ring[0],
+			(ret - DPAA2_QDMA_MAX_DESC + ring->start) *
+			sizeof(uint16_t));
+		ring->start = (ring->start + ret) & (DPAA2_QDMA_MAX_DESC - 1);
+	}
+	ring->free_space += ret;
+	ring->nb_in_ring -= ret;
+
+	return ret;
+}
+
+static int
+dpaa2_qdma_multi_eq(struct qdma_virt_queue *qdma_vq)
+{
+	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_vq->dpdmai_dev;
+	uint16_t txq_id = dpdmai_dev->tx_queue[qdma_vq->vq_id].fqid;
+	struct qbman_eq_desc eqdesc;
+	struct qbman_swp *swp;
+	uint32_t num_to_send = 0;
+	uint16_t num_tx = 0;
+	uint32_t enqueue_loop, loop;
+	int ret;
+	struct qbman_fd *fd = qdma_vq->fd;
+	uint16_t nb_fds = qdma_vq->fd_idx, idx, dst_idx;
+
+	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
+		ret = dpaa2_affine_qbman_swp();
+		if (ret) {
+			DPAA2_QDMA_ERR("Failed to allocate IO portal, tid: %d",
+				rte_gettid());
+			return -EIO;
+		}
+	}
+	swp = DPAA2_PER_LCORE_PORTAL;
+
+	/* Prepare enqueue descriptor */
+	qbman_eq_desc_clear(&eqdesc);
+	qbman_eq_desc_set_fq(&eqdesc, txq_id);
+	qbman_eq_desc_set_no_orp(&eqdesc, 0);
+	qbman_eq_desc_set_response(&eqdesc, 0, 0);
+
+	while (nb_fds > 0) {
+		num_to_send = (nb_fds > dpaa2_eqcr_size) ?
+			dpaa2_eqcr_size : nb_fds;
 
-	sdd = (struct qdma_sdd *)
-			((uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET +
-			QDMA_FLE_SDD_OFFSET);
-	sdd_iova = fle_iova - QDMA_FLE_FLE_OFFSET + QDMA_FLE_SDD_OFFSET;
+		/* Enqueue the packet to the QBMAN */
+		enqueue_loop = 0;
+		loop = num_to_send;
+
+		while (enqueue_loop < loop) {
+			ret = qbman_swp_enqueue_multiple(swp,
+				&eqdesc,
+				&fd[num_tx + enqueue_loop],
+				NULL,
+				loop - enqueue_loop);
+			if (likely(ret >= 0))
+				enqueue_loop += ret;
+		}
+		num_tx += num_to_send;
+		nb_fds -= loop;
+	}
+
+	qdma_vq->num_enqueues += num_tx;
+	if (unlikely(num_tx != qdma_vq->fd_idx)) {
+		dst_idx = 0;
+		for (idx = num_tx; idx < qdma_vq->fd_idx; idx++) {
+			rte_memcpy(&qdma_vq->fd[dst_idx],
+				&qdma_vq->fd[idx],
+				sizeof(struct qbman_fd));
+			dst_idx++;
+		}
+	}
+	qdma_vq->fd_idx -= num_tx;
+
+	return num_tx;
+}
+
+static void
+fle_sdd_pre_populate(struct qdma_cntx_fle_sdd *fle_sdd,
+	struct dpaa2_qdma_rbp *rbp, uint64_t src, uint64_t dest,
+	uint32_t fmt)
+{
+	struct qbman_fle *fle = fle_sdd->fle;
+	struct qdma_sdd *sdd = fle_sdd->sdd;
+	uint64_t sdd_iova = DPAA2_VADDR_TO_IOVA(sdd);
 
 	/* first frame list to source descriptor */
-	DPAA2_SET_FLE_ADDR(fle, sdd_iova);
-	DPAA2_SET_FLE_LEN(fle, (2 * (sizeof(struct qdma_sdd))));
+	DPAA2_SET_FLE_ADDR(&fle[DPAA2_QDMA_SDD_FLE], sdd_iova);
+	DPAA2_SET_FLE_LEN(&fle[DPAA2_QDMA_SDD_FLE],
+		DPAA2_QDMA_MAX_SDD * (sizeof(struct qdma_sdd)));
 
 	/* source and destination descriptor */
 	if (rbp && rbp->enable) {
 		/* source */
-		sdd->read_cmd.portid = rbp->sportid;
-		sdd->rbpcmd_simple.pfid = rbp->spfid;
-		sdd->rbpcmd_simple.vfid = rbp->svfid;
+		sdd[DPAA2_QDMA_SRC_SDD].read_cmd.portid =
+			rbp->sportid;
+		sdd[DPAA2_QDMA_SRC_SDD].rbpcmd_simple.pfid =
+			rbp->spfid;
+		sdd[DPAA2_QDMA_SRC_SDD].rbpcmd_simple.vfid =
+			rbp->svfid;
+		sdd[DPAA2_QDMA_SRC_SDD].rbpcmd_simple.vfa =
+			rbp->svfa;
 
 		if (rbp->srbp) {
-			sdd->read_cmd.rbp = rbp->srbp;
-			sdd->read_cmd.rdtype = DPAA2_RBP_MEM_RW;
+			sdd[DPAA2_QDMA_SRC_SDD].read_cmd.rbp =
+				rbp->srbp;
+			sdd[DPAA2_QDMA_SRC_SDD].read_cmd.rdtype =
+				DPAA2_RBP_MEM_RW;
 		} else {
-			sdd->read_cmd.rdtype = dpaa2_coherent_no_alloc_cache;
+			sdd[DPAA2_QDMA_SRC_SDD].read_cmd.rdtype =
+				dpaa2_coherent_no_alloc_cache;
 		}
-		sdd++;
 		/* destination */
-		sdd->write_cmd.portid = rbp->dportid;
-		sdd->rbpcmd_simple.pfid = rbp->dpfid;
-		sdd->rbpcmd_simple.vfid = rbp->dvfid;
+		sdd[DPAA2_QDMA_DST_SDD].write_cmd.portid =
+			rbp->dportid;
+		sdd[DPAA2_QDMA_DST_SDD].rbpcmd_simple.pfid =
+			rbp->dpfid;
+		sdd[DPAA2_QDMA_DST_SDD].rbpcmd_simple.vfid =
+			rbp->dvfid;
+		sdd[DPAA2_QDMA_DST_SDD].rbpcmd_simple.vfa =
+			rbp->dvfa;
 
 		if (rbp->drbp) {
-			sdd->write_cmd.rbp = rbp->drbp;
-			sdd->write_cmd.wrttype = DPAA2_RBP_MEM_RW;
+			sdd[DPAA2_QDMA_DST_SDD].write_cmd.rbp =
+				rbp->drbp;
+			sdd[DPAA2_QDMA_DST_SDD].write_cmd.wrttype =
+				DPAA2_RBP_MEM_RW;
 		} else {
-			sdd->write_cmd.wrttype = dpaa2_coherent_alloc_cache;
+			sdd[DPAA2_QDMA_DST_SDD].write_cmd.wrttype =
+				dpaa2_coherent_alloc_cache;
 		}
-
 	} else {
-		sdd->read_cmd.rdtype = dpaa2_coherent_no_alloc_cache;
-		sdd++;
-		sdd->write_cmd.wrttype = dpaa2_coherent_alloc_cache;
+		sdd[DPAA2_QDMA_SRC_SDD].read_cmd.rdtype =
+			dpaa2_coherent_no_alloc_cache;
+		sdd[DPAA2_QDMA_DST_SDD].write_cmd.wrttype =
+			dpaa2_coherent_alloc_cache;
 	}
-	fle++;
 	/* source frame list to source buffer */
-	if (flags & RTE_DPAA2_QDMA_JOB_SRC_PHY) {
-		DPAA2_SET_FLE_ADDR(fle, src);
+	DPAA2_SET_FLE_ADDR(&fle[DPAA2_QDMA_SRC_FLE], src);
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
-		DPAA2_SET_FLE_BMT(fle);
+	DPAA2_SET_FLE_BMT(&fle[DPAA2_QDMA_SRC_FLE]);
 #endif
-	} else {
-		DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(src));
-	}
-	fle->word4.fmt = fmt;
-	DPAA2_SET_FLE_LEN(fle, len);
+	fle[DPAA2_QDMA_SRC_FLE].word4.fmt = fmt;
 
-	fle++;
 	/* destination frame list to destination buffer */
-	if (flags & RTE_DPAA2_QDMA_JOB_DEST_PHY) {
+	DPAA2_SET_FLE_ADDR(&fle[DPAA2_QDMA_DST_FLE], dest);
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
-		DPAA2_SET_FLE_BMT(fle);
+	DPAA2_SET_FLE_BMT(&fle[DPAA2_QDMA_DST_FLE]);
 #endif
-		DPAA2_SET_FLE_ADDR(fle, dest);
-	} else {
-		DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(dest));
-	}
-	fle->word4.fmt = fmt;
-	DPAA2_SET_FLE_LEN(fle, len);
+	fle[DPAA2_QDMA_DST_FLE].word4.fmt = fmt;
 
 	/* Final bit: 1, for last frame list */
-	DPAA2_SET_FLE_FIN(fle);
+	DPAA2_SET_FLE_FIN(&fle[DPAA2_QDMA_DST_FLE]);
 }
 
-static inline int
-dpdmai_dev_set_fd_us(struct qdma_virt_queue *qdma_vq,
-		     struct qbman_fd *fd,
-		     struct rte_dpaa2_qdma_job **job,
-		     uint16_t nb_jobs)
+static void
+sg_entry_pre_populate(struct qdma_cntx_sg *sg_cntx)
 {
+	uint16_t i;
+	struct qdma_sg_entry *src_sge = sg_cntx->sg_src_entry;
+	struct qdma_sg_entry *dst_sge = sg_cntx->sg_dst_entry;
+
+	for (i = 0; i < RTE_DPAA2_QDMA_JOB_SUBMIT_MAX; i++) {
+		/* source SG */
+		src_sge[i].ctrl.sl = QDMA_SG_SL_LONG;
+		src_sge[i].ctrl.fmt = QDMA_SG_FMT_SDB;
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+		src_sge[i].ctrl.bmt = QDMA_SG_BMT_ENABLE;
+#else
+		src_sge[i].ctrl.bmt = QDMA_SG_BMT_DISABLE;
+#endif
+		/* destination SG */
+		dst_sge[i].ctrl.sl = QDMA_SG_SL_LONG;
+		dst_sge[i].ctrl.fmt = QDMA_SG_FMT_SDB;
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+		dst_sge[i].ctrl.bmt = QDMA_SG_BMT_ENABLE;
+#else
+		dst_sge[i].ctrl.bmt = QDMA_SG_BMT_DISABLE;
+#endif
+	}
+}
+
+static void
+fle_sdd_sg_pre_populate(struct qdma_cntx_sg *sg_cntx,
+	struct qdma_virt_queue *qdma_vq)
+{
+	struct qdma_sg_entry *src_sge = sg_cntx->sg_src_entry;
+	struct qdma_sg_entry *dst_sge = sg_cntx->sg_dst_entry;
+	rte_iova_t src_sge_iova, dst_sge_iova;
 	struct dpaa2_qdma_rbp *rbp = &qdma_vq->rbp;
-	struct rte_dpaa2_qdma_job **ppjob;
-	size_t iova;
-	int ret = 0, loop;
-	int ser = (qdma_vq->flags & DPAA2_QDMA_VQ_NO_RESPONSE) ?
-				0 : 1;
-
-	for (loop = 0; loop < nb_jobs; loop++) {
-		if (job[loop]->src & QDMA_RBP_UPPER_ADDRESS_MASK)
-			iova = (size_t)job[loop]->dest;
-		else
-			iova = (size_t)job[loop]->src;
-
-		/* Set the metadata */
-		job[loop]->vq_id = qdma_vq->vq_id;
-		ppjob = (struct rte_dpaa2_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1;
-		*ppjob = job[loop];
-
-		if ((rbp->drbp == 1) || (rbp->srbp == 1))
-			ret = qdma_populate_fd_pci((phys_addr_t)job[loop]->src,
-					(phys_addr_t)job[loop]->dest,
-					job[loop]->len, &fd[loop], rbp, ser);
-		else
-			ret = qdma_populate_fd_ddr((phys_addr_t)job[loop]->src,
-					(phys_addr_t)job[loop]->dest,
-					job[loop]->len, &fd[loop], ser);
+
+	memset(sg_cntx, 0, sizeof(struct qdma_cntx_sg));
+
+	src_sge_iova = DPAA2_VADDR_TO_IOVA(src_sge);
+	dst_sge_iova = DPAA2_VADDR_TO_IOVA(dst_sge);
+
+	sg_entry_pre_populate(sg_cntx);
+	fle_sdd_pre_populate(&sg_cntx->fle_sdd,
+		rbp, src_sge_iova, dst_sge_iova,
+		QBMAN_FLE_WORD4_FMT_SGE);
+}
+
+static inline uint32_t
+sg_entry_post_populate(const struct rte_dma_sge *src,
+	const struct rte_dma_sge *dst, struct qdma_cntx_sg *sg_cntx,
+	uint16_t nb_sge)
+{
+	uint16_t i;
+	uint32_t total_len = 0;
+	struct qdma_sg_entry *src_sge = sg_cntx->sg_src_entry;
+	struct qdma_sg_entry *dst_sge = sg_cntx->sg_dst_entry;
+
+	for (i = 0; i < (nb_sge - 1); i++) {
+		if (unlikely(src[i].length != dst[i].length))
+			return -ENOTSUP;
+		src_sge->addr_lo = (uint32_t)src[i].addr;
+		src_sge->addr_hi = (src[i].addr >> 32);
+		src_sge->data_len.data_len_sl0 = src[i].length;
+
+		dst_sge->addr_lo = (uint32_t)dst[i].addr;
+		dst_sge->addr_hi = (dst[i].addr >> 32);
+		dst_sge->data_len.data_len_sl0 = dst[i].length;
+		total_len += dst[i].length;
+
+		src_sge->ctrl.f = 0;
+		dst_sge->ctrl.f = 0;
+		src_sge++;
+		dst_sge++;
 	}
 
-	return ret;
+	if (unlikely(src[i].length != dst[i].length))
+		return -ENOTSUP;
+
+	src_sge->addr_lo = (uint32_t)src[i].addr;
+	src_sge->addr_hi = (src[i].addr >> 32);
+	src_sge->data_len.data_len_sl0 = src[i].length;
+
+	dst_sge->addr_lo = (uint32_t)dst[i].addr;
+	dst_sge->addr_hi = (dst[i].addr >> 32);
+	dst_sge->data_len.data_len_sl0 = dst[i].length;
+
+	total_len += dst[i].length;
+	sg_cntx->job_nb = nb_sge;
+
+	src_sge->ctrl.f = QDMA_SG_F;
+	dst_sge->ctrl.f = QDMA_SG_F;
+
+	return total_len;
+}
+
+static inline void
+sg_fle_post_populate(struct qbman_fle fle[],
+	size_t len)
+{
+	DPAA2_SET_FLE_LEN(&fle[DPAA2_QDMA_SRC_FLE], len);
+	DPAA2_SET_FLE_LEN(&fle[DPAA2_QDMA_DST_FLE], len);
 }
 
-static uint32_t
-qdma_populate_sg_entry(struct rte_dpaa2_qdma_job **jobs,
-		       struct qdma_sg_entry *src_sge,
-		       struct qdma_sg_entry *dst_sge,
-		       uint16_t nb_jobs)
+static inline uint32_t
+sg_entry_populate(const struct rte_dma_sge *src,
+	const struct rte_dma_sge *dst, struct qdma_cntx_sg *sg_cntx,
+	uint16_t nb_sge)
 {
 	uint16_t i;
 	uint32_t total_len = 0;
-	uint64_t iova;
+	struct qdma_sg_entry *src_sge = sg_cntx->sg_src_entry;
+	struct qdma_sg_entry *dst_sge = sg_cntx->sg_dst_entry;
 
-	for (i = 0; i < nb_jobs; i++) {
-		/* source SG */
-		if (likely(jobs[i]->flags & RTE_DPAA2_QDMA_JOB_SRC_PHY)) {
-			src_sge->addr_lo = (uint32_t)jobs[i]->src;
-			src_sge->addr_hi = (jobs[i]->src >> 32);
-		} else {
-			iova = DPAA2_VADDR_TO_IOVA(jobs[i]->src);
-			src_sge->addr_lo = (uint32_t)iova;
-			src_sge->addr_hi = iova >> 32;
-		}
-		src_sge->data_len.data_len_sl0 = jobs[i]->len;
+	for (i = 0; i < nb_sge; i++) {
+		if (unlikely(src[i].length != dst[i].length))
+			return -ENOTSUP;
+
+		src_sge->addr_lo = (uint32_t)src[i].addr;
+		src_sge->addr_hi = (src[i].addr >> 32);
+		src_sge->data_len.data_len_sl0 = src[i].length;
 		src_sge->ctrl.sl = QDMA_SG_SL_LONG;
 		src_sge->ctrl.fmt = QDMA_SG_FMT_SDB;
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
@@ -235,16 +385,9 @@ qdma_populate_sg_entry(struct rte_dpaa2_qdma_job **jobs,
 #else
 		src_sge->ctrl.bmt = QDMA_SG_BMT_DISABLE;
 #endif
-		/* destination SG */
-		if (likely(jobs[i]->flags & RTE_DPAA2_QDMA_JOB_DEST_PHY)) {
-			dst_sge->addr_lo = (uint32_t)jobs[i]->dest;
-			dst_sge->addr_hi = (jobs[i]->dest >> 32);
-		} else {
-			iova = DPAA2_VADDR_TO_IOVA(jobs[i]->dest);
-			dst_sge->addr_lo = (uint32_t)iova;
-			dst_sge->addr_hi = iova >> 32;
-		}
-		dst_sge->data_len.data_len_sl0 = jobs[i]->len;
+		dst_sge->addr_lo = (uint32_t)dst[i].addr;
+		dst_sge->addr_hi = (dst[i].addr >> 32);
+		dst_sge->data_len.data_len_sl0 = dst[i].length;
 		dst_sge->ctrl.sl = QDMA_SG_SL_LONG;
 		dst_sge->ctrl.fmt = QDMA_SG_FMT_SDB;
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
@@ -252,9 +395,9 @@ qdma_populate_sg_entry(struct rte_dpaa2_qdma_job **jobs,
 #else
 		dst_sge->ctrl.bmt = QDMA_SG_BMT_DISABLE;
 #endif
-		total_len += jobs[i]->len;
+		total_len += src[i].length;
 
-		if (i == (nb_jobs - 1)) {
+		if (i == (nb_sge - 1)) {
 			src_sge->ctrl.f = QDMA_SG_F;
 			dst_sge->ctrl.f = QDMA_SG_F;
 		} else {
@@ -265,325 +408,452 @@ qdma_populate_sg_entry(struct rte_dpaa2_qdma_job **jobs,
 		dst_sge++;
 	}
 
+	sg_cntx->job_nb = nb_sge;
+
 	return total_len;
 }
 
-static inline int
-dpdmai_dev_set_multi_fd_lf_no_rsp(struct qdma_virt_queue *qdma_vq,
-				  struct qbman_fd *fd,
-				  struct rte_dpaa2_qdma_job **job,
-				  uint16_t nb_jobs)
+static inline void
+fle_populate(struct qbman_fle fle[],
+	struct qdma_sdd sdd[], uint64_t sdd_iova,
+	struct dpaa2_qdma_rbp *rbp,
+	uint64_t src_iova, uint64_t dst_iova, size_t len,
+	uint32_t fmt)
 {
-	struct dpaa2_qdma_rbp *rbp = &qdma_vq->rbp;
-	struct rte_dpaa2_qdma_job **ppjob;
-	uint16_t i;
-	void *elem;
-	struct qbman_fle *fle;
-	uint64_t elem_iova, fle_iova;
+	/* first frame list to source descriptor */
+	DPAA2_SET_FLE_ADDR(&fle[DPAA2_QDMA_SDD_FLE], sdd_iova);
+	DPAA2_SET_FLE_LEN(&fle[DPAA2_QDMA_SDD_FLE],
+		(DPAA2_QDMA_MAX_SDD * (sizeof(struct qdma_sdd))));
 
-	for (i = 0; i < nb_jobs; i++) {
-		elem = job[i]->usr_elem;
-#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
-		elem_iova = rte_mempool_virt2iova(elem);
-#else
-		elem_iova = DPAA2_VADDR_TO_IOVA(elem);
-#endif
+	/* source and destination descriptor */
+	if (rbp && rbp->enable) {
+		/* source */
+		sdd[DPAA2_QDMA_SRC_SDD].read_cmd.portid =
+			rbp->sportid;
+		sdd[DPAA2_QDMA_SRC_SDD].rbpcmd_simple.pfid =
+			rbp->spfid;
+		sdd[DPAA2_QDMA_SRC_SDD].rbpcmd_simple.vfid =
+			rbp->svfid;
+		sdd[DPAA2_QDMA_SRC_SDD].rbpcmd_simple.vfa =
+			rbp->svfa;
 
-		ppjob = (struct rte_dpaa2_qdma_job **)
-			((uintptr_t)(uint64_t)elem +
-			 QDMA_FLE_SINGLE_JOB_OFFSET);
-		*ppjob = job[i];
+		if (rbp->srbp) {
+			sdd[DPAA2_QDMA_SRC_SDD].read_cmd.rbp =
+				rbp->srbp;
+			sdd[DPAA2_QDMA_SRC_SDD].read_cmd.rdtype =
+				DPAA2_RBP_MEM_RW;
+		} else {
+			sdd[DPAA2_QDMA_SRC_SDD].read_cmd.rdtype =
+				dpaa2_coherent_no_alloc_cache;
+		}
+		/* destination */
+		sdd[DPAA2_QDMA_DST_SDD].write_cmd.portid =
+			rbp->dportid;
+		sdd[DPAA2_QDMA_DST_SDD].rbpcmd_simple.pfid =
+			rbp->dpfid;
+		sdd[DPAA2_QDMA_DST_SDD].rbpcmd_simple.vfid =
+			rbp->dvfid;
+		sdd[DPAA2_QDMA_DST_SDD].rbpcmd_simple.vfa =
+			rbp->dvfa;
 
-		job[i]->vq_id = qdma_vq->vq_id;
+		if (rbp->drbp) {
+			sdd[DPAA2_QDMA_DST_SDD].write_cmd.rbp =
+				rbp->drbp;
+			sdd[DPAA2_QDMA_DST_SDD].write_cmd.wrttype =
+				DPAA2_RBP_MEM_RW;
+		} else {
+			sdd[DPAA2_QDMA_DST_SDD].write_cmd.wrttype =
+				dpaa2_coherent_alloc_cache;
+		}
 
-		fle = (struct qbman_fle *)
-			((uintptr_t)(uint64_t)elem + QDMA_FLE_FLE_OFFSET);
-		fle_iova = elem_iova + QDMA_FLE_FLE_OFFSET;
+	} else {
+		sdd[DPAA2_QDMA_SRC_SDD].read_cmd.rdtype =
+			dpaa2_coherent_no_alloc_cache;
+		sdd[DPAA2_QDMA_DST_SDD].write_cmd.wrttype =
+			dpaa2_coherent_alloc_cache;
+	}
+	/* source frame list to source buffer */
+	DPAA2_SET_FLE_ADDR(&fle[DPAA2_QDMA_SRC_FLE], src_iova);
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+	DPAA2_SET_FLE_BMT(&fle[DPAA2_QDMA_SRC_FLE]);
+#endif
+	fle[DPAA2_QDMA_SRC_FLE].word4.fmt = fmt;
+	DPAA2_SET_FLE_LEN(&fle[DPAA2_QDMA_SRC_FLE], len);
 
-		DPAA2_SET_FD_ADDR(&fd[i], fle_iova);
-		DPAA2_SET_FD_COMPOUND_FMT(&fd[i]);
+	/* destination frame list to destination buffer */
+	DPAA2_SET_FLE_ADDR(&fle[DPAA2_QDMA_DST_FLE], dst_iova);
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+	DPAA2_SET_FLE_BMT(&fle[DPAA2_QDMA_DST_FLE]);
+#endif
+	fle[DPAA2_QDMA_DST_FLE].word4.fmt = fmt;
+	DPAA2_SET_FLE_LEN(&fle[DPAA2_QDMA_DST_FLE], len);
 
-		memset(fle, 0, DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle) +
-				DPAA2_QDMA_MAX_SDD * sizeof(struct qdma_sdd));
+	/* Final bit: 1, for last frame list */
+	DPAA2_SET_FLE_FIN(&fle[DPAA2_QDMA_DST_FLE]);
+}
 
-		dpaa2_qdma_populate_fle(fle, fle_iova, rbp,
-			job[i]->src, job[i]->dest, job[i]->len,
-			job[i]->flags, QBMAN_FLE_WORD4_FMT_SBF);
-	}
+static inline void
+fle_post_populate(struct qbman_fle fle[],
+	uint64_t src, uint64_t dest, size_t len)
+{
+	DPAA2_SET_FLE_ADDR(&fle[DPAA2_QDMA_SRC_FLE], src);
+	DPAA2_SET_FLE_LEN(&fle[DPAA2_QDMA_SRC_FLE], len);
 
-	return 0;
+	DPAA2_SET_FLE_ADDR(&fle[DPAA2_QDMA_DST_FLE], dest);
+	DPAA2_SET_FLE_LEN(&fle[DPAA2_QDMA_DST_FLE], len);
 }
 
 static inline int
-dpdmai_dev_set_multi_fd_lf(struct qdma_virt_queue *qdma_vq,
-			   struct qbman_fd *fd,
-			   struct rte_dpaa2_qdma_job **job,
-			   uint16_t nb_jobs)
+dpaa2_qdma_submit(void *dev_private, uint16_t vchan)
 {
-	struct dpaa2_qdma_rbp *rbp = &qdma_vq->rbp;
-	struct rte_dpaa2_qdma_job **ppjob;
-	uint16_t i;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
+	uint16_t expected = qdma_vq->fd_idx;
 	int ret;
-	void *elem[DPAA2_QDMA_MAX_DESC];
-	struct qbman_fle *fle;
-	uint64_t elem_iova, fle_iova;
 
-	ret = rte_mempool_get_bulk(qdma_vq->fle_pool, elem, nb_jobs);
-	if (ret) {
-		DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
-		return ret;
-	}
+	ret = dpaa2_qdma_multi_eq(qdma_vq);
+	if (likely(ret == expected))
+		return 0;
 
-	for (i = 0; i < nb_jobs; i++) {
-#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
-		elem_iova = rte_mempool_virt2iova(elem[i]);
-#else
-		elem_iova = DPAA2_VADDR_TO_IOVA(elem[i]);
-#endif
+	return -EBUSY;
+}
 
-		ppjob = (struct rte_dpaa2_qdma_job **)
-			((uintptr_t)(uint64_t)elem[i] +
-			 QDMA_FLE_SINGLE_JOB_OFFSET);
-		*ppjob = job[i];
+static inline void
+dpaa2_qdma_fle_dump(const struct qbman_fle *fle)
+{
+	DPAA2_QDMA_INFO("addr:0x%08x-0x%08x, len:%d, frc:0x%08x, bpid:%d",
+		fle->addr_hi, fle->addr_lo, fle->length, fle->frc,
+		fle->word4.bpid);
+	DPAA2_QDMA_INFO("ivp:%d, bmt:%d, off:%d, fmt:%d, sl:%d, f:%d",
+		fle->word4.ivp, fle->word4.bmt, fle->word4.offset,
+		fle->word4.fmt, fle->word4.sl, fle->word4.f);
+}
 
-		job[i]->vq_id = qdma_vq->vq_id;
+static inline void
+dpaa2_qdma_sdd_dump(const struct qdma_sdd *sdd)
+{
+	DPAA2_QDMA_INFO("stride:%d, rbpcmd:0x%08x, cmd:0x%08x",
+		sdd->stride, sdd->rbpcmd, sdd->cmd);
+}
+
+static inline void
+dpaa2_qdma_sge_dump(const struct qdma_sg_entry *sge)
+{
+	DPAA2_QDMA_INFO("addr 0x%08x-0x%08x, len:0x%08x, ctl:0x%08x",
+		sge->addr_hi, sge->addr_lo, sge->data_len.data_len_sl0,
+		sge->ctrl_fields);
+}
 
-		fle = (struct qbman_fle *)
-			((uintptr_t)(uint64_t)elem[i] + QDMA_FLE_FLE_OFFSET);
-		fle_iova = elem_iova + QDMA_FLE_FLE_OFFSET;
+static void
+dpaa2_qdma_long_fmt_dump(const struct qbman_fle *fle)
+{
+	int i;
+	const struct qdma_cntx_fle_sdd *fle_sdd;
+	const struct qdma_sdd *sdd;
+	const struct qdma_cntx_sg *cntx_sg = NULL;
+	const struct qdma_cntx_long *cntx_long = NULL;
 
-		DPAA2_SET_FD_ADDR(&fd[i], fle_iova);
-		DPAA2_SET_FD_COMPOUND_FMT(&fd[i]);
-		DPAA2_SET_FD_FRC(&fd[i], QDMA_SER_CTX);
+	fle_sdd = container_of(fle, const struct qdma_cntx_fle_sdd, fle[0]);
+	sdd = fle_sdd->sdd;
 
-		memset(fle, 0, DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle) +
-			DPAA2_QDMA_MAX_SDD * sizeof(struct qdma_sdd));
+	for (i = 0; i < DPAA2_QDMA_MAX_FLE; i++) {
+		DPAA2_QDMA_INFO("fle[%d] info:", i);
+		dpaa2_qdma_fle_dump(&fle[i]);
+	}
 
-		dpaa2_qdma_populate_fle(fle, fle_iova, rbp,
-				job[i]->src, job[i]->dest, job[i]->len,
-				job[i]->flags, QBMAN_FLE_WORD4_FMT_SBF);
+	if (fle[DPAA2_QDMA_SRC_FLE].word4.fmt !=
+		fle[DPAA2_QDMA_DST_FLE].word4.fmt) {
+		DPAA2_QDMA_ERR("fle[%d].fmt(%d) != fle[%d].fmt(%d)",
+			DPAA2_QDMA_SRC_FLE,
+			fle[DPAA2_QDMA_SRC_FLE].word4.fmt,
+			DPAA2_QDMA_DST_FLE,
+			fle[DPAA2_QDMA_DST_FLE].word4.fmt);
+
+		return;
+	} else if (fle[DPAA2_QDMA_SRC_FLE].word4.fmt ==
+		QBMAN_FLE_WORD4_FMT_SGE) {
+		cntx_sg = container_of(fle_sdd, const struct qdma_cntx_sg,
+			fle_sdd);
+	} else if (fle[DPAA2_QDMA_SRC_FLE].word4.fmt ==
+		QBMAN_FLE_WORD4_FMT_SBF) {
+		cntx_long = container_of(fle_sdd, const struct qdma_cntx_long,
+			fle_sdd);
+	} else {
+		DPAA2_QDMA_ERR("Unsupported fle format:%d",
+			fle[DPAA2_QDMA_SRC_FLE].word4.fmt);
+		return;
 	}
 
-	return 0;
+	for (i = 0; i < DPAA2_QDMA_MAX_SDD; i++) {
+		DPAA2_QDMA_INFO("sdd[%d] info:", i);
+		dpaa2_qdma_sdd_dump(&sdd[i]);
+	}
+
+	if (cntx_long) {
+		DPAA2_QDMA_INFO("long format/Single buffer cntx idx:%d",
+			cntx_long->cntx_idx);
+	}
+
+	if (cntx_sg) {
+		DPAA2_QDMA_INFO("long format/SG format, job number:%d",
+			cntx_sg->job_nb);
+		if (!cntx_sg->job_nb ||
+			cntx_sg->job_nb > RTE_DPAA2_QDMA_JOB_SUBMIT_MAX) {
+			DPAA2_QDMA_ERR("Invalid SG job number:%d",
+				cntx_sg->job_nb);
+			return;
+		}
+		for (i = 0; i < cntx_sg->job_nb; i++) {
+			DPAA2_QDMA_INFO("sg[%d] src info:", i);
+			dpaa2_qdma_sge_dump(&cntx_sg->sg_src_entry[i]);
+			DPAA2_QDMA_INFO("sg[%d] dst info:", i);
+			dpaa2_qdma_sge_dump(&cntx_sg->sg_dst_entry[i]);
+			DPAA2_QDMA_INFO("cntx_idx[%d]:%d", i,
+				cntx_sg->cntx_idx[i]);
+		}
+	}
 }
 
-static inline int
-dpdmai_dev_set_sg_fd_lf(struct qdma_virt_queue *qdma_vq,
-			struct qbman_fd *fd,
-			struct rte_dpaa2_qdma_job **job,
-			uint16_t nb_jobs)
+static int
+dpaa2_qdma_copy_sg(void *dev_private,
+	uint16_t vchan,
+	const struct rte_dma_sge *src,
+	const struct rte_dma_sge *dst,
+	uint16_t nb_src, uint16_t nb_dst,
+	uint64_t flags)
 {
-	struct dpaa2_qdma_rbp *rbp = &qdma_vq->rbp;
-	struct rte_dpaa2_qdma_job **ppjob;
-	void *elem;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
+	int ret = 0, expected, i;
+	uint32_t len;
+	struct qbman_fd *fd = &qdma_vq->fd[qdma_vq->fd_idx];
+	struct qdma_cntx_sg *cntx_sg = NULL;
+	rte_iova_t cntx_iova, fle_iova, sdd_iova;
+	rte_iova_t src_sge_iova, dst_sge_iova;
 	struct qbman_fle *fle;
-	uint64_t elem_iova, fle_iova, src, dst;
-	int ret = 0, i;
-	struct qdma_sg_entry *src_sge, *dst_sge;
-	uint32_t len, fmt, flags;
-
-	/*
-	 * Get an FLE/SDD from FLE pool.
-	 * Note: IO metadata is before the FLE and SDD memory.
-	 */
-	if (qdma_vq->flags & DPAA2_QDMA_VQ_NO_RESPONSE) {
-		elem = job[0]->usr_elem;
+	struct qdma_sdd *sdd;
+	const uint16_t *idx_addr = NULL;
+
+	if (unlikely(nb_src != nb_dst)) {
+		DPAA2_QDMA_ERR("SG entry src num(%d) != dst num(%d)",
+			nb_src, nb_dst);
+		return -ENOTSUP;
+	}
+
+	if (unlikely(nb_src > RTE_DPAA2_QDMA_JOB_SUBMIT_MAX)) {
+		DPAA2_QDMA_ERR("SG entry number(%d) > MAX(%d)",
+			nb_src, RTE_DPAA2_QDMA_JOB_SUBMIT_MAX);
+		return -EINVAL;
+	}
+
+	memset(fd, 0, sizeof(struct qbman_fd));
+
+	if (qdma_dev->is_silent) {
+		cntx_sg = qdma_vq->cntx_sg[qdma_vq->silent_idx];
 	} else {
-		ret = rte_mempool_get(qdma_vq->fle_pool, &elem);
-		if (ret) {
-			DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
+		ret = rte_mempool_get(qdma_vq->fle_pool,
+			(void **)&cntx_sg);
+		if (ret)
 			return ret;
-		}
+		DPAA2_SET_FD_FRC(fd, QDMA_SER_CTX);
+		idx_addr = DPAA2_QDMA_IDXADDR_FROM_SG_FLAG(flags);
+		for (i = 0; i < nb_src; i++)
+			cntx_sg->cntx_idx[i] = idx_addr[i];
 	}
 
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
-	elem_iova = rte_mempool_virt2iova(elem);
+	cntx_iova = rte_mempool_virt2iova(cntx_sg);
 #else
-	elem_iova = DPAA2_VADDR_TO_IOVA(elem);
+	cntx_iova = DPAA2_VADDR_TO_IOVA(cntx_sg);
 #endif
 
-	/* Set the metadata */
-	/* Save job context. */
-	*((uint16_t *)
-	((uintptr_t)(uint64_t)elem + QDMA_FLE_JOB_NB_OFFSET)) = nb_jobs;
-	ppjob = (struct rte_dpaa2_qdma_job **)
-		((uintptr_t)(uint64_t)elem + QDMA_FLE_SG_JOBS_OFFSET);
-	for (i = 0; i < nb_jobs; i++)
-		ppjob[i] = job[i];
-
-	ppjob[0]->vq_id = qdma_vq->vq_id;
-
-	fle = (struct qbman_fle *)
-		((uintptr_t)(uint64_t)elem + QDMA_FLE_FLE_OFFSET);
-	fle_iova = elem_iova + QDMA_FLE_FLE_OFFSET;
+	fle = cntx_sg->fle_sdd.fle;
+	fle_iova = cntx_iova +
+		offsetof(struct qdma_cntx_sg, fle_sdd) +
+		offsetof(struct qdma_cntx_fle_sdd, fle);
 
 	DPAA2_SET_FD_ADDR(fd, fle_iova);
 	DPAA2_SET_FD_COMPOUND_FMT(fd);
-	if (!(qdma_vq->flags & DPAA2_QDMA_VQ_NO_RESPONSE))
-		DPAA2_SET_FD_FRC(fd, QDMA_SER_CTX);
+	DPAA2_SET_FD_FLC(fd, (uint64_t)cntx_sg);
+
+	if (qdma_vq->fle_pre_populate) {
+		if (unlikely(!fle[DPAA2_QDMA_SRC_FLE].length)) {
+			fle_sdd_sg_pre_populate(cntx_sg, qdma_vq);
+			if (!qdma_dev->is_silent && cntx_sg && idx_addr) {
+				for (i = 0; i < nb_src; i++)
+					cntx_sg->cntx_idx[i] = idx_addr[i];
+			}
+		}
 
-	/* Populate FLE */
-	if (likely(nb_jobs > 1)) {
-		src_sge = (struct qdma_sg_entry *)
-			((uintptr_t)(uint64_t)elem + QDMA_FLE_SG_ENTRY_OFFSET);
-		dst_sge = src_sge + DPAA2_QDMA_MAX_SG_NB;
-		src = elem_iova + QDMA_FLE_SG_ENTRY_OFFSET;
-		dst = src +
-			DPAA2_QDMA_MAX_SG_NB * sizeof(struct qdma_sg_entry);
-		len = qdma_populate_sg_entry(job, src_sge, dst_sge, nb_jobs);
-		fmt = QBMAN_FLE_WORD4_FMT_SGE;
-		flags = RTE_DPAA2_QDMA_JOB_SRC_PHY | RTE_DPAA2_QDMA_JOB_DEST_PHY;
+		len = sg_entry_post_populate(src, dst,
+			cntx_sg, nb_src);
+		sg_fle_post_populate(fle, len);
 	} else {
-		src = job[0]->src;
-		dst = job[0]->dest;
-		len = job[0]->len;
-		fmt = QBMAN_FLE_WORD4_FMT_SBF;
-		flags = job[0]->flags;
+		sdd = cntx_sg->fle_sdd.sdd;
+		sdd_iova = cntx_iova +
+			offsetof(struct qdma_cntx_sg, fle_sdd) +
+			offsetof(struct qdma_cntx_fle_sdd, sdd);
+		src_sge_iova = cntx_iova +
+			offsetof(struct qdma_cntx_sg, sg_src_entry);
+		dst_sge_iova = cntx_iova +
+			offsetof(struct qdma_cntx_sg, sg_dst_entry);
+		len = sg_entry_populate(src, dst,
+			cntx_sg, nb_src);
+
+		fle_populate(fle, sdd, sdd_iova,
+			&qdma_vq->rbp, src_sge_iova, dst_sge_iova, len,
+			QBMAN_FLE_WORD4_FMT_SGE);
 	}
 
-	memset(fle, 0, DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle) +
-			DPAA2_QDMA_MAX_SDD * sizeof(struct qdma_sdd));
-
-	dpaa2_qdma_populate_fle(fle, fle_iova, rbp,
-					src, dst, len, flags, fmt);
-
-	return 0;
-}
-
-static inline uint16_t
-dpdmai_dev_get_job_us(struct qdma_virt_queue *qdma_vq __rte_unused,
-		      const struct qbman_fd *fd,
-		      struct rte_dpaa2_qdma_job **job, uint16_t *nb_jobs)
-{
-	uint16_t vqid;
-	size_t iova;
-	struct rte_dpaa2_qdma_job **ppjob;
+	if (unlikely(qdma_vq->flags & DPAA2_QDMA_DESC_DEBUG_FLAG))
+		dpaa2_qdma_long_fmt_dump(cntx_sg->fle_sdd.fle);
 
-	if (fd->simple_pci.saddr_hi & (QDMA_RBP_UPPER_ADDRESS_MASK >> 32))
-		iova = (size_t)(((uint64_t)fd->simple_pci.daddr_hi) << 32
-				| (uint64_t)fd->simple_pci.daddr_lo);
-	else
-		iova = (size_t)(((uint64_t)fd->simple_pci.saddr_hi) << 32
-				| (uint64_t)fd->simple_pci.saddr_lo);
+	qdma_vq->fd_idx++;
+	qdma_vq->silent_idx =
+		(qdma_vq->silent_idx + 1) & (DPAA2_QDMA_MAX_DESC - 1);
 
-	ppjob = (struct rte_dpaa2_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1;
-	*job = (struct rte_dpaa2_qdma_job *)*ppjob;
-	(*job)->status = (fd->simple_pci.acc_err << 8) |
-					(fd->simple_pci.error);
-	vqid = (*job)->vq_id;
-	*nb_jobs = 1;
+	if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
+		expected = qdma_vq->fd_idx;
+		ret = dpaa2_qdma_multi_eq(qdma_vq);
+		if (likely(ret == expected))
+			return 0;
+	} else {
+		return 0;
+	}
 
-	return vqid;
+	return ret;
 }
 
-static inline uint16_t
-dpdmai_dev_get_single_job_lf(struct qdma_virt_queue *qdma_vq,
-			     const struct qbman_fd *fd,
-			     struct rte_dpaa2_qdma_job **job,
-			     uint16_t *nb_jobs)
+static int
+dpaa2_qdma_copy(void *dev_private, uint16_t vchan,
+	rte_iova_t src, rte_iova_t dst,
+	uint32_t length, uint64_t flags)
 {
+	struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
+	int ret = 0, expected;
+	struct qbman_fd *fd = &qdma_vq->fd[qdma_vq->fd_idx];
+	struct qdma_cntx_long *cntx_long = NULL;
+	rte_iova_t cntx_iova, fle_iova, sdd_iova;
 	struct qbman_fle *fle;
-	struct rte_dpaa2_qdma_job **ppjob = NULL;
-	uint16_t status;
+	struct qdma_sdd *sdd;
 
-	/*
-	 * Fetch metadata from FLE. job and vq_id were set
-	 * in metadata in the enqueue operation.
-	 */
-	fle = (struct qbman_fle *)
-			DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd));
+	memset(fd, 0, sizeof(struct qbman_fd));
 
-	*nb_jobs = 1;
-	ppjob = (struct rte_dpaa2_qdma_job **)((uintptr_t)(uint64_t)fle -
-			QDMA_FLE_FLE_OFFSET + QDMA_FLE_SINGLE_JOB_OFFSET);
+	if (qdma_dev->is_silent) {
+		cntx_long = qdma_vq->cntx_long[qdma_vq->silent_idx];
+	} else {
+		ret = rte_mempool_get(qdma_vq->fle_pool,
+			(void **)&cntx_long);
+		if (ret)
+			return ret;
+		DPAA2_SET_FD_FRC(fd, QDMA_SER_CTX);
+		cntx_long->cntx_idx = DPAA2_QDMA_IDX_FROM_FLAG(flags);
+	}
 
-	status = (DPAA2_GET_FD_ERR(fd) << 8) | (DPAA2_GET_FD_FRC(fd) & 0xFF);
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+	cntx_iova = rte_mempool_virt2iova(cntx_long);
+#else
+	cntx_iova = DPAA2_VADDR_TO_IOVA(cntx_long);
+#endif
 
-	*job = *ppjob;
-	(*job)->status = status;
+	fle = cntx_long->fle_sdd.fle;
+	fle_iova = cntx_iova +
+		offsetof(struct qdma_cntx_long, fle_sdd) +
+		offsetof(struct qdma_cntx_fle_sdd, fle);
 
-	/* Free FLE to the pool */
-	rte_mempool_put(qdma_vq->fle_pool,
-			(void *)
-			((uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET));
+	DPAA2_SET_FD_ADDR(fd, fle_iova);
+	DPAA2_SET_FD_COMPOUND_FMT(fd);
+	DPAA2_SET_FD_FLC(fd, (uint64_t)cntx_long);
+
+	if (qdma_vq->fle_pre_populate) {
+		if (unlikely(!fle[DPAA2_QDMA_SRC_FLE].length)) {
+			fle_sdd_pre_populate(&cntx_long->fle_sdd,
+				&qdma_vq->rbp,
+				0, 0, QBMAN_FLE_WORD4_FMT_SBF);
+			if (!qdma_dev->is_silent && cntx_long) {
+				cntx_long->cntx_idx =
+					DPAA2_QDMA_IDX_FROM_FLAG(flags);
+			}
+		}
 
-	return (*job)->vq_id;
-}
+		fle_post_populate(fle, src, dst, length);
+	} else {
+		sdd = cntx_long->fle_sdd.sdd;
+		sdd_iova = cntx_iova +
+			offsetof(struct qdma_cntx_long, fle_sdd) +
+			offsetof(struct qdma_cntx_fle_sdd, sdd);
+		fle_populate(fle, sdd, sdd_iova, &qdma_vq->rbp,
+			src, dst, length,
+			QBMAN_FLE_WORD4_FMT_SBF);
+	}
 
-static inline uint16_t
-dpdmai_dev_get_sg_job_lf(struct qdma_virt_queue *qdma_vq,
-			 const struct qbman_fd *fd,
-			 struct rte_dpaa2_qdma_job **job,
-			 uint16_t *nb_jobs)
-{
-	struct qbman_fle *fle;
-	struct rte_dpaa2_qdma_job **ppjob = NULL;
-	uint16_t i, status;
+	if (unlikely(qdma_vq->flags & DPAA2_QDMA_DESC_DEBUG_FLAG))
+		dpaa2_qdma_long_fmt_dump(cntx_long->fle_sdd.fle);
 
-	/*
-	 * Fetch metadata from FLE. job and vq_id were set
-	 * in metadata in the enqueue operation.
-	 */
-	fle = (struct qbman_fle *)
-			DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd));
-	*nb_jobs = *((uint16_t *)((uintptr_t)(uint64_t)fle -
-				QDMA_FLE_FLE_OFFSET + QDMA_FLE_JOB_NB_OFFSET));
-	ppjob = (struct rte_dpaa2_qdma_job **)((uintptr_t)(uint64_t)fle -
-				QDMA_FLE_FLE_OFFSET + QDMA_FLE_SG_JOBS_OFFSET);
-	status = (DPAA2_GET_FD_ERR(fd) << 8) | (DPAA2_GET_FD_FRC(fd) & 0xFF);
-
-	for (i = 0; i < (*nb_jobs); i++) {
-		job[i] = ppjob[i];
-		job[i]->status = status;
-	}
+	qdma_vq->fd_idx++;
+	qdma_vq->silent_idx =
+		(qdma_vq->silent_idx + 1) & (DPAA2_QDMA_MAX_DESC - 1);
 
-	/* Free FLE to the pool */
-	rte_mempool_put(qdma_vq->fle_pool,
-			(void *)
-			((uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET));
+	if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
+		expected = qdma_vq->fd_idx;
+		ret = dpaa2_qdma_multi_eq(qdma_vq);
+		if (likely(ret == expected))
+			return 0;
+	} else {
+		return 0;
+	}
 
-	return job[0]->vq_id;
+	return ret;
 }
 
-/* Function to receive a QDMA job for a given device and queue*/
-static int
-dpdmai_dev_dequeue_multijob_prefetch(struct qdma_virt_queue *qdma_vq,
-				     uint16_t *vq_id,
-				     struct rte_dpaa2_qdma_job **job,
-				     uint16_t nb_jobs)
+static uint16_t
+dpaa2_qdma_dequeue(void *dev_private,
+	uint16_t vchan, const uint16_t nb_cpls,
+	uint16_t *cntx_idx, bool *has_error)
 {
-	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_vq->dpdmai_dev;
-	struct dpaa2_queue *rxq = &(dpdmai_dev->rx_queue[0]);
+	struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
+
+	struct dpaa2_queue *rxq;
 	struct qbman_result *dq_storage, *dq_storage1 = NULL;
 	struct qbman_pull_desc pulldesc;
 	struct qbman_swp *swp;
 	struct queue_storage_info_t *q_storage;
+	uint32_t fqid;
 	uint8_t status, pending;
 	uint8_t num_rx = 0;
 	const struct qbman_fd *fd;
-	uint16_t vqid, num_rx_ret;
-	uint16_t rx_fqid = rxq->fqid;
 	int ret, pull_size;
+	struct qbman_fle *fle;
+	struct qdma_cntx_fle_sdd *fle_sdd;
+	struct qdma_cntx_sg *cntx_sg;
+	struct qdma_cntx_long *cntx_long;
+	uint16_t free_space = 0, fle_elem_nb = 0;
 
-	if (qdma_vq->flags & DPAA2_QDMA_VQ_FD_SG_FORMAT) {
-		/** Make sure there are enough space to get jobs.*/
-		if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
-			return -EINVAL;
-		nb_jobs = 1;
-	}
+	if (unlikely(qdma_dev->is_silent))
+		return 0;
 
 	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
 		ret = dpaa2_affine_qbman_swp();
 		if (ret) {
-			DPAA2_QDMA_ERR(
-				"Failed to allocate IO portal, tid: %d",
+			DPAA2_QDMA_ERR("Allocate portal err, tid(%d)",
 				rte_gettid());
+			if (has_error)
+				*has_error = true;
 			return 0;
 		}
 	}
 	swp = DPAA2_PER_LCORE_PORTAL;
 
-	pull_size = (nb_jobs > dpaa2_dqrr_size) ? dpaa2_dqrr_size : nb_jobs;
+	pull_size = (nb_cpls > dpaa2_dqrr_size) ?
+		dpaa2_dqrr_size : nb_cpls;
+	rxq = &(dpdmai_dev->rx_queue[qdma_vq->vq_id]);
+	fqid = rxq->fqid;
 	q_storage = rxq->q_storage;
 
 	if (unlikely(!q_storage->active_dqs)) {
@@ -592,21 +862,20 @@ dpdmai_dev_dequeue_multijob_prefetch(struct qdma_virt_queue *qdma_vq,
 		q_storage->last_num_pkts = pull_size;
 		qbman_pull_desc_clear(&pulldesc);
 		qbman_pull_desc_set_numframes(&pulldesc,
-					      q_storage->last_num_pkts);
-		qbman_pull_desc_set_fq(&pulldesc, rx_fqid);
+			q_storage->last_num_pkts);
+		qbman_pull_desc_set_fq(&pulldesc, fqid);
 		qbman_pull_desc_set_storage(&pulldesc, dq_storage,
-				(size_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
+			(size_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
 		if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
 			while (!qbman_check_command_complete(
-				get_swp_active_dqs(
-				DPAA2_PER_LCORE_DPIO->index)))
+			       get_swp_active_dqs(
+			       DPAA2_PER_LCORE_DPIO->index)))
 				;
 			clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
 		}
 		while (1) {
 			if (qbman_swp_pull(swp, &pulldesc)) {
-				DPAA2_QDMA_DP_WARN(
-					"VDQ command not issued.QBMAN busy");
+				DPAA2_QDMA_DP_WARN("QBMAN busy");
 					/* Portal was busy, try again */
 				continue;
 			}
@@ -615,7 +884,7 @@ dpdmai_dev_dequeue_multijob_prefetch(struct qdma_virt_queue *qdma_vq,
 		q_storage->active_dqs = dq_storage;
 		q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
 		set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index,
-				   dq_storage);
+			dq_storage);
 	}
 
 	dq_storage = q_storage->active_dqs;
@@ -629,7 +898,7 @@ dpdmai_dev_dequeue_multijob_prefetch(struct qdma_virt_queue *qdma_vq,
 	dq_storage1 = q_storage->dq_storage[q_storage->toggle];
 	qbman_pull_desc_clear(&pulldesc);
 	qbman_pull_desc_set_numframes(&pulldesc, pull_size);
-	qbman_pull_desc_set_fq(&pulldesc, rx_fqid);
+	qbman_pull_desc_set_fq(&pulldesc, fqid);
 	qbman_pull_desc_set_storage(&pulldesc, dq_storage1,
 		(size_t)(DPAA2_VADDR_TO_IOVA(dq_storage1)), 1);
 
@@ -662,563 +931,239 @@ dpdmai_dev_dequeue_multijob_prefetch(struct qdma_virt_queue *qdma_vq,
 				continue;
 		}
 		fd = qbman_result_DQ_fd(dq_storage);
-
-		vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx],
-								&num_rx_ret);
-		if (vq_id)
-			vq_id[num_rx] = vqid;
-
-		dq_storage++;
-		num_rx += num_rx_ret;
-	} while (pending);
-
-	if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
-		while (!qbman_check_command_complete(
-			get_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)))
-			;
-		clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
-	}
-	/* issue a volatile dequeue command for next pull */
-	while (1) {
-		if (qbman_swp_pull(swp, &pulldesc)) {
-			DPAA2_QDMA_DP_WARN(
-				"VDQ command is not issued. QBMAN is busy (2)");
-			continue;
-		}
-		break;
-	}
-
-	q_storage->active_dqs = dq_storage1;
-	q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
-	set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index, dq_storage1);
-
-	return num_rx;
-}
-
-static int
-dpdmai_dev_dequeue_multijob_no_prefetch(struct qdma_virt_queue *qdma_vq,
-					uint16_t *vq_id,
-					struct rte_dpaa2_qdma_job **job,
-					uint16_t nb_jobs)
-{
-	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_vq->dpdmai_dev;
-	struct dpaa2_queue *rxq = &(dpdmai_dev->rx_queue[0]);
-	struct qbman_result *dq_storage;
-	struct qbman_pull_desc pulldesc;
-	struct qbman_swp *swp;
-	uint8_t status, pending;
-	uint8_t num_rx = 0;
-	const struct qbman_fd *fd;
-	uint16_t vqid, num_rx_ret;
-	uint16_t rx_fqid = rxq->fqid;
-	int ret, next_pull, num_pulled = 0;
-
-	if (qdma_vq->flags & DPAA2_QDMA_VQ_FD_SG_FORMAT) {
-		/** Make sure there are enough space to get jobs.*/
-		if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
-			return -EINVAL;
-		nb_jobs = 1;
-	}
-
-	next_pull = nb_jobs;
-
-	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
-		ret = dpaa2_affine_qbman_swp();
-		if (ret) {
-			DPAA2_QDMA_ERR(
-				"Failed to allocate IO portal, tid: %d",
-				rte_gettid());
-			return 0;
-		}
-	}
-	swp = DPAA2_PER_LCORE_PORTAL;
-
-	rxq = &(dpdmai_dev->rx_queue[0]);
-
-	do {
-		dq_storage = rxq->q_storage->dq_storage[0];
-		/* Prepare dequeue descriptor */
-		qbman_pull_desc_clear(&pulldesc);
-		qbman_pull_desc_set_fq(&pulldesc, rx_fqid);
-		qbman_pull_desc_set_storage(&pulldesc, dq_storage,
-			(uint64_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
-
-		if (next_pull > dpaa2_dqrr_size) {
-			qbman_pull_desc_set_numframes(&pulldesc,
-					dpaa2_dqrr_size);
-			next_pull -= dpaa2_dqrr_size;
+		fle_sdd = (void *)(uintptr_t)DPAA2_GET_FD_FLC(fd);
+		fle = fle_sdd->fle;
+		qdma_vq->fle_elem[fle_elem_nb] = fle_sdd;
+		fle_elem_nb++;
+		if (fle[DPAA2_QDMA_SRC_FLE].word4.fmt ==
+			QBMAN_FLE_WORD4_FMT_SGE) {
+			cntx_sg = container_of(fle_sdd,
+				struct qdma_cntx_sg, fle_sdd);
+			ret = qdma_cntx_idx_ring_eq(qdma_vq->ring_cntx_idx,
+				cntx_sg->cntx_idx,
+				cntx_sg->job_nb, &free_space);
 		} else {
-			qbman_pull_desc_set_numframes(&pulldesc, next_pull);
-			next_pull = 0;
-		}
-
-		while (1) {
-			if (qbman_swp_pull(swp, &pulldesc)) {
-				DPAA2_QDMA_DP_WARN(
-					"VDQ command not issued. QBMAN busy");
-				/* Portal was busy, try again */
-				continue;
-			}
-			break;
-		}
-
-		rte_prefetch0((void *)((size_t)(dq_storage + 1)));
-		/* Check if the previous issued command is completed. */
-		while (!qbman_check_command_complete(dq_storage))
-			;
-
-		num_pulled = 0;
-		pending = 1;
-
-		do {
-			/* Loop until dq_storage is updated
-			 * with new token by QBMAN
-			 */
-			while (!qbman_check_new_result(dq_storage))
-				;
-			rte_prefetch0((void *)((size_t)(dq_storage + 2)));
-
-			if (qbman_result_DQ_is_pull_complete(dq_storage)) {
-				pending = 0;
-				/* Check for valid frame. */
-				status = qbman_result_DQ_flags(dq_storage);
-				if (unlikely((status &
-					QBMAN_DQ_STAT_VALIDFRAME) == 0))
-					continue;
-			}
-			fd = qbman_result_DQ_fd(dq_storage);
-
-			vqid = qdma_vq->get_job(qdma_vq, fd,
-						&job[num_rx], &num_rx_ret);
-			if (vq_id)
-				vq_id[num_rx] = vqid;
-
-			dq_storage++;
-			num_rx += num_rx_ret;
-			num_pulled++;
-
-		} while (pending);
-	/* Last VDQ provided all packets and more packets are requested */
-	} while (next_pull && num_pulled == dpaa2_dqrr_size);
-
-	return num_rx;
-}
-
-static int
-dpdmai_dev_submit_multi(struct qdma_virt_queue *qdma_vq,
-			struct rte_dpaa2_qdma_job **job,
-			uint16_t nb_jobs)
-{
-	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_vq->dpdmai_dev;
-	uint16_t txq_id = dpdmai_dev->tx_queue[0].fqid;
-	struct qbman_fd fd[DPAA2_QDMA_MAX_DESC];
-	struct qbman_eq_desc eqdesc;
-	struct qbman_swp *swp;
-	uint32_t num_to_send = 0;
-	uint16_t num_tx = 0;
-	uint32_t enqueue_loop, loop;
-	int ret;
-
-	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
-		ret = dpaa2_affine_qbman_swp();
-		if (ret) {
-			DPAA2_QDMA_ERR(
-				"Failed to allocate IO portal, tid: %d",
-				rte_gettid());
-			return 0;
+			cntx_long = container_of(fle_sdd,
+				struct qdma_cntx_long, fle_sdd);
+			ret = qdma_cntx_idx_ring_eq(qdma_vq->ring_cntx_idx,
+				&cntx_long->cntx_idx,
+				1, &free_space);
 		}
-	}
-	swp = DPAA2_PER_LCORE_PORTAL;
-
-	/* Prepare enqueue descriptor */
-	qbman_eq_desc_clear(&eqdesc);
-	qbman_eq_desc_set_fq(&eqdesc, txq_id);
-	qbman_eq_desc_set_no_orp(&eqdesc, 0);
-	qbman_eq_desc_set_response(&eqdesc, 0, 0);
-
-	if (qdma_vq->flags & DPAA2_QDMA_VQ_FD_SG_FORMAT) {
-		uint16_t fd_nb;
-		uint16_t sg_entry_nb = nb_jobs > DPAA2_QDMA_MAX_SG_NB ?
-						DPAA2_QDMA_MAX_SG_NB : nb_jobs;
-		uint16_t job_idx = 0;
-		uint16_t fd_sg_nb[8];
-		uint16_t nb_jobs_ret = 0;
-
-		if (nb_jobs % DPAA2_QDMA_MAX_SG_NB)
-			fd_nb = nb_jobs / DPAA2_QDMA_MAX_SG_NB + 1;
-		else
-			fd_nb = nb_jobs / DPAA2_QDMA_MAX_SG_NB;
-
-		memset(&fd[0], 0, sizeof(struct qbman_fd) * fd_nb);
-
-		for (loop = 0; loop < fd_nb; loop++) {
-			ret = qdma_vq->set_fd(qdma_vq, &fd[loop], &job[job_idx],
-					      sg_entry_nb);
-			if (unlikely(ret < 0))
-				return 0;
-			fd_sg_nb[loop] = sg_entry_nb;
-			nb_jobs -= sg_entry_nb;
-			job_idx += sg_entry_nb;
-			sg_entry_nb = nb_jobs > DPAA2_QDMA_MAX_SG_NB ?
-						DPAA2_QDMA_MAX_SG_NB : nb_jobs;
-		}
-
-		/* Enqueue the packet to the QBMAN */
-		enqueue_loop = 0;
-
-		while (enqueue_loop < fd_nb) {
-			ret = qbman_swp_enqueue_multiple(swp,
-					&eqdesc, &fd[enqueue_loop],
-					NULL, fd_nb - enqueue_loop);
-			if (likely(ret >= 0)) {
-				for (loop = 0; loop < (uint32_t)ret; loop++)
-					nb_jobs_ret +=
-						fd_sg_nb[enqueue_loop + loop];
-				enqueue_loop += ret;
-			}
-		}
-
-		return nb_jobs_ret;
-	}
-
-	memset(fd, 0, nb_jobs * sizeof(struct qbman_fd));
-
-	while (nb_jobs > 0) {
-		num_to_send = (nb_jobs > dpaa2_eqcr_size) ?
-			dpaa2_eqcr_size : nb_jobs;
-
-		ret = qdma_vq->set_fd(qdma_vq, &fd[num_tx],
-						&job[num_tx], num_to_send);
-		if (unlikely(ret < 0))
-			break;
-
-		/* Enqueue the packet to the QBMAN */
-		enqueue_loop = 0;
-		loop = num_to_send;
-
-		while (enqueue_loop < loop) {
-			ret = qbman_swp_enqueue_multiple(swp,
-						&eqdesc,
-						&fd[num_tx + enqueue_loop],
-						NULL,
-						loop - enqueue_loop);
-			if (likely(ret >= 0))
-				enqueue_loop += ret;
-		}
-		num_tx += num_to_send;
-		nb_jobs -= loop;
-	}
-
-	qdma_vq->num_enqueues += num_tx;
-
-	return num_tx;
-}
-
-static inline int
-dpaa2_qdma_submit(void *dev_private, uint16_t vchan)
-{
-	struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
-
-	dpdmai_dev_submit_multi(qdma_vq, qdma_vq->job_list,
-				qdma_vq->num_valid_jobs);
-
-	qdma_vq->num_valid_jobs = 0;
-
-	return 0;
-}
-
-static int
-dpaa2_qdma_enqueue(void *dev_private, uint16_t vchan,
-		   rte_iova_t src, rte_iova_t dst,
-		   uint32_t length, uint64_t flags)
-{
-	struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
-	struct rte_dpaa2_qdma_job *job;
-	int idx, ret;
-
-	idx = (uint16_t)(qdma_vq->num_enqueues + qdma_vq->num_valid_jobs);
-
-	ret = rte_mempool_get(qdma_vq->job_pool, (void **)&job);
-	if (ret) {
-		DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
-		return -ENOSPC;
-	}
-
-	job->src = src;
-	job->dest = dst;
-	job->len = length;
-	job->flags = flags;
-	job->status = 0;
-	job->vq_id = vchan;
-
-	qdma_vq->job_list[qdma_vq->num_valid_jobs] = job;
-	qdma_vq->num_valid_jobs++;
-
-	if (flags & RTE_DMA_OP_FLAG_SUBMIT)
-		dpaa2_qdma_submit(dev_private, vchan);
-
-	return idx;
-}
-
-int
-rte_dpaa2_qdma_copy_multi(int16_t dev_id, uint16_t vchan,
-			  struct rte_dpaa2_qdma_job **jobs,
-			  uint16_t nb_cpls)
-{
-	struct rte_dma_fp_object *obj = &rte_dma_fp_objs[dev_id];
-	struct dpaa2_dpdmai_dev *dpdmai_dev = obj->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
-
-	return dpdmai_dev_submit_multi(qdma_vq, jobs, nb_cpls);
-}
-
-static uint16_t
-dpaa2_qdma_dequeue_multi(struct qdma_device *qdma_dev,
-			 struct qdma_virt_queue *qdma_vq,
-			 struct rte_dpaa2_qdma_job **jobs,
-			 uint16_t nb_jobs)
-{
-	struct qdma_virt_queue *temp_qdma_vq;
-	int ring_count;
-	int ret = 0, i;
-
-	if (qdma_vq->flags & DPAA2_QDMA_VQ_FD_SG_FORMAT) {
-		/** Make sure there are enough space to get jobs.*/
-		if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
-			return -EINVAL;
-	}
-
-	/* Only dequeue when there are pending jobs on VQ */
-	if (qdma_vq->num_enqueues == qdma_vq->num_dequeues)
-		return 0;
-
-	if (!(qdma_vq->flags & DPAA2_QDMA_VQ_FD_SG_FORMAT) &&
-		qdma_vq->num_enqueues < (qdma_vq->num_dequeues + nb_jobs))
-		nb_jobs = RTE_MIN((qdma_vq->num_enqueues -
-				qdma_vq->num_dequeues), nb_jobs);
-
-	if (qdma_vq->exclusive_hw_queue) {
-		/* In case of exclusive queue directly fetch from HW queue */
-		ret = qdma_vq->dequeue_job(qdma_vq, NULL, jobs, nb_jobs);
-		if (ret < 0) {
-			DPAA2_QDMA_ERR(
-				"Dequeue from DPDMAI device failed: %d", ret);
-			return ret;
-		}
-	} else {
-		uint16_t temp_vq_id[DPAA2_QDMA_MAX_DESC];
+		if (!ret || free_space < RTE_DPAA2_QDMA_JOB_SUBMIT_MAX)
+			pending = 0;
 
-		/* Get the QDMA completed jobs from the software ring.
-		 * In case they are not available on the ring poke the HW
-		 * to fetch completed jobs from corresponding HW queues
-		 */
-		ring_count = rte_ring_count(qdma_vq->status_ring);
-		if (ring_count < nb_jobs) {
-			ret = qdma_vq->dequeue_job(qdma_vq,
-					temp_vq_id, jobs, nb_jobs);
-			for (i = 0; i < ret; i++) {
-				temp_qdma_vq = &qdma_dev->vqs[temp_vq_id[i]];
-				rte_ring_enqueue(temp_qdma_vq->status_ring,
-					(void *)(jobs[i]));
-			}
-			ring_count = rte_ring_count(
-					qdma_vq->status_ring);
-		}
+		dq_storage++;
+	} while (pending);
 
-		if (ring_count) {
-			/* Dequeue job from the software ring
-			 * to provide to the user
-			 */
-			ret = rte_ring_dequeue_bulk(qdma_vq->status_ring,
-						    (void **)jobs,
-						    ring_count, NULL);
+	if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
+		while (!qbman_check_command_complete(
+		       get_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)))
+			;
+		clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
+	}
+	/* issue a volatile dequeue command for next pull */
+	while (1) {
+		if (qbman_swp_pull(swp, &pulldesc)) {
+			DPAA2_QDMA_DP_WARN("QBMAN is busy (2)");
+			continue;
 		}
+		break;
 	}
 
-	qdma_vq->num_dequeues += ret;
-	return ret;
-}
-
-static uint16_t
-dpaa2_qdma_dequeue_status(void *dev_private, uint16_t vchan,
-			  const uint16_t nb_cpls,
-			  uint16_t *last_idx,
-			  enum rte_dma_status_code *st)
-{
-	struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
-	struct rte_dpaa2_qdma_job *jobs[DPAA2_QDMA_MAX_DESC];
-	int ret, i;
-
-	ret = dpaa2_qdma_dequeue_multi(qdma_dev, qdma_vq, jobs, nb_cpls);
-
-	for (i = 0; i < ret; i++)
-		st[i] = jobs[i]->status;
-
-	rte_mempool_put_bulk(qdma_vq->job_pool, (void **)jobs, ret);
-
-	if (last_idx != NULL)
-		*last_idx = (uint16_t)(qdma_vq->num_dequeues - 1);
-
-	return ret;
-}
-
-static uint16_t
-dpaa2_qdma_dequeue(void *dev_private,
-		   uint16_t vchan, const uint16_t nb_cpls,
-		   uint16_t *last_idx, bool *has_error)
-{
-	struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
-	struct rte_dpaa2_qdma_job *jobs[DPAA2_QDMA_MAX_DESC];
-	int ret;
-
-	RTE_SET_USED(has_error);
-
-	ret = dpaa2_qdma_dequeue_multi(qdma_dev, qdma_vq,
-				jobs, nb_cpls);
-
-	rte_mempool_put_bulk(qdma_vq->job_pool, (void **)jobs, ret);
+	q_storage->active_dqs = dq_storage1;
+	q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
+	set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index, dq_storage1);
 
-	if (last_idx != NULL)
-		*last_idx = (uint16_t)(qdma_vq->num_dequeues - 1);
+	rte_mempool_put_bulk(qdma_vq->fle_pool,
+		qdma_vq->fle_elem, fle_elem_nb);
 
-	return ret;
-}
+	num_rx = qdma_cntx_idx_ring_dq(qdma_vq->ring_cntx_idx,
+		cntx_idx, nb_cpls);
 
-uint16_t
-rte_dpaa2_qdma_completed_multi(int16_t dev_id, uint16_t vchan,
-			       struct rte_dpaa2_qdma_job **jobs,
-			       uint16_t nb_cpls)
-{
-	struct rte_dma_fp_object *obj = &rte_dma_fp_objs[dev_id];
-	struct dpaa2_dpdmai_dev *dpdmai_dev = obj->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
+	if (has_error)
+		*has_error = false;
 
-	return dpaa2_qdma_dequeue_multi(qdma_dev, qdma_vq, jobs, nb_cpls);
+	return num_rx;
 }
 
 static int
 dpaa2_qdma_info_get(const struct rte_dma_dev *dev,
-		    struct rte_dma_info *dev_info,
-		    uint32_t info_sz)
+	struct rte_dma_info *dev_info,
+	uint32_t info_sz __rte_unused)
 {
-	RTE_SET_USED(dev);
-	RTE_SET_USED(info_sz);
+	struct dpaa2_dpdmai_dev *dpdmai_dev = dev->data->dev_private;
 
 	dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM |
-			     RTE_DMA_CAPA_MEM_TO_DEV |
-			     RTE_DMA_CAPA_DEV_TO_DEV |
-			     RTE_DMA_CAPA_DEV_TO_MEM |
-			     RTE_DMA_CAPA_SILENT |
-			     RTE_DMA_CAPA_OPS_COPY;
-	dev_info->max_vchans = DPAA2_QDMA_MAX_VHANS;
+		RTE_DMA_CAPA_MEM_TO_DEV |
+		RTE_DMA_CAPA_DEV_TO_DEV |
+		RTE_DMA_CAPA_DEV_TO_MEM |
+		RTE_DMA_CAPA_SILENT |
+		RTE_DMA_CAPA_OPS_COPY |
+		RTE_DMA_CAPA_OPS_COPY_SG;
+	dev_info->dev_capa |= RTE_DMA_CAPA_DPAA2_QDMA_FLAGS_INDEX;
+	dev_info->max_vchans = dpdmai_dev->num_queues;
 	dev_info->max_desc = DPAA2_QDMA_MAX_DESC;
 	dev_info->min_desc = DPAA2_QDMA_MIN_DESC;
+	dev_info->max_sges = RTE_DPAA2_QDMA_JOB_SUBMIT_MAX;
+	dev_info->dev_name = dev->device->name;
+	if (dpdmai_dev->qdma_dev)
+		dev_info->nb_vchans = dpdmai_dev->qdma_dev->num_vqs;
 
 	return 0;
 }
 
 static int
 dpaa2_qdma_configure(struct rte_dma_dev *dev,
-		     const struct rte_dma_conf *dev_conf,
-		     uint32_t conf_sz)
+	const struct rte_dma_conf *dev_conf,
+	uint32_t conf_sz)
 {
-	char name[32]; /* RTE_MEMZONE_NAMESIZE = 32 */
 	struct dpaa2_dpdmai_dev *dpdmai_dev = dev->data->dev_private;
 	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	uint16_t i;
+	struct dpdmai_rx_queue_cfg rx_queue_cfg;
+	struct dpdmai_rx_queue_attr rx_attr;
+	struct dpdmai_tx_queue_attr tx_attr;
+	struct dpaa2_queue *rxq;
+	int ret = 0;
 
 	DPAA2_QDMA_FUNC_TRACE();
 
 	RTE_SET_USED(conf_sz);
 
-	/* In case QDMA device is not in stopped state, return -EBUSY */
-	if (qdma_dev->state == 1) {
-		DPAA2_QDMA_ERR(
-			"Device is in running state. Stop before config.");
-		return -1;
-	}
+	if (dev_conf->nb_vchans > dpdmai_dev->num_queues) {
+		DPAA2_QDMA_ERR("%s config queues(%d) > hw queues(%d)",
+			dev->data->dev_name, dev_conf->nb_vchans,
+			dpdmai_dev->num_queues);
 
-	/* Allocate Virtual Queues */
-	sprintf(name, "qdma_%d_vq", dev->data->dev_id);
-	qdma_dev->vqs = rte_malloc(name,
-			(sizeof(struct qdma_virt_queue) * dev_conf->nb_vchans),
-			RTE_CACHE_LINE_SIZE);
-	if (!qdma_dev->vqs) {
-		DPAA2_QDMA_ERR("qdma_virtual_queues allocation failed");
-		return -ENOMEM;
+		return -ENOTSUP;
 	}
-	qdma_dev->num_vqs = dev_conf->nb_vchans;
-
-	return 0;
-}
-
-static int
-check_devargs_handler(__rte_unused const char *key,
-		      const char *value,
-		      __rte_unused void *opaque)
-{
-	if (strcmp(value, "1"))
-		return -1;
 
-	return 0;
-}
+	if (qdma_dev->vqs) {
+		DPAA2_QDMA_DEBUG("%s: queues de-config(%d)/re-config(%d)",
+			dev->data->dev_name,
+			qdma_dev->num_vqs, dev_conf->nb_vchans);
+		for (i = 0; i < qdma_dev->num_vqs; i++) {
+			if ((qdma_dev->vqs[i].num_enqueues !=
+				qdma_dev->vqs[i].num_dequeues) &&
+				!qdma_dev->is_silent) {
+				DPAA2_QDMA_ERR("VQ(%d) %"PRIu64" jobs in dma.",
+					i, qdma_dev->vqs[i].num_enqueues -
+					qdma_dev->vqs[i].num_dequeues);
+				return -EBUSY;
+			}
+		}
+		for (i = 0; i < qdma_dev->num_vqs; i++) {
+			if (qdma_dev->vqs[i].fle_pool) {
+				rte_mempool_free(qdma_dev->vqs[i].fle_pool);
+				qdma_dev->vqs[i].fle_pool = NULL;
+			}
+			if (qdma_dev->vqs[i].ring_cntx_idx) {
+				rte_free(qdma_dev->vqs[i].ring_cntx_idx);
+				qdma_dev->vqs[i].ring_cntx_idx = NULL;
+			}
+			rxq = &dpdmai_dev->rx_queue[i];
+			if (rxq->q_storage) {
+				DPAA2_QDMA_DEBUG("%s rxq[%d] re-configure",
+					dev->data->dev_name, i);
+				dpaa2_free_dq_storage(rxq->q_storage);
+				rte_free(rxq->q_storage);
+				rxq->q_storage = NULL;
+			}
+		}
+		rte_free(qdma_dev->vqs);
+		qdma_dev->vqs = NULL;
+		qdma_dev->num_vqs = 0;
+	}
 
-static int
-dpaa2_qdma_get_devargs(struct rte_devargs *devargs, const char *key)
-{
-	struct rte_kvargs *kvlist;
+	/* Set up Rx Queues */
+	for (i = 0; i < dev_conf->nb_vchans; i++) {
+		memset(&rx_queue_cfg, 0, sizeof(struct dpdmai_rx_queue_cfg));
+		rxq = &dpdmai_dev->rx_queue[i];
+		ret = dpdmai_set_rx_queue(&s_proc_mc_reg,
+				CMD_PRI_LOW,
+				dpdmai_dev->token,
+				i, 0, &rx_queue_cfg);
+		if (ret) {
+			DPAA2_QDMA_ERR("%s RXQ%d set failed(%d)",
+				dev->data->dev_name, i, ret);
+			return ret;
+		}
+	}
 
-	if (!devargs)
-		return 0;
+	/* Get Rx and Tx queue FQIDs */
+	for (i = 0; i < dev_conf->nb_vchans; i++) {
+		ret = dpdmai_get_rx_queue(&s_proc_mc_reg, CMD_PRI_LOW,
+				dpdmai_dev->token, i, 0, &rx_attr);
+		if (ret) {
+			DPAA2_QDMA_ERR("Get DPDMAI%d-RXQ%d failed(%d)",
+				dpdmai_dev->dpdmai_id, i, ret);
+			return ret;
+		}
+		dpdmai_dev->rx_queue[i].fqid = rx_attr.fqid;
 
-	kvlist = rte_kvargs_parse(devargs->args, NULL);
-	if (!kvlist)
-		return 0;
+		ret = dpdmai_get_tx_queue(&s_proc_mc_reg, CMD_PRI_LOW,
+				dpdmai_dev->token, i, 0, &tx_attr);
+		if (ret) {
+			DPAA2_QDMA_ERR("Get DPDMAI%d-TXQ%d failed(%d)",
+				dpdmai_dev->dpdmai_id, i, ret);
+			return ret;
+		}
+		dpdmai_dev->tx_queue[i].fqid = tx_attr.fqid;
+	}
 
-	if (!rte_kvargs_count(kvlist, key)) {
-		rte_kvargs_free(kvlist);
-		return 0;
+	/* Allocate Virtual Queues */
+	qdma_dev->vqs = rte_zmalloc(NULL,
+		(sizeof(struct qdma_virt_queue) * dev_conf->nb_vchans),
+		RTE_CACHE_LINE_SIZE);
+	if (!qdma_dev->vqs) {
+		DPAA2_QDMA_ERR("%s: VQs(%d) alloc failed.",
+			dev->data->dev_name, dev_conf->nb_vchans);
+		return -ENOMEM;
 	}
+	for (i = 0; i < dev_conf->nb_vchans; i++) {
+		qdma_dev->vqs[i].vq_id = i;
+		rxq = &dpdmai_dev->rx_queue[i];
+		/* Allocate DQ storage for the DPDMAI Rx queues */
+		rxq->q_storage = rte_zmalloc(NULL,
+			sizeof(struct queue_storage_info_t),
+			RTE_CACHE_LINE_SIZE);
+		if (!rxq->q_storage) {
+			DPAA2_QDMA_ERR("%s Q[%d] storage alloc failed",
+				dev->data->dev_name, i);
+			ret = -ENOMEM;
+			goto alloc_failed;
+		}
 
-	if (rte_kvargs_process(kvlist, key,
-			       check_devargs_handler, NULL) < 0) {
-		rte_kvargs_free(kvlist);
-		return 0;
+		memset(rxq->q_storage, 0, sizeof(struct queue_storage_info_t));
+		ret = dpaa2_alloc_dq_storage(rxq->q_storage);
+		if (ret) {
+			DPAA2_QDMA_ERR("%s Q[%d] dq storage alloc failed",
+				dev->data->dev_name, i);
+			ret = -ENOMEM;
+			goto alloc_failed;
+		}
 	}
-	rte_kvargs_free(kvlist);
 
-	return 1;
-}
+	qdma_dev->num_vqs = dev_conf->nb_vchans;
+	qdma_dev->is_silent = dev_conf->enable_silent;
 
-/* Enable FD in Ultra Short format */
-void
-rte_dpaa2_qdma_vchan_fd_us_enable(int16_t dev_id, uint16_t vchan)
-{
-	struct rte_dma_fp_object *obj = &rte_dma_fp_objs[dev_id];
-	struct dpaa2_dpdmai_dev *dpdmai_dev = obj->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	return 0;
 
-	qdma_dev->vqs[vchan].flags |= DPAA2_QDMA_VQ_FD_SHORT_FORMAT;
-}
+alloc_failed:
+	for (i = 0; i < dev_conf->nb_vchans; i++) {
+		rxq = &dpdmai_dev->rx_queue[i];
+		if (rxq->q_storage) {
+			dpaa2_free_dq_storage(rxq->q_storage);
+			rte_free(rxq->q_storage);
+			rxq->q_storage = NULL;
+		}
+	}
 
-/* Enable internal SG processing */
-void
-rte_dpaa2_qdma_vchan_internal_sg_enable(int16_t dev_id, uint16_t vchan)
-{
-	struct rte_dma_fp_object *obj = &rte_dma_fp_objs[dev_id];
-	struct dpaa2_dpdmai_dev *dpdmai_dev = obj->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	rte_free(qdma_dev->vqs);
+	qdma_dev->vqs = NULL;
+	qdma_dev->num_vqs = 0;
 
-	qdma_dev->vqs[vchan].flags |= DPAA2_QDMA_VQ_FD_SG_FORMAT;
+	return ret;
 }
 
 static int
@@ -1257,16 +1202,14 @@ dpaa2_qdma_vchan_rbp_set(struct qdma_virt_queue *vq,
 
 static int
 dpaa2_qdma_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
-		       const struct rte_dma_vchan_conf *conf,
-		       uint32_t conf_sz)
+	const struct rte_dma_vchan_conf *conf,
+	uint32_t conf_sz)
 {
 	struct dpaa2_dpdmai_dev *dpdmai_dev = dev->data->dev_private;
 	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
 	uint32_t pool_size;
-	char ring_name[32];
 	char pool_name[64];
-	int fd_long_format = 1;
-	int sg_enable = 0, ret;
+	int ret;
 
 	DPAA2_QDMA_FUNC_TRACE();
 
@@ -1276,99 +1219,67 @@ dpaa2_qdma_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 	if (ret)
 		return ret;
 
-	if (qdma_dev->vqs[vchan].flags & DPAA2_QDMA_VQ_FD_SG_FORMAT)
-		sg_enable = 1;
-
-	if (qdma_dev->vqs[vchan].flags & DPAA2_QDMA_VQ_FD_SHORT_FORMAT)
-		fd_long_format = 0;
-
-	if (dev->data->dev_conf.enable_silent)
-		qdma_dev->vqs[vchan].flags |= DPAA2_QDMA_VQ_NO_RESPONSE;
-
-	if (sg_enable) {
-		if (qdma_dev->num_vqs != 1) {
-			DPAA2_QDMA_ERR(
-				"qDMA SG format only supports physical queue!");
-			return -ENODEV;
-		}
-		if (!fd_long_format) {
-			DPAA2_QDMA_ERR(
-				"qDMA SG format only supports long FD format!");
-			return -ENODEV;
-		}
-		pool_size = QDMA_FLE_SG_POOL_SIZE;
-	} else {
-		pool_size = QDMA_FLE_SINGLE_POOL_SIZE;
-	}
+	if (dpaa2_qdma_get_devargs(dev->device->devargs, DPAA2_QDMA_FLE_PRE_POPULATE))
+		qdma_dev->vqs[vchan].fle_pre_populate = 1;
+	else
+		qdma_dev->vqs[vchan].fle_pre_populate = 0;
 
-	if (qdma_dev->num_vqs == 1)
-		qdma_dev->vqs[vchan].exclusive_hw_queue = 1;
-	else {
-		/* Allocate a Ring for Virtual Queue in VQ mode */
-		snprintf(ring_name, sizeof(ring_name), "status ring %d %d",
-			 dev->data->dev_id, vchan);
-		qdma_dev->vqs[vchan].status_ring = rte_ring_create(ring_name,
-			conf->nb_desc, rte_socket_id(), 0);
-		if (!qdma_dev->vqs[vchan].status_ring) {
-			DPAA2_QDMA_ERR("Status ring creation failed for vq");
-			return rte_errno;
-		}
-	}
+	if (dpaa2_qdma_get_devargs(dev->device->devargs, DPAA2_QDMA_DESC_DEBUG))
+		qdma_dev->vqs[vchan].flags |= DPAA2_QDMA_DESC_DEBUG_FLAG;
+	else
+		qdma_dev->vqs[vchan].flags &= (~DPAA2_QDMA_DESC_DEBUG_FLAG);
 
 	snprintf(pool_name, sizeof(pool_name),
 		"qdma_fle_pool_dev%d_qid%d", dpdmai_dev->dpdmai_id, vchan);
+	pool_size = RTE_MAX(sizeof(struct qdma_cntx_sg),
+			    sizeof(struct qdma_cntx_long));
+
 	qdma_dev->vqs[vchan].fle_pool = rte_mempool_create(pool_name,
-			conf->nb_desc, pool_size,
-			QDMA_FLE_CACHE_SIZE(conf->nb_desc), 0,
-			NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
+			DPAA2_QDMA_MAX_DESC * 2, pool_size,
+			512, 0, NULL, NULL, NULL, NULL,
+			SOCKET_ID_ANY, 0);
 	if (!qdma_dev->vqs[vchan].fle_pool) {
-		DPAA2_QDMA_ERR("qdma_fle_pool create failed");
-		return -ENOMEM;
-	}
-
-	snprintf(pool_name, sizeof(pool_name),
-		"qdma_job_pool_dev%d_qid%d", dpdmai_dev->dpdmai_id, vchan);
-	qdma_dev->vqs[vchan].job_pool = rte_mempool_create(pool_name,
-			conf->nb_desc, pool_size,
-			QDMA_FLE_CACHE_SIZE(conf->nb_desc), 0,
-			NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
-	if (!qdma_dev->vqs[vchan].job_pool) {
-		DPAA2_QDMA_ERR("qdma_job_pool create failed");
+		DPAA2_QDMA_ERR("%s create failed", pool_name);
 		return -ENOMEM;
 	}
 
-	if (fd_long_format) {
-		if (sg_enable) {
-			qdma_dev->vqs[vchan].set_fd = dpdmai_dev_set_sg_fd_lf;
-			qdma_dev->vqs[vchan].get_job = dpdmai_dev_get_sg_job_lf;
-		} else {
-			if (dev->data->dev_conf.enable_silent)
-				qdma_dev->vqs[vchan].set_fd =
-					dpdmai_dev_set_multi_fd_lf_no_rsp;
-			else
-				qdma_dev->vqs[vchan].set_fd =
-					dpdmai_dev_set_multi_fd_lf;
-			qdma_dev->vqs[vchan].get_job = dpdmai_dev_get_single_job_lf;
+	if (qdma_dev->is_silent) {
+		ret = rte_mempool_get_bulk(qdma_dev->vqs[vchan].fle_pool,
+				(void **)qdma_dev->vqs[vchan].cntx_sg,
+				DPAA2_QDMA_MAX_DESC);
+		if (ret) {
+			DPAA2_QDMA_ERR("sg cntx get from %s for silent mode",
+				       pool_name);
+			return ret;
+		}
+		ret = rte_mempool_get_bulk(qdma_dev->vqs[vchan].fle_pool,
+				(void **)qdma_dev->vqs[vchan].cntx_long,
+				DPAA2_QDMA_MAX_DESC);
+		if (ret) {
+			DPAA2_QDMA_ERR("long cntx get from %s for silent mode",
+				       pool_name);
+			return ret;
 		}
 	} else {
-		qdma_dev->vqs[vchan].set_fd = dpdmai_dev_set_fd_us;
-		qdma_dev->vqs[vchan].get_job = dpdmai_dev_get_job_us;
-	}
-
-	if (dpaa2_qdma_get_devargs(dev->device->devargs,
-			DPAA2_QDMA_PREFETCH)) {
-		/* If no prefetch is configured. */
-		qdma_dev->vqs[vchan].dequeue_job =
-				dpdmai_dev_dequeue_multijob_prefetch;
-		DPAA2_QDMA_INFO("Prefetch RX Mode enabled");
-	} else {
-		qdma_dev->vqs[vchan].dequeue_job =
-			dpdmai_dev_dequeue_multijob_no_prefetch;
+		qdma_dev->vqs[vchan].ring_cntx_idx = rte_malloc(NULL,
+				sizeof(struct qdma_cntx_idx_ring),
+				RTE_CACHE_LINE_SIZE);
+		if (!qdma_dev->vqs[vchan].ring_cntx_idx) {
+			DPAA2_QDMA_ERR("DQ response ring alloc failed.");
+			return -ENOMEM;
+		}
+		qdma_dev->vqs[vchan].ring_cntx_idx->start = 0;
+		qdma_dev->vqs[vchan].ring_cntx_idx->tail = 0;
+		qdma_dev->vqs[vchan].ring_cntx_idx->free_space =
+				QDMA_CNTX_IDX_RING_MAX_FREE;
+		qdma_dev->vqs[vchan].ring_cntx_idx->nb_in_ring = 0;
+		qdma_dev->vqs[vchan].fle_elem = rte_malloc(NULL,
+				sizeof(void *) * DPAA2_QDMA_MAX_DESC,
+				RTE_CACHE_LINE_SIZE);
 	}
 
 	qdma_dev->vqs[vchan].dpdmai_dev = dpdmai_dev;
 	qdma_dev->vqs[vchan].nb_desc = conf->nb_desc;
-	qdma_dev->vqs[vchan].enqueue_job = dpdmai_dev_submit_multi;
 
 	return 0;
 }
@@ -1377,11 +1288,17 @@ static int
 dpaa2_qdma_start(struct rte_dma_dev *dev)
 {
 	struct dpaa2_dpdmai_dev *dpdmai_dev = dev->data->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	int ret;
 
 	DPAA2_QDMA_FUNC_TRACE();
 
-	qdma_dev->state = 1;
+	/* Enable the device */
+	ret = dpdmai_enable(&s_proc_mc_reg, CMD_PRI_LOW,
+			dpdmai_dev->token);
+	if (ret) {
+		DPAA2_QDMA_ERR("Enabling device failed with err: %d", ret);
+		return ret;
+	}
 
 	return 0;
 }
@@ -1390,46 +1307,71 @@ static int
 dpaa2_qdma_stop(struct rte_dma_dev *dev)
 {
 	struct dpaa2_dpdmai_dev *dpdmai_dev = dev->data->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	int ret;
 
 	DPAA2_QDMA_FUNC_TRACE();
 
-	qdma_dev->state = 0;
+	/* Disable the device */
+	ret = dpdmai_disable(&s_proc_mc_reg, CMD_PRI_LOW,
+			dpdmai_dev->token);
+	if (ret) {
+		DPAA2_QDMA_ERR("Disable device failed with err: %d", ret);
+		return ret;
+	}
 
 	return 0;
 }
 
 static int
-dpaa2_qdma_reset(struct rte_dma_dev *dev)
+dpaa2_qdma_close(struct rte_dma_dev *dev)
 {
 	struct dpaa2_dpdmai_dev *dpdmai_dev = dev->data->dev_private;
 	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct dpaa2_queue *rxq;
 	int i;
 
 	DPAA2_QDMA_FUNC_TRACE();
 
-	/* In case QDMA device is not in stopped state, return -EBUSY */
-	if (qdma_dev->state == 1) {
-		DPAA2_QDMA_ERR(
-			"Device is in running state. Stop before reset.");
-		return -EBUSY;
-	}
+	if (!qdma_dev)
+		return 0;
 
 	/* In case there are pending jobs on any VQ, return -EBUSY */
 	for (i = 0; i < qdma_dev->num_vqs; i++) {
-		if (qdma_dev->vqs[i].in_use && (qdma_dev->vqs[i].num_enqueues !=
-		    qdma_dev->vqs[i].num_dequeues)) {
-			DPAA2_QDMA_ERR("Jobs are still pending on VQ: %d", i);
+		if ((qdma_dev->vqs[i].num_enqueues !=
+		    qdma_dev->vqs[i].num_dequeues) &&
+		    !qdma_dev->is_silent) {
+			DPAA2_QDMA_ERR("VQ(%d) pending: eq(%"PRIu64") != dq(%"PRId64")",
+				i, qdma_dev->vqs[i].num_enqueues,
+				qdma_dev->vqs[i].num_dequeues);
 			return -EBUSY;
 		}
 	}
 
-	/* Reset and free virtual queues */
+	/* Free RXQ storages */
 	for (i = 0; i < qdma_dev->num_vqs; i++) {
-		rte_ring_free(qdma_dev->vqs[i].status_ring);
+		rxq = &dpdmai_dev->rx_queue[i];
+		if (rxq->q_storage) {
+			dpaa2_free_dq_storage(rxq->q_storage);
+			rte_free(rxq->q_storage);
+			rxq->q_storage = NULL;
+		}
+	}
+
+	if (qdma_dev->vqs) {
+		/* Free RXQ fle pool */
+		for (i = 0; i < qdma_dev->num_vqs; i++) {
+			if (qdma_dev->vqs[i].fle_pool) {
+				rte_mempool_free(qdma_dev->vqs[i].fle_pool);
+				qdma_dev->vqs[i].fle_pool = NULL;
+			}
+			if (qdma_dev->vqs[i].ring_cntx_idx) {
+				rte_free(qdma_dev->vqs[i].ring_cntx_idx);
+				qdma_dev->vqs[i].ring_cntx_idx = NULL;
+			}
+		}
+		rte_free(qdma_dev->vqs);
+		qdma_dev->vqs = NULL;
 	}
-	rte_free(qdma_dev->vqs);
-	qdma_dev->vqs = NULL;
 
 	/* Reset QDMA device structure */
 	qdma_dev->num_vqs = 0;
@@ -1438,18 +1380,8 @@ dpaa2_qdma_reset(struct rte_dma_dev *dev)
 }
 
 static int
-dpaa2_qdma_close(__rte_unused struct rte_dma_dev *dev)
-{
-	DPAA2_QDMA_FUNC_TRACE();
-
-	dpaa2_qdma_reset(dev);
-
-	return 0;
-}
-
-static int
-dpaa2_qdma_stats_get(const struct rte_dma_dev *dmadev, uint16_t vchan,
-		    struct rte_dma_stats *rte_stats, uint32_t size)
+dpaa2_qdma_stats_get(const struct rte_dma_dev *dmadev,
+	uint16_t vchan, struct rte_dma_stats *rte_stats, uint32_t size)
 {
 	struct dpaa2_dpdmai_dev *dpdmai_dev = dmadev->data->dev_private;
 	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
@@ -1504,123 +1436,44 @@ static int
 dpaa2_dpdmai_dev_uninit(struct rte_dma_dev *dev)
 {
 	struct dpaa2_dpdmai_dev *dpdmai_dev = dev->data->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
 	int ret;
 
 	DPAA2_QDMA_FUNC_TRACE();
 
-	ret = dpdmai_disable(&dpdmai_dev->dpdmai, CMD_PRI_LOW,
-			     dpdmai_dev->token);
-	if (ret)
-		DPAA2_QDMA_ERR("dmdmai disable failed");
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+		DPAA2_QDMA_DEBUG("Un-attach DMA(%d) in the 2nd proess.",
+			dpdmai_dev->dpdmai_id);
+		return 0;
+	}
 
-	/* Set up the DQRR storage for Rx */
-	struct dpaa2_queue *rxq = &(dpdmai_dev->rx_queue[0]);
+	/* Close the device at underlying layer*/
+	ret = dpdmai_close(&s_proc_mc_reg, CMD_PRI_LOW,
+			dpdmai_dev->token);
+	if (ret) {
+		DPAA2_QDMA_ERR("dpdmai(%d) close failed(%d)",
+			dpdmai_dev->dpdmai_id, ret);
 
-	if (rxq->q_storage) {
-		dpaa2_free_dq_storage(rxq->q_storage);
-		rte_free(rxq->q_storage);
+		return ret;
 	}
 
-	/* Close the device at underlying layer*/
-	ret = dpdmai_close(&dpdmai_dev->dpdmai, CMD_PRI_LOW, dpdmai_dev->token);
-	if (ret)
-		DPAA2_QDMA_ERR("Failure closing dpdmai device");
+	if (qdma_dev) {
+		rte_free(qdma_dev);
+		dpdmai_dev->qdma_dev = NULL;
+	}
 
-	return 0;
+	return ret;
 }
 
 static int
-dpaa2_dpdmai_dev_init(struct rte_dma_dev *dev, int dpdmai_id)
+dpaa2_dpdmai_dev_init(struct rte_dma_dev *dev, uint32_t dpdmai_id)
 {
 	struct dpaa2_dpdmai_dev *dpdmai_dev = dev->data->dev_private;
-	struct dpdmai_rx_queue_cfg rx_queue_cfg;
 	struct dpdmai_attr attr;
-	struct dpdmai_rx_queue_attr rx_attr;
-	struct dpdmai_tx_queue_attr tx_attr;
-	struct dpaa2_queue *rxq;
-	int ret;
+	int ret, err;
 
 	DPAA2_QDMA_FUNC_TRACE();
 
-	/* Open DPDMAI device */
-	dpdmai_dev->dpdmai_id = dpdmai_id;
-	dpdmai_dev->dpdmai.regs = dpaa2_get_mcp_ptr(MC_PORTAL_INDEX);
-	dpdmai_dev->qdma_dev = rte_malloc(NULL, sizeof(struct qdma_device),
-					  RTE_CACHE_LINE_SIZE);
-	ret = dpdmai_open(&dpdmai_dev->dpdmai, CMD_PRI_LOW,
-			  dpdmai_dev->dpdmai_id, &dpdmai_dev->token);
-	if (ret) {
-		DPAA2_QDMA_ERR("dpdmai_open() failed with err: %d", ret);
-		return ret;
-	}
-
-	/* Get DPDMAI attributes */
-	ret = dpdmai_get_attributes(&dpdmai_dev->dpdmai, CMD_PRI_LOW,
-				    dpdmai_dev->token, &attr);
-	if (ret) {
-		DPAA2_QDMA_ERR("dpdmai get attributes failed with err: %d",
-			       ret);
-		goto init_err;
-	}
-	dpdmai_dev->num_queues = attr.num_of_queues;
-
-	/* Set up Rx Queue */
-	memset(&rx_queue_cfg, 0, sizeof(struct dpdmai_rx_queue_cfg));
-	ret = dpdmai_set_rx_queue(&dpdmai_dev->dpdmai,
-				  CMD_PRI_LOW,
-				  dpdmai_dev->token,
-				  0, 0, &rx_queue_cfg);
-	if (ret) {
-		DPAA2_QDMA_ERR("Setting Rx queue failed with err: %d",
-			       ret);
-		goto init_err;
-	}
-
-	/* Allocate DQ storage for the DPDMAI Rx queues */
-	rxq = &(dpdmai_dev->rx_queue[0]);
-	rxq->q_storage = rte_malloc("dq_storage",
-				    sizeof(struct queue_storage_info_t),
-				    RTE_CACHE_LINE_SIZE);
-	if (!rxq->q_storage) {
-		DPAA2_QDMA_ERR("q_storage allocation failed");
-		ret = -ENOMEM;
-		goto init_err;
-	}
-
-	memset(rxq->q_storage, 0, sizeof(struct queue_storage_info_t));
-	ret = dpaa2_alloc_dq_storage(rxq->q_storage);
-	if (ret) {
-		DPAA2_QDMA_ERR("dpaa2_alloc_dq_storage failed");
-		goto init_err;
-	}
-
-	/* Get Rx and Tx queues FQID */
-	ret = dpdmai_get_rx_queue(&dpdmai_dev->dpdmai, CMD_PRI_LOW,
-				  dpdmai_dev->token, 0, 0, &rx_attr);
-	if (ret) {
-		DPAA2_QDMA_ERR("Reading device failed with err: %d",
-			       ret);
-		goto init_err;
-	}
-	dpdmai_dev->rx_queue[0].fqid = rx_attr.fqid;
-
-	ret = dpdmai_get_tx_queue(&dpdmai_dev->dpdmai, CMD_PRI_LOW,
-				  dpdmai_dev->token, 0, 0, &tx_attr);
-	if (ret) {
-		DPAA2_QDMA_ERR("Reading device failed with err: %d",
-			       ret);
-		goto init_err;
-	}
-	dpdmai_dev->tx_queue[0].fqid = tx_attr.fqid;
-
-	/* Enable the device */
-	ret = dpdmai_enable(&dpdmai_dev->dpdmai, CMD_PRI_LOW,
-			    dpdmai_dev->token);
-	if (ret) {
-		DPAA2_QDMA_ERR("Enabling device failed with err: %d", ret);
-		goto init_err;
-	}
-
 	if (!dpaa2_coherent_no_alloc_cache) {
 		if (dpaa2_svr_family == SVR_LX2160A) {
 			dpaa2_coherent_no_alloc_cache =
@@ -1635,24 +1488,76 @@ dpaa2_dpdmai_dev_init(struct rte_dma_dev *dev, int dpdmai_id)
 		}
 	}
 
-	DPAA2_QDMA_DEBUG("Initialized dpdmai object successfully");
+	if (!s_proc_mc_reg.regs)
+		s_proc_mc_reg.regs = dpaa2_get_mcp_ptr(MC_PORTAL_INDEX);
+
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+		DPAA2_QDMA_DEBUG("Attach DMA(%d) in the 2nd proess.",
+			dpdmai_id);
+		if (dpdmai_id != dpdmai_dev->dpdmai_id) {
+			DPAA2_QDMA_ERR("Fatal: Attach DMA(%d) to DMA(%d)",
+				dpdmai_id, dpdmai_dev->dpdmai_id);
+			return -EINVAL;
+		}
+		if (!dpdmai_dev->qdma_dev) {
+			DPAA2_QDMA_ERR("Fatal: DMA(%d) qdma_dev NOT allocated",
+				dpdmai_id);
+			return -ENOMEM;
+		}
+		if (dpdmai_dev->qdma_dev->num_vqs) {
+			DPAA2_QDMA_WARN("DMA(%d) %d vqs were configured",
+				dpdmai_id, dpdmai_dev->qdma_dev->num_vqs);
+		}
+
+		return 0;
+	}
+
+	/* Open DPDMAI device */
+	dpdmai_dev->dpdmai_id = dpdmai_id;
+
+	if (dpdmai_dev->qdma_dev) {
+		rte_free(dpdmai_dev->qdma_dev);
+		dpdmai_dev->qdma_dev = NULL;
+	}
+	dpdmai_dev->qdma_dev = rte_zmalloc(NULL,
+		sizeof(struct qdma_device), RTE_CACHE_LINE_SIZE);
+	if (!dpdmai_dev->qdma_dev) {
+		DPAA2_QDMA_ERR("DMA(%d) alloc memory failed",
+			dpdmai_id);
+		return -ENOMEM;
+	}
+	ret = dpdmai_open(&s_proc_mc_reg, CMD_PRI_LOW,
+			dpdmai_dev->dpdmai_id, &dpdmai_dev->token);
+	if (ret) {
+		DPAA2_QDMA_ERR("%s: dma(%d) open failed(%d)",
+			__func__, dpdmai_dev->dpdmai_id, ret);
+		return ret;
+	}
 
-	/* Reset the QDMA device */
-	ret = dpaa2_qdma_reset(dev);
+	/* Get DPDMAI attributes */
+	ret = dpdmai_get_attributes(&s_proc_mc_reg, CMD_PRI_LOW,
+			dpdmai_dev->token, &attr);
 	if (ret) {
-		DPAA2_QDMA_ERR("Resetting QDMA failed");
-		goto init_err;
+		DPAA2_QDMA_ERR("%s: dma(%d) get attributes failed(%d)",
+			__func__, dpdmai_dev->dpdmai_id, ret);
+		err = dpdmai_close(&s_proc_mc_reg, CMD_PRI_LOW,
+			dpdmai_dev->token);
+		if (err) {
+			DPAA2_QDMA_ERR("dpdmai(%d) close failed(%d)",
+				dpdmai_dev->dpdmai_id, err);
+		}
+		return ret;
 	}
+	dpdmai_dev->num_queues = attr.num_of_queues;
+
+	DPAA2_QDMA_DEBUG("DMA(%d) is initialized.", dpdmai_id);
 
 	return 0;
-init_err:
-	dpaa2_dpdmai_dev_uninit(dev);
-	return ret;
 }
 
 static int
 dpaa2_qdma_probe(struct rte_dpaa2_driver *dpaa2_drv,
-		 struct rte_dpaa2_device *dpaa2_dev)
+	struct rte_dpaa2_device *dpaa2_dev)
 {
 	struct rte_dma_dev *dmadev;
 	int ret;
@@ -1662,8 +1567,8 @@ dpaa2_qdma_probe(struct rte_dpaa2_driver *dpaa2_drv,
 	RTE_SET_USED(dpaa2_drv);
 
 	dmadev = rte_dma_pmd_allocate(dpaa2_dev->device.name,
-				      rte_socket_id(),
-				      sizeof(struct dpaa2_dpdmai_dev));
+		rte_socket_id(),
+		sizeof(struct dpaa2_dpdmai_dev));
 	if (!dmadev) {
 		DPAA2_QDMA_ERR("Unable to allocate dmadevice");
 		return -EINVAL;
@@ -1673,10 +1578,10 @@ dpaa2_qdma_probe(struct rte_dpaa2_driver *dpaa2_drv,
 	dmadev->dev_ops = &dpaa2_qdma_ops;
 	dmadev->device = &dpaa2_dev->device;
 	dmadev->fp_obj->dev_private = dmadev->data->dev_private;
-	dmadev->fp_obj->copy = dpaa2_qdma_enqueue;
+	dmadev->fp_obj->copy = dpaa2_qdma_copy;
+	dmadev->fp_obj->copy_sg = dpaa2_qdma_copy_sg;
 	dmadev->fp_obj->submit = dpaa2_qdma_submit;
 	dmadev->fp_obj->completed = dpaa2_qdma_dequeue;
-	dmadev->fp_obj->completed_status = dpaa2_qdma_dequeue_status;
 	dmadev->fp_obj->burst_capacity = dpaa2_qdma_burst_capacity;
 
 	/* Invoke PMD device initialization function */
@@ -1718,5 +1623,6 @@ static struct rte_dpaa2_driver rte_dpaa2_qdma_pmd = {
 
 RTE_PMD_REGISTER_DPAA2(dpaa2_qdma, rte_dpaa2_qdma_pmd);
 RTE_PMD_REGISTER_PARAM_STRING(dpaa2_qdma,
-	"no_prefetch=<int> ");
+	DPAA2_QDMA_FLE_PRE_POPULATE "=<int> "
+	DPAA2_QDMA_DESC_DEBUG "=<int>");
 RTE_LOG_REGISTER_DEFAULT(dpaa2_qdma_logtype, INFO);
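
[Editor's note] For orientation, a minimal sketch of driving the refactored
PMD through the generic dmadev API (an editorial illustration, not part of
the patch; the buffer sizes and alignment below are assumptions):

#include <errno.h>
#include <stdbool.h>
#include <rte_dmadev.h>
#include <rte_malloc.h>

static int
qdma_copy_example(int16_t dev_id)
{
	struct rte_dma_conf dev_conf = {
		.nb_vchans = 1,         /* must not exceed the DPDMAI hw queues */
		.enable_silent = false, /* completions are polled below */
	};
	struct rte_dma_vchan_conf vchan_conf = {
		.direction = RTE_DMA_DIR_MEM_TO_MEM,
		.nb_desc = 4096,        /* DPAA2_QDMA_MAX_DESC after this patch */
	};
	void *src, *dst;
	uint16_t last_idx;
	bool error = false;
	int ret;

	ret = rte_dma_configure(dev_id, &dev_conf);
	if (ret)
		return ret;
	ret = rte_dma_vchan_setup(dev_id, 0, &vchan_conf);
	if (ret)
		return ret;
	ret = rte_dma_start(dev_id);
	if (ret)
		return ret;

	src = rte_malloc(NULL, 4096, 64);
	dst = rte_malloc(NULL, 4096, 64);
	if (!src || !dst)
		return -ENOMEM;

	/* Enqueue one copy and ring the doorbell in the same call. */
	ret = rte_dma_copy(dev_id, 0, rte_malloc_virt2iova(src),
			   rte_malloc_virt2iova(dst), 4096,
			   RTE_DMA_OP_FLAG_SUBMIT);
	if (ret < 0)
		return ret;

	/* Poll until the single job completes. */
	while (rte_dma_completed(dev_id, 0, 1, &last_idx, &error) == 0)
		;

	rte_free(src);
	rte_free(dst);
	return error ? -EIO : 0;
}
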
diff --git a/drivers/dma/dpaa2/dpaa2_qdma.h b/drivers/dma/dpaa2/dpaa2_qdma.h
index 811906fcbc..371393cb85 100644
--- a/drivers/dma/dpaa2/dpaa2_qdma.h
+++ b/drivers/dma/dpaa2/dpaa2_qdma.h
@@ -1,11 +1,14 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018-2022 NXP
+ * Copyright 2018-2023 NXP
  */
 
 #ifndef _DPAA2_QDMA_H_
 #define _DPAA2_QDMA_H_
 
-#define DPAA2_QDMA_MAX_DESC		1024
+#include "portal/dpaa2_hw_pvt.h"
+#include "portal/dpaa2_hw_dpio.h"
+
+#define DPAA2_QDMA_MAX_DESC		4096
 #define DPAA2_QDMA_MIN_DESC		1
 #define DPAA2_QDMA_MAX_VHANS		64
 
@@ -13,48 +16,9 @@
 #define DPAA2_QDMA_VQ_FD_SG_FORMAT		(1ULL << 1)
 #define DPAA2_QDMA_VQ_NO_RESPONSE		(1ULL << 2)
 
-#define DPAA2_QDMA_MAX_FLE 3
-#define DPAA2_QDMA_MAX_SDD 2
-
-#define DPAA2_QDMA_MAX_SG_NB 64
-
-#define DPAA2_DPDMAI_MAX_QUEUES	1
-
-/** FLE single job pool size: job pointer(uint64_t) +
- * 3 Frame list + 2 source/destination descriptor.
- */
-#define QDMA_FLE_SINGLE_POOL_SIZE (sizeof(uint64_t) + \
-			sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE + \
-			sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD)
-
-/** FLE sg jobs pool size: job number(uint64_t) +
- * 3 Frame list + 2 source/destination descriptor  +
- * 64 (src + dst) sg entries + 64 jobs pointers.
- */
-#define QDMA_FLE_SG_POOL_SIZE (sizeof(uint64_t) + \
-		sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE + \
-		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD + \
-		sizeof(struct qdma_sg_entry) * (DPAA2_QDMA_MAX_SG_NB * 2) + \
-		sizeof(struct rte_qdma_job *) * DPAA2_QDMA_MAX_SG_NB)
-
-#define QDMA_FLE_JOB_NB_OFFSET 0
+#define DPAA2_DPDMAI_MAX_QUEUES	16
 
-#define QDMA_FLE_SINGLE_JOB_OFFSET 0
-
-#define QDMA_FLE_FLE_OFFSET \
-		(QDMA_FLE_JOB_NB_OFFSET + sizeof(uint64_t))
-
-#define QDMA_FLE_SDD_OFFSET \
-		(QDMA_FLE_FLE_OFFSET + \
-		sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE)
-
-#define QDMA_FLE_SG_ENTRY_OFFSET \
-		(QDMA_FLE_SDD_OFFSET + \
-		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD)
-
-#define QDMA_FLE_SG_JOBS_OFFSET \
-		(QDMA_FLE_SG_ENTRY_OFFSET + \
-		sizeof(struct qdma_sg_entry) * DPAA2_QDMA_MAX_SG_NB * 2)
+#define QDMA_JOB_HW_CNTX_IDX (RTE_DPAA2_QDMA_JOB_USR_CNTX_IDX + 1)
 
 /** FLE pool cache size */
 #define QDMA_FLE_CACHE_SIZE(_num) (_num/(RTE_MAX_LCORE * 2))
@@ -202,12 +166,49 @@ struct dpaa2_qdma_rbp {
 	uint32_t rsv:2;
 };
 
+enum {
+	DPAA2_QDMA_SDD_FLE,
+	DPAA2_QDMA_SRC_FLE,
+	DPAA2_QDMA_DST_FLE,
+	DPAA2_QDMA_MAX_FLE
+};
+
+enum {
+	DPAA2_QDMA_SRC_SDD,
+	DPAA2_QDMA_DST_SDD,
+	DPAA2_QDMA_MAX_SDD
+};
+
+struct qdma_cntx_fle_sdd {
+	struct qbman_fle fle[DPAA2_QDMA_MAX_FLE];
+	struct qdma_sdd sdd[DPAA2_QDMA_MAX_SDD];
+} __rte_packed;
+
+struct qdma_cntx_sg {
+	struct qdma_cntx_fle_sdd fle_sdd;
+	struct qdma_sg_entry sg_src_entry[RTE_DPAA2_QDMA_JOB_SUBMIT_MAX];
+	struct qdma_sg_entry sg_dst_entry[RTE_DPAA2_QDMA_JOB_SUBMIT_MAX];
+	uint16_t cntx_idx[RTE_DPAA2_QDMA_JOB_SUBMIT_MAX];
+	uint16_t job_nb;
+	uint16_t rsv[3];
+} __rte_packed;
+
+struct qdma_cntx_long {
+	struct qdma_cntx_fle_sdd fle_sdd;
+	uint16_t cntx_idx;
+	uint16_t rsv[3];
+} __rte_packed;
+
+#define DPAA2_QDMA_IDXADDR_FROM_SG_FLAG(flag) \
+	((void *)(uintptr_t)((flag) - ((flag) & RTE_DPAA2_QDMA_SG_IDX_ADDR_MASK)))
+
+#define DPAA2_QDMA_IDX_FROM_FLAG(flag) \
+	((flag) >> RTE_DPAA2_QDMA_COPY_IDX_OFFSET)
+
 /** Represents a DPDMAI device */
 struct dpaa2_dpdmai_dev {
 	/** Pointer to Next device instance */
 	TAILQ_ENTRY(dpaa2_qdma_device) next;
-	/** handle to DPDMAI object */
-	struct fsl_mc_io dpdmai;
 	/** HW ID for DPDMAI object */
 	uint32_t dpdmai_id;
 	/** Tocken of this device */
@@ -221,42 +222,30 @@ struct dpaa2_dpdmai_dev {
 	struct qdma_device *qdma_dev;
 };
 
-struct qdma_virt_queue;
-
-typedef uint16_t (qdma_get_job_t)(struct qdma_virt_queue *qdma_vq,
-					const struct qbman_fd *fd,
-					struct rte_dpaa2_qdma_job **job,
-					uint16_t *nb_jobs);
-typedef int (qdma_set_fd_t)(struct qdma_virt_queue *qdma_vq,
-					struct qbman_fd *fd,
-					struct rte_dpaa2_qdma_job **job,
-					uint16_t nb_jobs);
-
-typedef int (qdma_dequeue_multijob_t)(
-				struct qdma_virt_queue *qdma_vq,
-				uint16_t *vq_id,
-				struct rte_dpaa2_qdma_job **job,
-				uint16_t nb_jobs);
+#define QDMA_CNTX_IDX_RING_EXTRA_SPACE 64
+#define QDMA_CNTX_IDX_RING_MAX_FREE \
+	(DPAA2_QDMA_MAX_DESC - QDMA_CNTX_IDX_RING_EXTRA_SPACE)
+struct qdma_cntx_idx_ring {
+	uint16_t cntx_idx_ring[DPAA2_QDMA_MAX_DESC];
+	uint16_t start;
+	uint16_t tail;
+	uint16_t free_space;
+	uint16_t nb_in_ring;
+};
 
-typedef int (qdma_enqueue_multijob_t)(
-			struct qdma_virt_queue *qdma_vq,
-			struct rte_dpaa2_qdma_job **job,
-			uint16_t nb_jobs);
+#define DPAA2_QDMA_DESC_DEBUG_FLAG (1 << 0)
 
 /** Represents a QDMA virtual queue */
 struct qdma_virt_queue {
-	/** Status ring of the virtual queue */
-	struct rte_ring *status_ring;
 	/** Associated hw queue */
 	struct dpaa2_dpdmai_dev *dpdmai_dev;
 	/** FLE pool for the queue */
 	struct rte_mempool *fle_pool;
+	void **fle_elem;
 	/** Route by port */
 	struct dpaa2_qdma_rbp rbp;
 	/** States if this vq is in use or not */
-	uint8_t in_use;
-	/** States if this vq has exclusively associated hw queue */
-	uint8_t exclusive_hw_queue;
+	uint8_t fle_pre_populate;
 	/** Number of descriptor for the virtual DMA channel */
 	uint16_t nb_desc;
 	/* Total number of enqueues on this VQ */
@@ -266,18 +255,18 @@ struct qdma_virt_queue {
 
 	uint16_t vq_id;
 	uint32_t flags;
+	struct qbman_fd fd[DPAA2_QDMA_MAX_DESC];
+	uint16_t fd_idx;
+	struct qdma_cntx_idx_ring *ring_cntx_idx;
+
+	/** Used when silent mode is enabled */
+	struct qdma_cntx_sg *cntx_sg[DPAA2_QDMA_MAX_DESC];
+	struct qdma_cntx_long *cntx_long[DPAA2_QDMA_MAX_DESC];
+	uint16_t silent_idx;
 
-	struct rte_dpaa2_qdma_job *job_list[DPAA2_QDMA_MAX_DESC];
-	struct rte_mempool *job_pool;
 	int num_valid_jobs;
 
 	struct rte_dma_stats stats;
-
-	qdma_set_fd_t *set_fd;
-	qdma_get_job_t *get_job;
-
-	qdma_dequeue_multijob_t *dequeue_job;
-	qdma_enqueue_multijob_t *enqueue_job;
 };
 
 /** Represents a QDMA device. */
@@ -286,8 +275,7 @@ struct qdma_device {
 	struct qdma_virt_queue *vqs;
 	/** Total number of VQ's */
 	uint16_t num_vqs;
-	/** Device state - started or stopped */
-	uint8_t state;
+	uint8_t is_silent;
 };
 
 #endif /* _DPAA2_QDMA_H_ */
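
[Editor's note] A sketch of how a dequeue from the context-index ring above
can work, given its fields; this is an assumption for illustration, not the
qdma_cntx_idx_ring_dq() helper that the patch itself provides elsewhere.
DPAA2_QDMA_MAX_DESC (4096) is a power of two, so the wrap can be a mask:

static inline uint16_t
cntx_idx_ring_dq_sketch(struct qdma_cntx_idx_ring *ring,
	uint16_t *cntx_idx, uint16_t nb)
{
	uint16_t i, num = RTE_MIN(nb, ring->nb_in_ring);

	/* Copy up to 'nb' stored indexes out, starting at 'start'. */
	for (i = 0; i < num; i++) {
		cntx_idx[i] = ring->cntx_idx_ring[ring->start];
		ring->start = (ring->start + 1) & (DPAA2_QDMA_MAX_DESC - 1);
	}
	/* Rebalance the occupancy counters. */
	ring->nb_in_ring -= num;
	ring->free_space += num;

	return num;
}
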
diff --git a/drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h b/drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h
index b0bf9d8bcc..e49604c8fc 100644
--- a/drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h
+++ b/drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2021-2022 NXP
+ * Copyright 2021-2023 NXP
  */
 
 #ifndef _RTE_PMD_DPAA2_QDMA_H_
@@ -7,118 +7,30 @@
 
 #include <rte_compat.h>
 
-/** States if the source addresses is physical. */
-#define RTE_DPAA2_QDMA_JOB_SRC_PHY		(1ULL << 30)
+#define RTE_DPAA2_QDMA_IDX_SHIFT_POS 20
+#define RTE_DPAA2_QDMA_LEN_MASK \
+	(~((~0u) << RTE_DPAA2_QDMA_IDX_SHIFT_POS))
 
-/** States if the destination addresses is physical. */
-#define RTE_DPAA2_QDMA_JOB_DEST_PHY		(1ULL << 31)
+#define RTE_DPAA2_QDMA_IDX_LEN(idx, len) \
+	((uint32_t)(((idx) << RTE_DPAA2_QDMA_IDX_SHIFT_POS) | ((len) & RTE_DPAA2_QDMA_LEN_MASK)))
 
-/** Determines a QDMA job */
-struct rte_dpaa2_qdma_job {
-	/** Source Address from where DMA is (to be) performed */
-	uint64_t src;
-	/** Destination Address where DMA is (to be) done */
-	uint64_t dest;
-	/** Length of the DMA operation in bytes. */
-	uint32_t len;
-	/** See RTE_QDMA_JOB_ flags */
-	uint32_t flags;
-	/**
-	 * Status of the transaction.
-	 * This is filled in the dequeue operation by the driver.
-	 * upper 8bits acc_err for route by port.
-	 * lower 8bits fd error
-	 */
-	uint16_t status;
-	uint16_t vq_id;
-	uint64_t cnxt;
-	/**
-	 * FLE pool element maintained by user, in case no qDMA response.
-	 * Note: the address must be allocated from DPDK memory pool.
-	 */
-	void *usr_elem;
-};
+#define RTE_DPAA2_QDMA_IDX_FROM_LENGTH(length) \
+	((uint16_t)((length) >> RTE_DPAA2_QDMA_IDX_SHIFT_POS))
 
-/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice.
- *
- * Enable FD in Ultra Short format on a channel. This API should be
- * called before calling 'rte_dma_vchan_setup()' API.
- *
- * @param dev_id
- *   The identifier of the device.
- * @param vchan
- *   The identifier of virtual DMA channel.
- */
-__rte_experimental
-void rte_dpaa2_qdma_vchan_fd_us_enable(int16_t dev_id, uint16_t vchan);
+#define RTE_DPAA2_QDMA_LEN_FROM_LENGTH(length) \
+	((length) & RTE_DPAA2_QDMA_LEN_MASK)
 
-/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice.
- *
- * Enable internal SG processing on a channel. This API should be
- * called before calling 'rte_dma_vchan_setup()' API.
- *
- * @param dev_id
- *   The identifier of the device.
- * @param vchan
- *   The identifier of virtual DMA channel.
- */
-__rte_experimental
-void rte_dpaa2_qdma_vchan_internal_sg_enable(int16_t dev_id, uint16_t vchan);
+#define RTE_DPAA2_QDMA_COPY_IDX_OFFSET 8
+#define RTE_DPAA2_QDMA_SG_IDX_ADDR_ALIGN \
+	RTE_BIT64(RTE_DPAA2_QDMA_COPY_IDX_OFFSET)
+#define RTE_DPAA2_QDMA_SG_IDX_ADDR_MASK \
+	(RTE_DPAA2_QDMA_SG_IDX_ADDR_ALIGN - 1)
+#define RTE_DPAA2_QDMA_SG_SUBMIT(idx_addr, flag) \
+	(((uint64_t)(idx_addr)) | (flag))
 
-/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice.
- *
- * Enqueue a copy operation onto the virtual DMA channel for silent mode,
- * when dequeue is not required.
- *
- * This queues up a copy operation to be performed by hardware, if the 'flags'
- * parameter contains RTE_DMA_OP_FLAG_SUBMIT then trigger doorbell to begin
- * this operation, otherwise do not trigger doorbell.
- *
- * @param dev_id
- *   The identifier of the device.
- * @param vchan
- *   The identifier of virtual DMA channel.
- * @param jobs
- *   Jobs to be submitted to QDMA.
- * @param nb_cpls
- *   Number of DMA jobs.
- *
- * @return
- *   - >= 0..Number of enqueued job.
- *   - -ENOSPC: if no space left to enqueue.
- *   - other values < 0 on failure.
- */
-__rte_experimental
-int rte_dpaa2_qdma_copy_multi(int16_t dev_id, uint16_t vchan,
-		struct rte_dpaa2_qdma_job **jobs, uint16_t nb_cpls);
-
-/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice.
- *
- * Return the number of operations that have been successfully completed.
- *
- * @param dev_id
- *   The identifier of the device.
- * @param vchan
- *   The identifier of virtual DMA channel.
- * @param jobs
- *   Jobs completed by QDMA.
- * @param nb_cpls
- *   Number of completed DMA jobs.
- *
- * @return
- *   The number of operations that successfully completed. This return value
- *   must be less than or equal to the value of nb_cpls.
- */
-__rte_experimental
-uint16_t rte_dpaa2_qdma_completed_multi(int16_t dev_id, uint16_t vchan,
-		struct rte_dpaa2_qdma_job **jobs, uint16_t nb_cpls);
+#define RTE_DPAA2_QDMA_COPY_SUBMIT(idx, flag) \
+	(((idx) << RTE_DPAA2_QDMA_COPY_IDX_OFFSET) | (flag))
 
+#define RTE_DPAA2_QDMA_JOB_SUBMIT_MAX (32 + 8)
+#define RTE_DMA_CAPA_DPAA2_QDMA_FLAGS_INDEX RTE_BIT64(63)
 #endif /* _RTE_PMD_DPAA2_QDMA_H_ */
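
[Editor's note] An editorial illustration of the pack/unpack round trip for
the index-in-length macros above (not part of the patch). Because the index
occupies the bits above RTE_DPAA2_QDMA_IDX_SHIFT_POS in a 32-bit value, the
copy length must stay below 1 MB and the index below 4096:

#include <assert.h>
#include <stdint.h>

static void
idx_len_roundtrip(void)
{
	uint16_t idx = 37;    /* assumed application-chosen context index */
	uint32_t len = 4096;  /* must be < (1u << RTE_DPAA2_QDMA_IDX_SHIFT_POS) */
	uint32_t idx_len = RTE_DPAA2_QDMA_IDX_LEN(idx, len);

	/* Both fields are recoverable from the packed value. */
	assert(RTE_DPAA2_QDMA_IDX_FROM_LENGTH(idx_len) == idx);
	assert(RTE_DPAA2_QDMA_LEN_FROM_LENGTH(idx_len) == len);
}
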
diff --git a/drivers/dma/dpaa2/version.map b/drivers/dma/dpaa2/version.map
deleted file mode 100644
index 43e8b2d5c5..0000000000
--- a/drivers/dma/dpaa2/version.map
+++ /dev/null
@@ -1,13 +0,0 @@
-DPDK_25 {
-	local: *;
-};
-
-EXPERIMENTAL {
-	global:
-
-	# added in 22.07
-	rte_dpaa2_qdma_completed_multi;
-	rte_dpaa2_qdma_copy_multi;
-	rte_dpaa2_qdma_vchan_fd_us_enable;
-	rte_dpaa2_qdma_vchan_internal_sg_enable;
-};
-- 
2.25.1

