DPDK patches and discussions
 help / color / mirror / Atom feed
From: Gagandeep Singh <g.singh@nxp.com>
To: dev@dpdk.org, Hemant Agrawal <hemant.agrawal@nxp.com>
Cc: Jun Yang <jun.yang@nxp.com>
Subject: [PATCH 03/30] dma/dpaa2: adapt DMA driver API
Date: Fri, 19 Jul 2024 15:30:59 +0530	[thread overview]
Message-ID: <20240719100126.1150373-3-g.singh@nxp.com> (raw)
In-Reply-To: <20240719100126.1150373-1-g.singh@nxp.com>

From: Jun Yang <jun.yang@nxp.com>

1) Support DMA single copy and SG copy.
2) Silent mode support.

Add index combined with length field.
For Silent mode, this index is used to notify DMA driver
which inner descriptor should be used.
For none silent mode, this index is used to notify user
which descriptor is completed.
In addition, because dpaa2 qdma is not able to preserve order,
"rte_dma_completed_t" returns multiple indexes instead of last index.

Signed-off-by: Jun Yang <jun.yang@nxp.com>
---
 drivers/dma/dpaa2/dpaa2_qdma.c         | 1667 +++++++++++-------------
 drivers/dma/dpaa2/dpaa2_qdma.h         |  126 +-
 drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h |  119 +-
 drivers/dma/dpaa2/version.map          |   13 -
 4 files changed, 799 insertions(+), 1126 deletions(-)
 delete mode 100644 drivers/dma/dpaa2/version.map

diff --git a/drivers/dma/dpaa2/dpaa2_qdma.c b/drivers/dma/dpaa2/dpaa2_qdma.c
index 945ba71e4a..b467845fa0 100644
--- a/drivers/dma/dpaa2/dpaa2_qdma.c
+++ b/drivers/dma/dpaa2/dpaa2_qdma.c
@@ -16,218 +16,345 @@
 
 #define DPAA2_QDMA_PREFETCH "prefetch"
 
-uint32_t dpaa2_coherent_no_alloc_cache;
-uint32_t dpaa2_coherent_alloc_cache;
+static uint32_t dpaa2_coherent_no_alloc_cache;
+static uint32_t dpaa2_coherent_alloc_cache;
 
 static inline int
-qdma_populate_fd_pci(phys_addr_t src, phys_addr_t dest,
-		     uint32_t len, struct qbman_fd *fd,
-		     struct dpaa2_qdma_rbp *rbp, int ser)
+qdma_cntx_idx_ring_eq(struct qdma_cntx_idx_ring *ring,
+	const uint16_t *elem, uint16_t nb,
+	uint16_t *free_space)
 {
-	fd->simple_pci.saddr_lo = lower_32_bits((uint64_t) (src));
-	fd->simple_pci.saddr_hi = upper_32_bits((uint64_t) (src));
+	if (unlikely(nb > ring->free_space))
+		return 0;
 
-	fd->simple_pci.len_sl = len;
+	if ((ring->tail + nb) < DPAA2_QDMA_MAX_DESC) {
+		rte_memcpy(&ring->cntx_idx_ring[ring->tail],
+			elem, nb * sizeof(uint16_t));
+		ring->tail += nb;
+	} else {
+		rte_memcpy(&ring->cntx_idx_ring[ring->tail],
+			elem,
+			(DPAA2_QDMA_MAX_DESC - ring->tail) *
+			sizeof(uint16_t));
+		rte_memcpy(&ring->cntx_idx_ring[0],
+			&elem[DPAA2_QDMA_MAX_DESC - ring->tail],
+			(nb - DPAA2_QDMA_MAX_DESC + ring->tail) *
+			sizeof(uint16_t));
+		ring->tail = (ring->tail + nb) & (DPAA2_QDMA_MAX_DESC - 1);
+	}
+	ring->free_space -= nb;
+	ring->nb_in_ring += nb;
 
-	fd->simple_pci.bmt = 1;
-	fd->simple_pci.fmt = 3;
-	fd->simple_pci.sl = 1;
-	fd->simple_pci.ser = ser;
+	if (free_space)
+		*free_space = ring->free_space;
 
-	fd->simple_pci.sportid = rbp->sportid;	/*pcie 3 */
-	fd->simple_pci.srbp = rbp->srbp;
-	if (rbp->srbp)
-		fd->simple_pci.rdttype = 0;
-	else
-		fd->simple_pci.rdttype = dpaa2_coherent_alloc_cache;
+	return nb;
+}
 
-	/*dest is pcie memory */
-	fd->simple_pci.dportid = rbp->dportid;	/*pcie 3 */
-	fd->simple_pci.drbp = rbp->drbp;
-	if (rbp->drbp)
-		fd->simple_pci.wrttype = 0;
-	else
-		fd->simple_pci.wrttype = dpaa2_coherent_no_alloc_cache;
+static inline int
+qdma_cntx_idx_ring_dq(struct qdma_cntx_idx_ring *ring,
+	uint16_t *elem, uint16_t max)
+{
+	int ret = ring->nb_in_ring > max ? max : ring->nb_in_ring;
 
-	fd->simple_pci.daddr_lo = lower_32_bits((uint64_t) (dest));
-	fd->simple_pci.daddr_hi = upper_32_bits((uint64_t) (dest));
+	if (!ret)
+		return 0;
 
-	return 0;
+	if ((ring->start + ret) < DPAA2_QDMA_MAX_DESC) {
+		rte_memcpy(elem,
+			&ring->cntx_idx_ring[ring->start],
+			ret * sizeof(uint16_t));
+		ring->start += ret;
+	} else {
+		rte_memcpy(elem,
+			&ring->cntx_idx_ring[ring->start],
+			(DPAA2_QDMA_MAX_DESC - ring->start) *
+			sizeof(uint16_t));
+		rte_memcpy(&elem[DPAA2_QDMA_MAX_DESC - ring->start],
+			&ring->cntx_idx_ring[0],
+			(ret - DPAA2_QDMA_MAX_DESC + ring->start) *
+			sizeof(uint16_t));
+		ring->start = (ring->start + ret) & (DPAA2_QDMA_MAX_DESC - 1);
+	}
+	ring->free_space += ret;
+	ring->nb_in_ring -= ret;
+
+	return ret;
 }
 
-static inline int
-qdma_populate_fd_ddr(phys_addr_t src, phys_addr_t dest,
-		     uint32_t len, struct qbman_fd *fd, int ser)
+static int
+dpaa2_qdma_multi_eq(struct qdma_virt_queue *qdma_vq)
 {
-	fd->simple_ddr.saddr_lo = lower_32_bits((uint64_t) (src));
-	fd->simple_ddr.saddr_hi = upper_32_bits((uint64_t) (src));
-
-	fd->simple_ddr.len = len;
-
-	fd->simple_ddr.bmt = 1;
-	fd->simple_ddr.fmt = 3;
-	fd->simple_ddr.sl = 1;
-	fd->simple_ddr.ser = ser;
-	/**
-	 * src If RBP=0 {NS,RDTTYPE[3:0]}: 0_1011
-	 * Coherent copy of cacheable memory,
-	* lookup in downstream cache, no allocate
-	 * on miss
-	 */
-	fd->simple_ddr.rns = 0;
-	fd->simple_ddr.rdttype = dpaa2_coherent_alloc_cache;
-	/**
-	 * dest If RBP=0 {NS,WRTTYPE[3:0]}: 0_0111
-	 * Coherent write of cacheable memory,
-	 * lookup in downstream cache, no allocate on miss
-	 */
-	fd->simple_ddr.wns = 0;
-	fd->simple_ddr.wrttype = dpaa2_coherent_no_alloc_cache;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_vq->dpdmai_dev;
+	uint16_t txq_id = dpdmai_dev->tx_queue[qdma_vq->vq_id].fqid;
+	struct qbman_eq_desc eqdesc;
+	struct qbman_swp *swp;
+	uint32_t num_to_send = 0;
+	uint16_t num_tx = 0;
+	uint32_t enqueue_loop, loop;
+	int ret;
+	struct qbman_fd *fd = qdma_vq->fd;
+	uint16_t nb_fds = qdma_vq->fd_idx, idx, dst_idx;
 
-	fd->simple_ddr.daddr_lo = lower_32_bits((uint64_t) (dest));
-	fd->simple_ddr.daddr_hi = upper_32_bits((uint64_t) (dest));
+	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
+		ret = dpaa2_affine_qbman_swp();
+		if (ret) {
+			DPAA2_QDMA_ERR("Failed to allocate IO portal, tid: %d",
+				rte_gettid());
+			return -EIO;
+		}
+	}
+	swp = DPAA2_PER_LCORE_PORTAL;
 
-	return 0;
+	/* Prepare enqueue descriptor */
+	qbman_eq_desc_clear(&eqdesc);
+	qbman_eq_desc_set_fq(&eqdesc, txq_id);
+	qbman_eq_desc_set_no_orp(&eqdesc, 0);
+	qbman_eq_desc_set_response(&eqdesc, 0, 0);
+
+	while (nb_fds > 0) {
+		num_to_send = (nb_fds > dpaa2_eqcr_size) ?
+			dpaa2_eqcr_size : nb_fds;
+
+		/* Enqueue the packet to the QBMAN */
+		enqueue_loop = 0;
+		loop = num_to_send;
+
+		while (enqueue_loop < loop) {
+			ret = qbman_swp_enqueue_multiple(swp,
+				&eqdesc,
+				&fd[num_tx + enqueue_loop],
+				NULL,
+				loop - enqueue_loop);
+			if (likely(ret >= 0))
+				enqueue_loop += ret;
+		}
+		num_tx += num_to_send;
+		nb_fds -= loop;
+	}
+
+	qdma_vq->num_enqueues += num_tx;
+	if (unlikely(num_tx != qdma_vq->fd_idx)) {
+		dst_idx = 0;
+		for (idx = num_tx; idx < qdma_vq->fd_idx; idx++) {
+			rte_memcpy(&qdma_vq->fd[dst_idx],
+				&qdma_vq->fd[idx],
+				sizeof(struct qbman_fd));
+			dst_idx++;
+		}
+	}
+	qdma_vq->fd_idx -= num_tx;
+
+	return num_tx;
 }
 
 static void
-dpaa2_qdma_populate_fle(struct qbman_fle *fle,
-			uint64_t fle_iova,
-			struct dpaa2_qdma_rbp *rbp,
-			uint64_t src, uint64_t dest,
-			size_t len, uint32_t flags, uint32_t fmt)
+fle_sdd_pre_populate(struct qdma_cntx_fle_sdd *fle_sdd,
+	struct dpaa2_qdma_rbp *rbp, uint64_t src, uint64_t dest,
+	uint32_t fmt)
 {
-	struct qdma_sdd *sdd;
-	uint64_t sdd_iova;
-
-	sdd = (struct qdma_sdd *)
-			((uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET +
-			QDMA_FLE_SDD_OFFSET);
-	sdd_iova = fle_iova - QDMA_FLE_FLE_OFFSET + QDMA_FLE_SDD_OFFSET;
+	struct qbman_fle *fle = fle_sdd->fle;
+	struct qdma_sdd *sdd = fle_sdd->sdd;
+	uint64_t sdd_iova = DPAA2_VADDR_TO_IOVA(sdd);
 
 	/* first frame list to source descriptor */
-	DPAA2_SET_FLE_ADDR(fle, sdd_iova);
-	DPAA2_SET_FLE_LEN(fle, (2 * (sizeof(struct qdma_sdd))));
+	DPAA2_SET_FLE_ADDR(&fle[DPAA2_QDMA_SDD_FLE], sdd_iova);
+	DPAA2_SET_FLE_LEN(&fle[DPAA2_QDMA_SDD_FLE],
+		DPAA2_QDMA_MAX_SDD * (sizeof(struct qdma_sdd)));
 
 	/* source and destination descriptor */
 	if (rbp && rbp->enable) {
 		/* source */
-		sdd->read_cmd.portid = rbp->sportid;
-		sdd->rbpcmd_simple.pfid = rbp->spfid;
-		sdd->rbpcmd_simple.vfid = rbp->svfid;
+		sdd[DPAA2_QDMA_SRC_SDD].read_cmd.portid =
+			rbp->sportid;
+		sdd[DPAA2_QDMA_SRC_SDD].rbpcmd_simple.pfid =
+			rbp->spfid;
+		sdd[DPAA2_QDMA_SRC_SDD].rbpcmd_simple.vfid =
+			rbp->svfid;
+		sdd[DPAA2_QDMA_SRC_SDD].rbpcmd_simple.vfa =
+			rbp->svfa;
 
 		if (rbp->srbp) {
-			sdd->read_cmd.rbp = rbp->srbp;
-			sdd->read_cmd.rdtype = DPAA2_RBP_MEM_RW;
+			sdd[DPAA2_QDMA_SRC_SDD].read_cmd.rbp =
+				rbp->srbp;
+			sdd[DPAA2_QDMA_SRC_SDD].read_cmd.rdtype =
+				DPAA2_RBP_MEM_RW;
 		} else {
-			sdd->read_cmd.rdtype = dpaa2_coherent_no_alloc_cache;
+			sdd[DPAA2_QDMA_SRC_SDD].read_cmd.rdtype =
+				dpaa2_coherent_no_alloc_cache;
 		}
-		sdd++;
 		/* destination */
-		sdd->write_cmd.portid = rbp->dportid;
-		sdd->rbpcmd_simple.pfid = rbp->dpfid;
-		sdd->rbpcmd_simple.vfid = rbp->dvfid;
+		sdd[DPAA2_QDMA_DST_SDD].write_cmd.portid =
+			rbp->dportid;
+		sdd[DPAA2_QDMA_DST_SDD].rbpcmd_simple.pfid =
+			rbp->dpfid;
+		sdd[DPAA2_QDMA_DST_SDD].rbpcmd_simple.vfid =
+			rbp->dvfid;
+		sdd[DPAA2_QDMA_DST_SDD].rbpcmd_simple.vfa =
+			rbp->dvfa;
 
 		if (rbp->drbp) {
-			sdd->write_cmd.rbp = rbp->drbp;
-			sdd->write_cmd.wrttype = DPAA2_RBP_MEM_RW;
+			sdd[DPAA2_QDMA_DST_SDD].write_cmd.rbp =
+				rbp->drbp;
+			sdd[DPAA2_QDMA_DST_SDD].write_cmd.wrttype =
+				DPAA2_RBP_MEM_RW;
 		} else {
-			sdd->write_cmd.wrttype = dpaa2_coherent_alloc_cache;
+			sdd[DPAA2_QDMA_DST_SDD].write_cmd.wrttype =
+				dpaa2_coherent_alloc_cache;
 		}
-
 	} else {
-		sdd->read_cmd.rdtype = dpaa2_coherent_no_alloc_cache;
-		sdd++;
-		sdd->write_cmd.wrttype = dpaa2_coherent_alloc_cache;
+		sdd[DPAA2_QDMA_SRC_SDD].read_cmd.rdtype =
+			dpaa2_coherent_no_alloc_cache;
+		sdd[DPAA2_QDMA_DST_SDD].write_cmd.wrttype =
+			dpaa2_coherent_alloc_cache;
 	}
-	fle++;
 	/* source frame list to source buffer */
-	if (flags & RTE_DPAA2_QDMA_JOB_SRC_PHY) {
-		DPAA2_SET_FLE_ADDR(fle, src);
+	DPAA2_SET_FLE_ADDR(&fle[DPAA2_QDMA_SRC_FLE], src);
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
-		DPAA2_SET_FLE_BMT(fle);
+	DPAA2_SET_FLE_BMT(&fle[DPAA2_QDMA_SRC_FLE]);
 #endif
-	} else {
-		DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(src));
-	}
-	fle->word4.fmt = fmt;
-	DPAA2_SET_FLE_LEN(fle, len);
+	fle[DPAA2_QDMA_SRC_FLE].word4.fmt = fmt;
 
-	fle++;
 	/* destination frame list to destination buffer */
-	if (flags & RTE_DPAA2_QDMA_JOB_DEST_PHY) {
+	DPAA2_SET_FLE_ADDR(&fle[DPAA2_QDMA_DST_FLE], dest);
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
-		DPAA2_SET_FLE_BMT(fle);
+	DPAA2_SET_FLE_BMT(&fle[DPAA2_QDMA_DST_FLE]);
 #endif
-		DPAA2_SET_FLE_ADDR(fle, dest);
-	} else {
-		DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(dest));
-	}
-	fle->word4.fmt = fmt;
-	DPAA2_SET_FLE_LEN(fle, len);
+	fle[DPAA2_QDMA_DST_FLE].word4.fmt = fmt;
 
 	/* Final bit: 1, for last frame list */
-	DPAA2_SET_FLE_FIN(fle);
+	DPAA2_SET_FLE_FIN(&fle[DPAA2_QDMA_DST_FLE]);
 }
 
-static inline int
-dpdmai_dev_set_fd_us(struct qdma_virt_queue *qdma_vq,
-		     struct qbman_fd *fd,
-		     struct rte_dpaa2_qdma_job **job,
-		     uint16_t nb_jobs)
+static void
+sg_entry_pre_populate(struct qdma_cntx_sg *sg_cntx)
 {
+	uint16_t i;
+	struct qdma_sg_entry *src_sge = sg_cntx->sg_src_entry;
+	struct qdma_sg_entry *dst_sge = sg_cntx->sg_dst_entry;
+
+	for (i = 0; i < RTE_DPAA2_QDMA_JOB_SUBMIT_MAX; i++) {
+		/* source SG */
+		src_sge[i].ctrl.sl = QDMA_SG_SL_LONG;
+		src_sge[i].ctrl.fmt = QDMA_SG_FMT_SDB;
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+		src_sge[i].ctrl.bmt = QDMA_SG_BMT_ENABLE;
+#else
+		src_sge[i].ctrl.bmt = QDMA_SG_BMT_DISABLE;
+#endif
+		/* destination SG */
+		dst_sge[i].ctrl.sl = QDMA_SG_SL_LONG;
+		dst_sge[i].ctrl.fmt = QDMA_SG_FMT_SDB;
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+		dst_sge[i].ctrl.bmt = QDMA_SG_BMT_ENABLE;
+#else
+		dst_sge[i].ctrl.bmt = QDMA_SG_BMT_DISABLE;
+#endif
+	}
+}
+
+static void
+fle_sdd_sg_pre_populate(struct qdma_cntx_sg *sg_cntx,
+	struct qdma_virt_queue *qdma_vq)
+{
+	struct qdma_sg_entry *src_sge = sg_cntx->sg_src_entry;
+	struct qdma_sg_entry *dst_sge = sg_cntx->sg_dst_entry;
+	rte_iova_t src_sge_iova, dst_sge_iova;
 	struct dpaa2_qdma_rbp *rbp = &qdma_vq->rbp;
-	struct rte_dpaa2_qdma_job **ppjob;
-	size_t iova;
-	int ret = 0, loop;
-	int ser = (qdma_vq->flags & DPAA2_QDMA_VQ_NO_RESPONSE) ?
-				0 : 1;
-
-	for (loop = 0; loop < nb_jobs; loop++) {
-		if (job[loop]->src & QDMA_RBP_UPPER_ADDRESS_MASK)
-			iova = (size_t)job[loop]->dest;
-		else
-			iova = (size_t)job[loop]->src;
-
-		/* Set the metadata */
-		job[loop]->vq_id = qdma_vq->vq_id;
-		ppjob = (struct rte_dpaa2_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1;
-		*ppjob = job[loop];
-
-		if ((rbp->drbp == 1) || (rbp->srbp == 1))
-			ret = qdma_populate_fd_pci((phys_addr_t)job[loop]->src,
-					(phys_addr_t)job[loop]->dest,
-					job[loop]->len, &fd[loop], rbp, ser);
-		else
-			ret = qdma_populate_fd_ddr((phys_addr_t)job[loop]->src,
-					(phys_addr_t)job[loop]->dest,
-					job[loop]->len, &fd[loop], ser);
+
+	memset(sg_cntx, 0, sizeof(struct qdma_cntx_sg));
+
+	src_sge_iova = DPAA2_VADDR_TO_IOVA(src_sge);
+	dst_sge_iova = DPAA2_VADDR_TO_IOVA(dst_sge);
+
+	sg_entry_pre_populate(sg_cntx);
+	fle_sdd_pre_populate(&sg_cntx->fle_sdd,
+		rbp, src_sge_iova, dst_sge_iova,
+		QBMAN_FLE_WORD4_FMT_SGE);
+}
+
+static inline uint32_t
+sg_entry_post_populate(const struct rte_dma_sge *src,
+	const struct rte_dma_sge *dst, struct qdma_cntx_sg *sg_cntx,
+	uint16_t nb_sge)
+{
+	uint16_t i = 0, idx;
+	uint32_t total_len = 0, len;
+	struct qdma_sg_entry *src_sge = sg_cntx->sg_src_entry;
+	struct qdma_sg_entry *dst_sge = sg_cntx->sg_dst_entry;
+
+	for (i = 0; i < (nb_sge - 1); i++) {
+		if (unlikely(src[i].length != dst[i].length))
+			return -ENOTSUP;
+		len = RTE_DPAA2_QDMA_LEN_FROM_LENGTH(src[i].length);
+		idx = RTE_DPAA2_QDMA_IDX_FROM_LENGTH(src[i].length);
+		src_sge->addr_lo = (uint32_t)src[i].addr;
+		src_sge->addr_hi = (src[i].addr >> 32);
+		src_sge->data_len.data_len_sl0 = len;
+
+		dst_sge->addr_lo = (uint32_t)dst[i].addr;
+		dst_sge->addr_hi = (dst[i].addr >> 32);
+		dst_sge->data_len.data_len_sl0 = len;
+		total_len += len;
+		sg_cntx->cntx_idx[i] = idx;
+
+		src_sge->ctrl.f = 0;
+		dst_sge->ctrl.f = 0;
+		src_sge++;
+		dst_sge++;
 	}
 
-	return ret;
+	if (unlikely(src[i].length != dst[i].length))
+		return -ENOTSUP;
+
+	len = RTE_DPAA2_QDMA_LEN_FROM_LENGTH(src[i].length);
+	idx = RTE_DPAA2_QDMA_IDX_FROM_LENGTH(src[i].length);
+
+	src_sge->addr_lo = (uint32_t)src[i].addr;
+	src_sge->addr_hi = (src[i].addr >> 32);
+	src_sge->data_len.data_len_sl0 = len;
+
+	dst_sge->addr_lo = (uint32_t)dst[i].addr;
+	dst_sge->addr_hi = (dst[i].addr >> 32);
+	dst_sge->data_len.data_len_sl0 = len;
+
+	total_len += len;
+	sg_cntx->cntx_idx[i] = idx;
+	sg_cntx->job_nb = nb_sge;
+
+	src_sge->ctrl.f = QDMA_SG_F;
+	dst_sge->ctrl.f = QDMA_SG_F;
+
+	return total_len;
 }
 
-static uint32_t
-qdma_populate_sg_entry(struct rte_dpaa2_qdma_job **jobs,
-		       struct qdma_sg_entry *src_sge,
-		       struct qdma_sg_entry *dst_sge,
-		       uint16_t nb_jobs)
+static inline void
+sg_fle_post_populate(struct qbman_fle fle[],
+	size_t len)
 {
-	uint16_t i;
-	uint32_t total_len = 0;
-	uint64_t iova;
+	DPAA2_SET_FLE_LEN(&fle[DPAA2_QDMA_SRC_FLE], len);
+	DPAA2_SET_FLE_LEN(&fle[DPAA2_QDMA_DST_FLE], len);
+}
 
-	for (i = 0; i < nb_jobs; i++) {
-		/* source SG */
-		if (likely(jobs[i]->flags & RTE_DPAA2_QDMA_JOB_SRC_PHY)) {
-			src_sge->addr_lo = (uint32_t)jobs[i]->src;
-			src_sge->addr_hi = (jobs[i]->src >> 32);
-		} else {
-			iova = DPAA2_VADDR_TO_IOVA(jobs[i]->src);
-			src_sge->addr_lo = (uint32_t)iova;
-			src_sge->addr_hi = iova >> 32;
-		}
-		src_sge->data_len.data_len_sl0 = jobs[i]->len;
+static inline uint32_t
+sg_entry_populate(const struct rte_dma_sge *src,
+	const struct rte_dma_sge *dst, struct qdma_cntx_sg *sg_cntx,
+	uint16_t nb_sge)
+{
+	uint16_t i, idx;
+	uint32_t total_len = 0, len;
+	struct qdma_sg_entry *src_sge = sg_cntx->sg_src_entry;
+	struct qdma_sg_entry *dst_sge = sg_cntx->sg_dst_entry;
+
+	for (i = 0; i < nb_sge; i++) {
+		if (unlikely(src[i].length != dst[i].length))
+			return -ENOTSUP;
+		len = RTE_DPAA2_QDMA_LEN_FROM_LENGTH(src[i].length);
+		idx = RTE_DPAA2_QDMA_IDX_FROM_LENGTH(src[i].length);
+
+		src_sge->addr_lo = (uint32_t)src[i].addr;
+		src_sge->addr_hi = (src[i].addr >> 32);
+		src_sge->data_len.data_len_sl0 = len;
 		src_sge->ctrl.sl = QDMA_SG_SL_LONG;
 		src_sge->ctrl.fmt = QDMA_SG_FMT_SDB;
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
@@ -235,16 +362,9 @@ qdma_populate_sg_entry(struct rte_dpaa2_qdma_job **jobs,
 #else
 		src_sge->ctrl.bmt = QDMA_SG_BMT_DISABLE;
 #endif
-		/* destination SG */
-		if (likely(jobs[i]->flags & RTE_DPAA2_QDMA_JOB_DEST_PHY)) {
-			dst_sge->addr_lo = (uint32_t)jobs[i]->dest;
-			dst_sge->addr_hi = (jobs[i]->dest >> 32);
-		} else {
-			iova = DPAA2_VADDR_TO_IOVA(jobs[i]->dest);
-			dst_sge->addr_lo = (uint32_t)iova;
-			dst_sge->addr_hi = iova >> 32;
-		}
-		dst_sge->data_len.data_len_sl0 = jobs[i]->len;
+		dst_sge->addr_lo = (uint32_t)dst[i].addr;
+		dst_sge->addr_hi = (dst[i].addr >> 32);
+		dst_sge->data_len.data_len_sl0 = len;
 		dst_sge->ctrl.sl = QDMA_SG_SL_LONG;
 		dst_sge->ctrl.fmt = QDMA_SG_FMT_SDB;
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
@@ -252,9 +372,10 @@ qdma_populate_sg_entry(struct rte_dpaa2_qdma_job **jobs,
 #else
 		dst_sge->ctrl.bmt = QDMA_SG_BMT_DISABLE;
 #endif
-		total_len += jobs[i]->len;
+		total_len += len;
+		sg_cntx->cntx_idx[i] = idx;
 
-		if (i == (nb_jobs - 1)) {
+		if (i == (nb_sge - 1)) {
 			src_sge->ctrl.f = QDMA_SG_F;
 			dst_sge->ctrl.f = QDMA_SG_F;
 		} else {
@@ -265,327 +386,432 @@ qdma_populate_sg_entry(struct rte_dpaa2_qdma_job **jobs,
 		dst_sge++;
 	}
 
+	sg_cntx->job_nb = nb_sge;
+
 	return total_len;
 }
 
-static inline int
-dpdmai_dev_set_multi_fd_lf_no_rsp(struct qdma_virt_queue *qdma_vq,
-				  struct qbman_fd *fd,
-				  struct rte_dpaa2_qdma_job **job,
-				  uint16_t nb_jobs)
+static inline void
+fle_populate(struct qbman_fle fle[],
+	struct qdma_sdd sdd[], uint64_t sdd_iova,
+	struct dpaa2_qdma_rbp *rbp,
+	uint64_t src_iova, uint64_t dst_iova, size_t len,
+	uint32_t fmt)
 {
-	struct dpaa2_qdma_rbp *rbp = &qdma_vq->rbp;
-	struct rte_dpaa2_qdma_job **ppjob;
-	uint16_t i;
-	void *elem;
-	struct qbman_fle *fle;
-	uint64_t elem_iova, fle_iova;
+	/* first frame list to source descriptor */
+	DPAA2_SET_FLE_ADDR(&fle[DPAA2_QDMA_SDD_FLE], sdd_iova);
+	DPAA2_SET_FLE_LEN(&fle[DPAA2_QDMA_SDD_FLE],
+		(DPAA2_QDMA_MAX_SDD * (sizeof(struct qdma_sdd))));
 
-	for (i = 0; i < nb_jobs; i++) {
-		elem = job[i]->usr_elem;
-#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
-		elem_iova = rte_mempool_virt2iova(elem);
-#else
-		elem_iova = DPAA2_VADDR_TO_IOVA(elem);
-#endif
+	/* source and destination descriptor */
+	if (rbp && rbp->enable) {
+		/* source */
+		sdd[DPAA2_QDMA_SRC_SDD].read_cmd.portid =
+			rbp->sportid;
+		sdd[DPAA2_QDMA_SRC_SDD].rbpcmd_simple.pfid =
+			rbp->spfid;
+		sdd[DPAA2_QDMA_SRC_SDD].rbpcmd_simple.vfid =
+			rbp->svfid;
+		sdd[DPAA2_QDMA_SRC_SDD].rbpcmd_simple.vfa =
+			rbp->svfa;
 
-		ppjob = (struct rte_dpaa2_qdma_job **)
-			((uintptr_t)(uint64_t)elem +
-			 QDMA_FLE_SINGLE_JOB_OFFSET);
-		*ppjob = job[i];
+		if (rbp->srbp) {
+			sdd[DPAA2_QDMA_SRC_SDD].read_cmd.rbp =
+				rbp->srbp;
+			sdd[DPAA2_QDMA_SRC_SDD].read_cmd.rdtype =
+				DPAA2_RBP_MEM_RW;
+		} else {
+			sdd[DPAA2_QDMA_SRC_SDD].read_cmd.rdtype =
+				dpaa2_coherent_no_alloc_cache;
+		}
+		/* destination */
+		sdd[DPAA2_QDMA_DST_SDD].write_cmd.portid =
+			rbp->dportid;
+		sdd[DPAA2_QDMA_DST_SDD].rbpcmd_simple.pfid =
+			rbp->dpfid;
+		sdd[DPAA2_QDMA_DST_SDD].rbpcmd_simple.vfid =
+			rbp->dvfid;
+		sdd[DPAA2_QDMA_DST_SDD].rbpcmd_simple.vfa =
+			rbp->dvfa;
 
-		job[i]->vq_id = qdma_vq->vq_id;
+		if (rbp->drbp) {
+			sdd[DPAA2_QDMA_DST_SDD].write_cmd.rbp =
+				rbp->drbp;
+			sdd[DPAA2_QDMA_DST_SDD].write_cmd.wrttype =
+				DPAA2_RBP_MEM_RW;
+		} else {
+			sdd[DPAA2_QDMA_DST_SDD].write_cmd.wrttype =
+				dpaa2_coherent_alloc_cache;
+		}
 
-		fle = (struct qbman_fle *)
-			((uintptr_t)(uint64_t)elem + QDMA_FLE_FLE_OFFSET);
-		fle_iova = elem_iova + QDMA_FLE_FLE_OFFSET;
+	} else {
+		sdd[DPAA2_QDMA_SRC_SDD].read_cmd.rdtype =
+			dpaa2_coherent_no_alloc_cache;
+		sdd[DPAA2_QDMA_DST_SDD].write_cmd.wrttype =
+			dpaa2_coherent_alloc_cache;
+	}
+	/* source frame list to source buffer */
+	DPAA2_SET_FLE_ADDR(&fle[DPAA2_QDMA_SRC_FLE], src_iova);
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+	DPAA2_SET_FLE_BMT(&fle[DPAA2_QDMA_SRC_FLE]);
+#endif
+	fle[DPAA2_QDMA_SRC_FLE].word4.fmt = fmt;
+	DPAA2_SET_FLE_LEN(&fle[DPAA2_QDMA_SRC_FLE], len);
 
-		DPAA2_SET_FD_ADDR(&fd[i], fle_iova);
-		DPAA2_SET_FD_COMPOUND_FMT(&fd[i]);
+	/* destination frame list to destination buffer */
+	DPAA2_SET_FLE_ADDR(&fle[DPAA2_QDMA_DST_FLE], dst_iova);
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+	DPAA2_SET_FLE_BMT(&fle[DPAA2_QDMA_DST_FLE]);
+#endif
+	fle[DPAA2_QDMA_DST_FLE].word4.fmt = fmt;
+	DPAA2_SET_FLE_LEN(&fle[DPAA2_QDMA_DST_FLE], len);
 
-		memset(fle, 0, DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle) +
-				DPAA2_QDMA_MAX_SDD * sizeof(struct qdma_sdd));
+	/* Final bit: 1, for last frame list */
+	DPAA2_SET_FLE_FIN(&fle[DPAA2_QDMA_DST_FLE]);
+}
 
-		dpaa2_qdma_populate_fle(fle, fle_iova, rbp,
-			job[i]->src, job[i]->dest, job[i]->len,
-			job[i]->flags, QBMAN_FLE_WORD4_FMT_SBF);
-	}
+static inline void
+fle_post_populate(struct qbman_fle fle[],
+	uint64_t src, uint64_t dest, size_t len)
+{
+	DPAA2_SET_FLE_ADDR(&fle[DPAA2_QDMA_SRC_FLE], src);
+	DPAA2_SET_FLE_LEN(&fle[DPAA2_QDMA_SRC_FLE], len);
 
-	return 0;
+	DPAA2_SET_FLE_ADDR(&fle[DPAA2_QDMA_DST_FLE], dest);
+	DPAA2_SET_FLE_LEN(&fle[DPAA2_QDMA_DST_FLE], len);
 }
 
 static inline int
-dpdmai_dev_set_multi_fd_lf(struct qdma_virt_queue *qdma_vq,
-			   struct qbman_fd *fd,
-			   struct rte_dpaa2_qdma_job **job,
-			   uint16_t nb_jobs)
+dpaa2_qdma_submit(void *dev_private, uint16_t vchan)
 {
-	struct dpaa2_qdma_rbp *rbp = &qdma_vq->rbp;
-	struct rte_dpaa2_qdma_job **ppjob;
-	uint16_t i;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
+	uint16_t expected = qdma_vq->fd_idx;
 	int ret;
-	void *elem[DPAA2_QDMA_MAX_DESC];
-	struct qbman_fle *fle;
-	uint64_t elem_iova, fle_iova;
 
-	ret = rte_mempool_get_bulk(qdma_vq->fle_pool, elem, nb_jobs);
-	if (ret) {
-		DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
-		return ret;
-	}
+	ret = dpaa2_qdma_multi_eq(qdma_vq);
+	if (likely(ret == expected))
+		return 0;
 
-	for (i = 0; i < nb_jobs; i++) {
-#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
-		elem_iova = rte_mempool_virt2iova(elem[i]);
-#else
-		elem_iova = DPAA2_VADDR_TO_IOVA(elem[i]);
-#endif
+	return -EBUSY;
+}
 
-		ppjob = (struct rte_dpaa2_qdma_job **)
-			((uintptr_t)(uint64_t)elem[i] +
-			 QDMA_FLE_SINGLE_JOB_OFFSET);
-		*ppjob = job[i];
+static inline void
+dpaa2_qdma_fle_dump(const struct qbman_fle *fle)
+{
+	DPAA2_QDMA_INFO("addr:0x%08x-0x%08x, len:%d, frc:0x%08x, bpid:%d",
+		fle->addr_hi, fle->addr_lo, fle->length, fle->frc,
+		fle->word4.bpid);
+	DPAA2_QDMA_INFO("ivp:%d, bmt:%d, off:%d, fmt:%d, sl:%d, f:%d",
+		fle->word4.ivp, fle->word4.bmt, fle->word4.offset,
+		fle->word4.fmt, fle->word4.sl, fle->word4.f);
+}
 
-		job[i]->vq_id = qdma_vq->vq_id;
+static inline void
+dpaa2_qdma_sdd_dump(const struct qdma_sdd *sdd)
+{
+	DPAA2_QDMA_INFO("stride:%d, rbpcmd:0x%08x, cmd:0x%08x",
+		sdd->stride, sdd->rbpcmd, sdd->cmd);
+}
 
-		fle = (struct qbman_fle *)
-			((uintptr_t)(uint64_t)elem[i] + QDMA_FLE_FLE_OFFSET);
-		fle_iova = elem_iova + QDMA_FLE_FLE_OFFSET;
+static inline void
+dpaa2_qdma_sge_dump(const struct qdma_sg_entry *sge)
+{
+	DPAA2_QDMA_INFO("addr 0x%08x-0x%08x, len:0x%08x, ctl:0x%08x",
+		sge->addr_hi, sge->addr_lo, sge->data_len.data_len_sl0,
+		sge->ctrl_fields);
+}
 
-		DPAA2_SET_FD_ADDR(&fd[i], fle_iova);
-		DPAA2_SET_FD_COMPOUND_FMT(&fd[i]);
-		DPAA2_SET_FD_FRC(&fd[i], QDMA_SER_CTX);
+static void
+dpaa2_qdma_long_fmt_dump(const struct qbman_fle *fle)
+{
+	int i;
+	const struct qdma_cntx_fle_sdd *fle_sdd;
+	const struct qdma_sdd *sdd;
+	const struct qdma_cntx_sg *cntx_sg = NULL;
+	const struct qdma_cntx_long *cntx_long = NULL;
 
-		memset(fle, 0, DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle) +
-			DPAA2_QDMA_MAX_SDD * sizeof(struct qdma_sdd));
+	fle_sdd = container_of(fle, const struct qdma_cntx_fle_sdd, fle[0]);
+	sdd = fle_sdd->sdd;
 
-		dpaa2_qdma_populate_fle(fle, fle_iova, rbp,
-				job[i]->src, job[i]->dest, job[i]->len,
-				job[i]->flags, QBMAN_FLE_WORD4_FMT_SBF);
+	for (i = 0; i < DPAA2_QDMA_MAX_FLE; i++) {
+		DPAA2_QDMA_INFO("fle[%d] info:", i);
+		dpaa2_qdma_fle_dump(&fle[i]);
 	}
 
-	return 0;
+	if (fle[DPAA2_QDMA_SRC_FLE].word4.fmt !=
+		fle[DPAA2_QDMA_DST_FLE].word4.fmt) {
+		DPAA2_QDMA_ERR("fle[%d].fmt(%d) != fle[%d].fmt(%d)",
+			DPAA2_QDMA_SRC_FLE,
+			fle[DPAA2_QDMA_SRC_FLE].word4.fmt,
+			DPAA2_QDMA_DST_FLE,
+			fle[DPAA2_QDMA_DST_FLE].word4.fmt);
+
+		return;
+	} else if (fle[DPAA2_QDMA_SRC_FLE].word4.fmt ==
+		QBMAN_FLE_WORD4_FMT_SGE) {
+		cntx_sg = container_of(fle_sdd, const struct qdma_cntx_sg,
+			fle_sdd);
+	} else if (fle[DPAA2_QDMA_SRC_FLE].word4.fmt ==
+		QBMAN_FLE_WORD4_FMT_SBF) {
+		cntx_long = container_of(fle_sdd, const struct qdma_cntx_long,
+			fle_sdd);
+	} else {
+		DPAA2_QDMA_ERR("Unsupported fle format:%d",
+			fle[DPAA2_QDMA_SRC_FLE].word4.fmt);
+		return;
+	}
+
+	for (i = 0; i < DPAA2_QDMA_MAX_SDD; i++) {
+		DPAA2_QDMA_INFO("sdd[%d] info:", i);
+		dpaa2_qdma_sdd_dump(&sdd[i]);
+	}
+
+	if (cntx_long) {
+		DPAA2_QDMA_INFO("long format/Single buffer cntx idx:%d",
+			cntx_long->cntx_idx);
+	}
+
+	if (cntx_sg) {
+		DPAA2_QDMA_INFO("long format/SG format, job number:%d",
+			cntx_sg->job_nb);
+		if (!cntx_sg->job_nb ||
+			cntx_sg->job_nb > RTE_DPAA2_QDMA_JOB_SUBMIT_MAX) {
+			DPAA2_QDMA_ERR("Invalid SG job number:%d",
+				cntx_sg->job_nb);
+			return;
+		}
+		for (i = 0; i < cntx_sg->job_nb; i++) {
+			DPAA2_QDMA_INFO("sg[%d] src info:", i);
+			dpaa2_qdma_sge_dump(&cntx_sg->sg_src_entry[i]);
+			DPAA2_QDMA_INFO("sg[%d] dst info:", i);
+			dpaa2_qdma_sge_dump(&cntx_sg->sg_dst_entry[i]);
+			DPAA2_QDMA_INFO("cntx_idx[%d]:%d", i,
+				cntx_sg->cntx_idx[i]);
+		}
+	}
 }
 
-static inline int
-dpdmai_dev_set_sg_fd_lf(struct qdma_virt_queue *qdma_vq,
-			struct qbman_fd *fd,
-			struct rte_dpaa2_qdma_job **job,
-			uint16_t nb_jobs)
+static int
+dpaa2_qdma_copy_sg(void *dev_private,
+	uint16_t vchan,
+	const struct rte_dma_sge *src,
+	const struct rte_dma_sge *dst,
+	uint16_t nb_src, uint16_t nb_dst,
+	uint64_t flags)
 {
-	struct dpaa2_qdma_rbp *rbp = &qdma_vq->rbp;
-	struct rte_dpaa2_qdma_job **ppjob;
-	void *elem;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
+	int ret = 0, expected;
+	uint32_t cntx_idx, len;
+	struct qbman_fd *fd = &qdma_vq->fd[qdma_vq->fd_idx];
+	struct qdma_cntx_sg *cntx_sg;
+	rte_iova_t cntx_iova, fle_iova, sdd_iova;
+	rte_iova_t src_sge_iova, dst_sge_iova;
 	struct qbman_fle *fle;
-	uint64_t elem_iova, fle_iova, src, dst;
-	int ret = 0, i;
-	struct qdma_sg_entry *src_sge, *dst_sge;
-	uint32_t len, fmt, flags;
-
-	/*
-	 * Get an FLE/SDD from FLE pool.
-	 * Note: IO metadata is before the FLE and SDD memory.
-	 */
-	if (qdma_vq->flags & DPAA2_QDMA_VQ_NO_RESPONSE) {
-		elem = job[0]->usr_elem;
+	struct qdma_sdd *sdd;
+
+	if (unlikely(nb_src != nb_dst))
+		return -ENOTSUP;
+
+	memset(fd, 0, sizeof(struct qbman_fd));
+
+	if (qdma_dev->is_silent) {
+		cntx_idx = RTE_DPAA2_QDMA_IDX_FROM_LENGTH(src[0].length);
+		cntx_sg = qdma_vq->cntx_sg[cntx_idx];
 	} else {
-		ret = rte_mempool_get(qdma_vq->fle_pool, &elem);
-		if (ret) {
-			DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
+		ret = rte_mempool_get(qdma_vq->fle_pool,
+			(void **)&cntx_sg);
+		if (ret)
 			return ret;
-		}
+		DPAA2_SET_FD_FRC(fd, QDMA_SER_CTX);
 	}
 
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
-	elem_iova = rte_mempool_virt2iova(elem);
+	cntx_iova = rte_mempool_virt2iova(cntx_sg);
 #else
-	elem_iova = DPAA2_VADDR_TO_IOVA(elem);
+	cntx_iova = DPAA2_VADDR_TO_IOVA(cntx_sg);
 #endif
 
-	/* Set the metadata */
-	/* Save job context. */
-	*((uint16_t *)
-	((uintptr_t)(uint64_t)elem + QDMA_FLE_JOB_NB_OFFSET)) = nb_jobs;
-	ppjob = (struct rte_dpaa2_qdma_job **)
-		((uintptr_t)(uint64_t)elem + QDMA_FLE_SG_JOBS_OFFSET);
-	for (i = 0; i < nb_jobs; i++)
-		ppjob[i] = job[i];
-
-	ppjob[0]->vq_id = qdma_vq->vq_id;
-
-	fle = (struct qbman_fle *)
-		((uintptr_t)(uint64_t)elem + QDMA_FLE_FLE_OFFSET);
-	fle_iova = elem_iova + QDMA_FLE_FLE_OFFSET;
+	fle = cntx_sg->fle_sdd.fle;
+	fle_iova = cntx_iova +
+		offsetof(struct qdma_cntx_sg, fle_sdd) +
+		offsetof(struct qdma_cntx_fle_sdd, fle);
 
 	DPAA2_SET_FD_ADDR(fd, fle_iova);
 	DPAA2_SET_FD_COMPOUND_FMT(fd);
-	if (!(qdma_vq->flags & DPAA2_QDMA_VQ_NO_RESPONSE))
-		DPAA2_SET_FD_FRC(fd, QDMA_SER_CTX);
+	DPAA2_SET_FD_FLC(fd, (uint64_t)cntx_sg);
+
+	if (qdma_vq->fle_pre_populate) {
+		if (unlikely(!fle[DPAA2_QDMA_SRC_FLE].length))
+			fle_sdd_sg_pre_populate(cntx_sg, qdma_vq);
 
-	/* Populate FLE */
-	if (likely(nb_jobs > 1)) {
-		src_sge = (struct qdma_sg_entry *)
-			((uintptr_t)(uint64_t)elem + QDMA_FLE_SG_ENTRY_OFFSET);
-		dst_sge = src_sge + DPAA2_QDMA_MAX_SG_NB;
-		src = elem_iova + QDMA_FLE_SG_ENTRY_OFFSET;
-		dst = src +
-			DPAA2_QDMA_MAX_SG_NB * sizeof(struct qdma_sg_entry);
-		len = qdma_populate_sg_entry(job, src_sge, dst_sge, nb_jobs);
-		fmt = QBMAN_FLE_WORD4_FMT_SGE;
-		flags = RTE_DPAA2_QDMA_JOB_SRC_PHY | RTE_DPAA2_QDMA_JOB_DEST_PHY;
+		len = sg_entry_post_populate(src, dst,
+			cntx_sg, nb_src);
+		sg_fle_post_populate(fle, len);
 	} else {
-		src = job[0]->src;
-		dst = job[0]->dest;
-		len = job[0]->len;
-		fmt = QBMAN_FLE_WORD4_FMT_SBF;
-		flags = job[0]->flags;
+		sdd = cntx_sg->fle_sdd.sdd;
+		sdd_iova = cntx_iova +
+			offsetof(struct qdma_cntx_sg, fle_sdd) +
+			offsetof(struct qdma_cntx_fle_sdd, sdd);
+		src_sge_iova = cntx_iova +
+			offsetof(struct qdma_cntx_sg, sg_src_entry);
+		dst_sge_iova = cntx_iova +
+			offsetof(struct qdma_cntx_sg, sg_dst_entry);
+		len = sg_entry_populate(src, dst,
+			cntx_sg, nb_src);
+
+		fle_populate(fle, sdd, sdd_iova,
+			&qdma_vq->rbp, src_sge_iova, dst_sge_iova, len,
+			QBMAN_FLE_WORD4_FMT_SGE);
 	}
 
-	memset(fle, 0, DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle) +
-			DPAA2_QDMA_MAX_SDD * sizeof(struct qdma_sdd));
-
-	dpaa2_qdma_populate_fle(fle, fle_iova, rbp,
-					src, dst, len, flags, fmt);
-
-	return 0;
-}
-
-static inline uint16_t
-dpdmai_dev_get_job_us(struct qdma_virt_queue *qdma_vq __rte_unused,
-		      const struct qbman_fd *fd,
-		      struct rte_dpaa2_qdma_job **job, uint16_t *nb_jobs)
-{
-	uint16_t vqid;
-	size_t iova;
-	struct rte_dpaa2_qdma_job **ppjob;
+	if (unlikely(qdma_vq->flags & DPAA2_QDMA_DESC_DEBUG_FLAG))
+		dpaa2_qdma_long_fmt_dump(cntx_sg->fle_sdd.fle);
 
-	if (fd->simple_pci.saddr_hi & (QDMA_RBP_UPPER_ADDRESS_MASK >> 32))
-		iova = (size_t)(((uint64_t)fd->simple_pci.daddr_hi) << 32
-				| (uint64_t)fd->simple_pci.daddr_lo);
-	else
-		iova = (size_t)(((uint64_t)fd->simple_pci.saddr_hi) << 32
-				| (uint64_t)fd->simple_pci.saddr_lo);
+	qdma_vq->fd_idx++;
 
-	ppjob = (struct rte_dpaa2_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1;
-	*job = (struct rte_dpaa2_qdma_job *)*ppjob;
-	(*job)->status = (fd->simple_pci.acc_err << 8) |
-					(fd->simple_pci.error);
-	vqid = (*job)->vq_id;
-	*nb_jobs = 1;
+	if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
+		expected = qdma_vq->fd_idx;
+		ret = dpaa2_qdma_multi_eq(qdma_vq);
+		if (likely(ret == expected))
+			return 0;
+	} else {
+		return 0;
+	}
 
-	return vqid;
+	return ret;
 }
 
-static inline uint16_t
-dpdmai_dev_get_single_job_lf(struct qdma_virt_queue *qdma_vq,
-	const struct qbman_fd *fd,
-	struct rte_dpaa2_qdma_job **job,
-	uint16_t *nb_jobs)
+static int
+dpaa2_qdma_copy(void *dev_private, uint16_t vchan,
+	rte_iova_t src, rte_iova_t dst,
+	uint32_t length, uint64_t flags)
 {
+	struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
+	int ret = 0, expected;
+	uint16_t cntx_idx;
+	uint32_t len;
+	struct qbman_fd *fd = &qdma_vq->fd[qdma_vq->fd_idx];
+	struct qdma_cntx_long *cntx_long;
+	rte_iova_t cntx_iova, fle_iova, sdd_iova;
 	struct qbman_fle *fle;
-	struct rte_dpaa2_qdma_job **ppjob = NULL;
-	uint16_t status;
+	struct qdma_sdd *sdd;
 
-	/*
-	 * Fetch metadata from FLE. job and vq_id were set
-	 * in metadata in the enqueue operation.
-	 */
-	fle = (struct qbman_fle *)
-			DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd));
+	memset(fd, 0, sizeof(struct qbman_fd));
 
-	*nb_jobs = 1;
-	ppjob = (struct rte_dpaa2_qdma_job **)((uintptr_t)(uint64_t)fle -
-			QDMA_FLE_FLE_OFFSET + QDMA_FLE_SINGLE_JOB_OFFSET);
+	cntx_idx = RTE_DPAA2_QDMA_IDX_FROM_LENGTH(length);
+	len = RTE_DPAA2_QDMA_LEN_FROM_LENGTH(length);
 
-	status = (DPAA2_GET_FD_ERR(fd) << 8) | (DPAA2_GET_FD_FRC(fd) & 0xFF);
+	if (qdma_dev->is_silent) {
+		cntx_long = qdma_vq->cntx_long[cntx_idx];
+	} else {
+		ret = rte_mempool_get(qdma_vq->fle_pool,
+			(void **)&cntx_long);
+		if (ret)
+			return ret;
+		DPAA2_SET_FD_FRC(fd, QDMA_SER_CTX);
+		cntx_long->cntx_idx = cntx_idx;
+	}
 
-	*job = *ppjob;
-	(*job)->status = status;
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+	cntx_iova = rte_mempool_virt2iova(cntx_long);
+#else
+	cntx_iova = DPAA2_VADDR_TO_IOVA(cntx_long);
+#endif
 
-	/* Free FLE to the pool */
-	rte_mempool_put(qdma_vq->fle_pool,
-			(void *)
-			((uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET));
+	fle = cntx_long->fle_sdd.fle;
+	fle_iova = cntx_iova +
+		offsetof(struct qdma_cntx_long, fle_sdd) +
+		offsetof(struct qdma_cntx_fle_sdd, fle);
 
-	return (*job)->vq_id;
-}
+	DPAA2_SET_FD_ADDR(fd, fle_iova);
+	DPAA2_SET_FD_COMPOUND_FMT(fd);
+	DPAA2_SET_FD_FLC(fd, (uint64_t)cntx_long);
 
-static inline uint16_t
-dpdmai_dev_get_sg_job_lf(struct qdma_virt_queue *qdma_vq,
-	const struct qbman_fd *fd,
-	struct rte_dpaa2_qdma_job **job,
-	uint16_t *nb_jobs)
-{
-	struct qbman_fle *fle;
-	struct rte_dpaa2_qdma_job **ppjob = NULL;
-	uint16_t i, status;
+	if (qdma_vq->fle_pre_populate) {
+		if (unlikely(!fle[DPAA2_QDMA_SRC_FLE].length)) {
+			fle_sdd_pre_populate(&cntx_long->fle_sdd,
+				&qdma_vq->rbp,
+				0, 0, QBMAN_FLE_WORD4_FMT_SBF);
+		}
 
-	/*
-	 * Fetch metadata from FLE. job and vq_id were set
-	 * in metadata in the enqueue operation.
-	 */
-	fle = (struct qbman_fle *)
-			DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd));
-	*nb_jobs = *((uint16_t *)((uintptr_t)(uint64_t)fle -
-				QDMA_FLE_FLE_OFFSET + QDMA_FLE_JOB_NB_OFFSET));
-	ppjob = (struct rte_dpaa2_qdma_job **)((uintptr_t)(uint64_t)fle -
-				QDMA_FLE_FLE_OFFSET + QDMA_FLE_SG_JOBS_OFFSET);
-	status = (DPAA2_GET_FD_ERR(fd) << 8) | (DPAA2_GET_FD_FRC(fd) & 0xFF);
-
-	for (i = 0; i < (*nb_jobs); i++) {
-		job[i] = ppjob[i];
-		job[i]->status = status;
+		fle_post_populate(fle, src, dst, len);
+	} else {
+		sdd = cntx_long->fle_sdd.sdd;
+		sdd_iova = cntx_iova +
+			offsetof(struct qdma_cntx_long, fle_sdd) +
+			offsetof(struct qdma_cntx_fle_sdd, sdd);
+		fle_populate(fle, sdd, sdd_iova, &qdma_vq->rbp,
+			src, dst, len,
+			QBMAN_FLE_WORD4_FMT_SBF);
 	}
 
-	/* Free FLE to the pool */
-	rte_mempool_put(qdma_vq->fle_pool,
-			(void *)
-			((uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET));
+	if (unlikely(qdma_vq->flags & DPAA2_QDMA_DESC_DEBUG_FLAG))
+		dpaa2_qdma_long_fmt_dump(cntx_long->fle_sdd.fle);
 
-	return job[0]->vq_id;
+	qdma_vq->fd_idx++;
+
+	if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
+		expected = qdma_vq->fd_idx;
+		ret = dpaa2_qdma_multi_eq(qdma_vq);
+		if (likely(ret == expected))
+			return 0;
+	} else {
+		return 0;
+	}
+
+	return ret;
 }
 
-/* Function to receive a QDMA job for a given device and queue*/
-static int
-dpdmai_dev_dequeue_multijob_prefetch(struct qdma_virt_queue *qdma_vq,
-	uint16_t *vq_id,
-	struct rte_dpaa2_qdma_job **job,
-	uint16_t nb_jobs)
+static uint16_t
+dpaa2_qdma_dequeue(void *dev_private,
+	uint16_t vchan, const uint16_t nb_cpls,
+	uint16_t *cntx_idx, bool *has_error)
 {
-	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_vq->dpdmai_dev;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
+
 	struct dpaa2_queue *rxq;
 	struct qbman_result *dq_storage, *dq_storage1 = NULL;
 	struct qbman_pull_desc pulldesc;
 	struct qbman_swp *swp;
 	struct queue_storage_info_t *q_storage;
+	uint32_t fqid;
 	uint8_t status, pending;
 	uint8_t num_rx = 0;
 	const struct qbman_fd *fd;
-	uint16_t vqid, num_rx_ret;
-	uint16_t rx_fqid;
 	int ret, pull_size;
+	struct qbman_fle *fle;
+	struct qdma_cntx_fle_sdd *fle_sdd;
+	struct qdma_cntx_sg *cntx_sg;
+	struct qdma_cntx_long *cntx_long;
+	uint16_t free_space = 0, fle_elem_nb = 0;
 
-	if (qdma_vq->flags & DPAA2_QDMA_VQ_FD_SG_FORMAT) {
-		/** Make sure there are enough space to get jobs.*/
-		if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
-			return -EINVAL;
-		nb_jobs = 1;
-	}
+	if (unlikely(qdma_dev->is_silent))
+		return 0;
 
 	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
 		ret = dpaa2_affine_qbman_swp();
 		if (ret) {
-			DPAA2_QDMA_ERR("Failed to allocate IO portal, tid(%d)",
+			DPAA2_QDMA_ERR("Allocate portal err, tid(%d)",
 				rte_gettid());
+			if (has_error)
+				*has_error = true;
 			return 0;
 		}
 	}
 	swp = DPAA2_PER_LCORE_PORTAL;
-	rxq = &dpdmai_dev->rx_queue[qdma_vq->vq_id];
-	rx_fqid = rxq->fqid;
 
-	pull_size = (nb_jobs > dpaa2_dqrr_size) ?
-		dpaa2_dqrr_size : nb_jobs;
+	pull_size = (nb_cpls > dpaa2_dqrr_size) ?
+		dpaa2_dqrr_size : nb_cpls;
+	rxq = &(dpdmai_dev->rx_queue[qdma_vq->vq_id]);
+	fqid = rxq->fqid;
 	q_storage = rxq->q_storage;
 
 	if (unlikely(!q_storage->active_dqs)) {
@@ -594,21 +820,20 @@ dpdmai_dev_dequeue_multijob_prefetch(struct qdma_virt_queue *qdma_vq,
 		q_storage->last_num_pkts = pull_size;
 		qbman_pull_desc_clear(&pulldesc);
 		qbman_pull_desc_set_numframes(&pulldesc,
-					      q_storage->last_num_pkts);
-		qbman_pull_desc_set_fq(&pulldesc, rx_fqid);
+			q_storage->last_num_pkts);
+		qbman_pull_desc_set_fq(&pulldesc, fqid);
 		qbman_pull_desc_set_storage(&pulldesc, dq_storage,
-				(size_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
+			(size_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
 		if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
 			while (!qbman_check_command_complete(
-				get_swp_active_dqs(
-				DPAA2_PER_LCORE_DPIO->index)))
+			       get_swp_active_dqs(
+			       DPAA2_PER_LCORE_DPIO->index)))
 				;
 			clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
 		}
 		while (1) {
 			if (qbman_swp_pull(swp, &pulldesc)) {
-				DPAA2_QDMA_DP_WARN(
-					"VDQ command not issued.QBMAN busy\n");
+				DPAA2_QDMA_DP_WARN("QBMAN busy");
 					/* Portal was busy, try again */
 				continue;
 			}
@@ -617,7 +842,7 @@ dpdmai_dev_dequeue_multijob_prefetch(struct qdma_virt_queue *qdma_vq,
 		q_storage->active_dqs = dq_storage;
 		q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
 		set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index,
-				   dq_storage);
+			dq_storage);
 	}
 
 	dq_storage = q_storage->active_dqs;
@@ -631,7 +856,7 @@ dpdmai_dev_dequeue_multijob_prefetch(struct qdma_virt_queue *qdma_vq,
 	dq_storage1 = q_storage->dq_storage[q_storage->toggle];
 	qbman_pull_desc_clear(&pulldesc);
 	qbman_pull_desc_set_numframes(&pulldesc, pull_size);
-	qbman_pull_desc_set_fq(&pulldesc, rx_fqid);
+	qbman_pull_desc_set_fq(&pulldesc, fqid);
 	qbman_pull_desc_set_storage(&pulldesc, dq_storage1,
 		(size_t)(DPAA2_VADDR_TO_IOVA(dq_storage1)), 1);
 
@@ -664,27 +889,40 @@ dpdmai_dev_dequeue_multijob_prefetch(struct qdma_virt_queue *qdma_vq,
 				continue;
 		}
 		fd = qbman_result_DQ_fd(dq_storage);
-
-		vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx],
-								&num_rx_ret);
-		if (vq_id)
-			vq_id[num_rx] = vqid;
+		fle_sdd = (void *)DPAA2_GET_FD_FLC(fd);
+		fle = fle_sdd->fle;
+		qdma_vq->fle_elem[fle_elem_nb] = fle_sdd;
+		fle_elem_nb++;
+		if (fle[DPAA2_QDMA_SRC_FLE].word4.fmt ==
+			QBMAN_FLE_WORD4_FMT_SGE) {
+			cntx_sg = container_of(fle_sdd,
+				struct qdma_cntx_sg, fle_sdd);
+			ret = qdma_cntx_idx_ring_eq(qdma_vq->ring_cntx_idx,
+				cntx_sg->cntx_idx,
+				cntx_sg->job_nb, &free_space);
+		} else {
+			cntx_long = container_of(fle_sdd,
+				struct qdma_cntx_long, fle_sdd);
+			ret = qdma_cntx_idx_ring_eq(qdma_vq->ring_cntx_idx,
+				&cntx_long->cntx_idx,
+				1, &free_space);
+		}
+		if (!ret || free_space < RTE_DPAA2_QDMA_JOB_SUBMIT_MAX)
+			pending = 0;
 
 		dq_storage++;
-		num_rx += num_rx_ret;
 	} while (pending);
 
 	if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
 		while (!qbman_check_command_complete(
-			get_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)))
+		       get_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)))
 			;
 		clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
 	}
 	/* issue a volatile dequeue command for next pull */
 	while (1) {
 		if (qbman_swp_pull(swp, &pulldesc)) {
-			DPAA2_QDMA_DP_WARN(
-				"VDQ command is not issued. QBMAN is busy (2)\n");
+			DPAA2_QDMA_DP_WARN("QBMAN is busy (2)");
 			continue;
 		}
 		break;
@@ -694,387 +932,18 @@ dpdmai_dev_dequeue_multijob_prefetch(struct qdma_virt_queue *qdma_vq,
 	q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
 	set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index, dq_storage1);
 
-	return num_rx;
-}
-
-static int
-dpdmai_dev_dequeue_multijob_no_prefetch(struct qdma_virt_queue *qdma_vq,
-	uint16_t *vq_id,
-	struct rte_dpaa2_qdma_job **job,
-	uint16_t nb_jobs)
-{
-	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_vq->dpdmai_dev;
-	struct dpaa2_queue *rxq;
-	struct qbman_result *dq_storage;
-	struct qbman_pull_desc pulldesc;
-	struct qbman_swp *swp;
-	uint8_t status, pending;
-	uint8_t num_rx = 0;
-	const struct qbman_fd *fd;
-	uint16_t vqid, num_rx_ret;
-	uint16_t rx_fqid;
-	int ret, next_pull, num_pulled = 0;
-
-	if (qdma_vq->flags & DPAA2_QDMA_VQ_FD_SG_FORMAT) {
-		/** Make sure there are enough space to get jobs.*/
-		if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
-			return -EINVAL;
-		nb_jobs = 1;
-	}
-
-	next_pull = nb_jobs;
-
-	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
-		ret = dpaa2_affine_qbman_swp();
-		if (ret) {
-			DPAA2_QDMA_ERR("Failed to allocate IO portal, tid(%d)",
-				rte_gettid());
-			return 0;
-		}
-	}
-	swp = DPAA2_PER_LCORE_PORTAL;
-
-	rxq = &dpdmai_dev->rx_queue[qdma_vq->vq_id];
-	rx_fqid = rxq->fqid;
+	rte_mempool_put_bulk(qdma_vq->fle_pool,
+		qdma_vq->fle_elem, fle_elem_nb);
 
-	do {
-		dq_storage = rxq->q_storage->dq_storage[0];
-		/* Prepare dequeue descriptor */
-		qbman_pull_desc_clear(&pulldesc);
-		qbman_pull_desc_set_fq(&pulldesc, rx_fqid);
-		qbman_pull_desc_set_storage(&pulldesc, dq_storage,
-			(uint64_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
+	num_rx = qdma_cntx_idx_ring_dq(qdma_vq->ring_cntx_idx,
+		cntx_idx, nb_cpls);
 
-		if (next_pull > dpaa2_dqrr_size) {
-			qbman_pull_desc_set_numframes(&pulldesc,
-					dpaa2_dqrr_size);
-			next_pull -= dpaa2_dqrr_size;
-		} else {
-			qbman_pull_desc_set_numframes(&pulldesc, next_pull);
-			next_pull = 0;
-		}
-
-		while (1) {
-			if (qbman_swp_pull(swp, &pulldesc)) {
-				DPAA2_QDMA_DP_WARN(
-					"VDQ command not issued. QBMAN busy");
-				/* Portal was busy, try again */
-				continue;
-			}
-			break;
-		}
-
-		rte_prefetch0((void *)((size_t)(dq_storage + 1)));
-		/* Check if the previous issued command is completed. */
-		while (!qbman_check_command_complete(dq_storage))
-			;
-
-		num_pulled = 0;
-		pending = 1;
-
-		do {
-			/* Loop until dq_storage is updated
-			 * with new token by QBMAN
-			 */
-			while (!qbman_check_new_result(dq_storage))
-				;
-			rte_prefetch0((void *)((size_t)(dq_storage + 2)));
-
-			if (qbman_result_DQ_is_pull_complete(dq_storage)) {
-				pending = 0;
-				/* Check for valid frame. */
-				status = qbman_result_DQ_flags(dq_storage);
-				if (unlikely((status &
-					QBMAN_DQ_STAT_VALIDFRAME) == 0))
-					continue;
-			}
-			fd = qbman_result_DQ_fd(dq_storage);
-
-			vqid = qdma_vq->get_job(qdma_vq, fd,
-						&job[num_rx], &num_rx_ret);
-			if (vq_id)
-				vq_id[num_rx] = vqid;
-
-			dq_storage++;
-			num_rx += num_rx_ret;
-			num_pulled++;
-
-		} while (pending);
-	/* Last VDQ provided all packets and more packets are requested */
-	} while (next_pull && num_pulled == dpaa2_dqrr_size);
+	if (has_error)
+		*has_error = false;
 
 	return num_rx;
 }
 
-static int
-dpdmai_dev_submit_multi(struct qdma_virt_queue *qdma_vq,
-			struct rte_dpaa2_qdma_job **job,
-			uint16_t nb_jobs)
-{
-	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_vq->dpdmai_dev;
-	uint16_t txq_id = dpdmai_dev->tx_queue[qdma_vq->vq_id].fqid;
-	struct qbman_fd fd[DPAA2_QDMA_MAX_DESC];
-	struct qbman_eq_desc eqdesc;
-	struct qbman_swp *swp;
-	uint32_t num_to_send = 0;
-	uint16_t num_tx = 0;
-	uint32_t enqueue_loop, loop;
-	int ret;
-
-	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
-		ret = dpaa2_affine_qbman_swp();
-		if (ret) {
-			DPAA2_QDMA_ERR(
-				"Failed to allocate IO portal, tid: %d\n",
-				rte_gettid());
-			return 0;
-		}
-	}
-	swp = DPAA2_PER_LCORE_PORTAL;
-
-	/* Prepare enqueue descriptor */
-	qbman_eq_desc_clear(&eqdesc);
-	qbman_eq_desc_set_fq(&eqdesc, txq_id);
-	qbman_eq_desc_set_no_orp(&eqdesc, 0);
-	qbman_eq_desc_set_response(&eqdesc, 0, 0);
-
-	if (qdma_vq->flags & DPAA2_QDMA_VQ_FD_SG_FORMAT) {
-		uint16_t fd_nb;
-		uint16_t sg_entry_nb = nb_jobs > DPAA2_QDMA_MAX_SG_NB ?
-						DPAA2_QDMA_MAX_SG_NB : nb_jobs;
-		uint16_t job_idx = 0;
-		uint16_t fd_sg_nb[8];
-		uint16_t nb_jobs_ret = 0;
-
-		if (nb_jobs % DPAA2_QDMA_MAX_SG_NB)
-			fd_nb = nb_jobs / DPAA2_QDMA_MAX_SG_NB + 1;
-		else
-			fd_nb = nb_jobs / DPAA2_QDMA_MAX_SG_NB;
-
-		memset(&fd[0], 0, sizeof(struct qbman_fd) * fd_nb);
-
-		for (loop = 0; loop < fd_nb; loop++) {
-			ret = qdma_vq->set_fd(qdma_vq, &fd[loop], &job[job_idx],
-					      sg_entry_nb);
-			if (unlikely(ret < 0))
-				return 0;
-			fd_sg_nb[loop] = sg_entry_nb;
-			nb_jobs -= sg_entry_nb;
-			job_idx += sg_entry_nb;
-			sg_entry_nb = nb_jobs > DPAA2_QDMA_MAX_SG_NB ?
-						DPAA2_QDMA_MAX_SG_NB : nb_jobs;
-		}
-
-		/* Enqueue the packet to the QBMAN */
-		enqueue_loop = 0;
-
-		while (enqueue_loop < fd_nb) {
-			ret = qbman_swp_enqueue_multiple(swp,
-					&eqdesc, &fd[enqueue_loop],
-					NULL, fd_nb - enqueue_loop);
-			if (likely(ret >= 0)) {
-				for (loop = 0; loop < (uint32_t)ret; loop++)
-					nb_jobs_ret +=
-						fd_sg_nb[enqueue_loop + loop];
-				enqueue_loop += ret;
-			}
-		}
-
-		return nb_jobs_ret;
-	}
-
-	memset(fd, 0, nb_jobs * sizeof(struct qbman_fd));
-
-	while (nb_jobs > 0) {
-		num_to_send = (nb_jobs > dpaa2_eqcr_size) ?
-			dpaa2_eqcr_size : nb_jobs;
-
-		ret = qdma_vq->set_fd(qdma_vq, &fd[num_tx],
-						&job[num_tx], num_to_send);
-		if (unlikely(ret < 0))
-			break;
-
-		/* Enqueue the packet to the QBMAN */
-		enqueue_loop = 0;
-		loop = num_to_send;
-
-		while (enqueue_loop < loop) {
-			ret = qbman_swp_enqueue_multiple(swp,
-						&eqdesc,
-						&fd[num_tx + enqueue_loop],
-						NULL,
-						loop - enqueue_loop);
-			if (likely(ret >= 0))
-				enqueue_loop += ret;
-		}
-		num_tx += num_to_send;
-		nb_jobs -= loop;
-	}
-
-	qdma_vq->num_enqueues += num_tx;
-
-	return num_tx;
-}
-
-static inline int
-dpaa2_qdma_submit(void *dev_private, uint16_t vchan)
-{
-	struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
-
-	dpdmai_dev_submit_multi(qdma_vq, qdma_vq->job_list,
-				qdma_vq->num_valid_jobs);
-
-	qdma_vq->num_valid_jobs = 0;
-
-	return 0;
-}
-
-static int
-dpaa2_qdma_enqueue(void *dev_private, uint16_t vchan,
-	rte_iova_t src, rte_iova_t dst,
-	uint32_t length, uint64_t flags)
-{
-	struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
-	struct rte_dpaa2_qdma_job *job;
-	int idx, ret;
-
-	idx = (uint16_t)(qdma_vq->num_enqueues + qdma_vq->num_valid_jobs);
-
-	ret = rte_mempool_get(qdma_vq->job_pool, (void **)&job);
-	if (ret) {
-		DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
-		return -ENOSPC;
-	}
-
-	job->src = src;
-	job->dest = dst;
-	job->len = length;
-	job->flags = flags;
-	job->status = 0;
-	job->vq_id = vchan;
-
-	qdma_vq->job_list[qdma_vq->num_valid_jobs] = job;
-	qdma_vq->num_valid_jobs++;
-
-	if (flags & RTE_DMA_OP_FLAG_SUBMIT)
-		dpaa2_qdma_submit(dev_private, vchan);
-
-	return idx;
-}
-
-int
-rte_dpaa2_qdma_copy_multi(int16_t dev_id, uint16_t vchan,
-	struct rte_dpaa2_qdma_job **jobs,
-	uint16_t nb_cpls)
-{
-	struct rte_dma_fp_object *obj = &rte_dma_fp_objs[dev_id];
-	struct dpaa2_dpdmai_dev *dpdmai_dev = obj->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
-
-	return dpdmai_dev_submit_multi(qdma_vq, jobs, nb_cpls);
-}
-
-static uint16_t
-dpaa2_qdma_dequeue_multi(struct qdma_virt_queue *qdma_vq,
-	struct rte_dpaa2_qdma_job **jobs,
-	uint16_t nb_jobs)
-{
-	int ret;
-
-	if (qdma_vq->flags & DPAA2_QDMA_VQ_FD_SG_FORMAT) {
-		/** Make sure there are enough space to get jobs.*/
-		if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
-			return -EINVAL;
-	}
-
-	/* Only dequeue when there are pending jobs on VQ */
-	if (qdma_vq->num_enqueues == qdma_vq->num_dequeues)
-		return 0;
-
-	if (!(qdma_vq->flags & DPAA2_QDMA_VQ_FD_SG_FORMAT) &&
-		qdma_vq->num_enqueues < (qdma_vq->num_dequeues + nb_jobs))
-		nb_jobs = RTE_MIN((qdma_vq->num_enqueues -
-				qdma_vq->num_dequeues), nb_jobs);
-
-	ret = qdma_vq->dequeue_job(qdma_vq, NULL, jobs, nb_jobs);
-	if (ret < 0) {
-		DPAA2_QDMA_ERR("Dequeue from DMA%d-q%d failed(%d)",
-			qdma_vq->dpdmai_dev->dpdmai_id,
-			qdma_vq->vq_id, ret);
-		return ret;
-	}
-
-	qdma_vq->num_dequeues += ret;
-	return ret;
-}
-
-static uint16_t
-dpaa2_qdma_dequeue_status(void *dev_private, uint16_t vchan,
-	const uint16_t nb_cpls,
-	uint16_t *last_idx,
-	enum rte_dma_status_code *st)
-{
-	struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
-	struct rte_dpaa2_qdma_job *jobs[DPAA2_QDMA_MAX_DESC];
-	int ret, i;
-
-	ret = dpaa2_qdma_dequeue_multi(qdma_vq, jobs, nb_cpls);
-
-	for (i = 0; i < ret; i++)
-		st[i] = jobs[i]->status;
-
-	rte_mempool_put_bulk(qdma_vq->job_pool, (void **)jobs, ret);
-
-	if (last_idx != NULL)
-		*last_idx = (uint16_t)(qdma_vq->num_dequeues - 1);
-
-	return ret;
-}
-
-static uint16_t
-dpaa2_qdma_dequeue(void *dev_private,
-	uint16_t vchan, const uint16_t nb_cpls,
-	uint16_t *last_idx, bool *has_error)
-{
-	struct dpaa2_dpdmai_dev *dpdmai_dev = dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
-	struct rte_dpaa2_qdma_job *jobs[DPAA2_QDMA_MAX_DESC];
-	int ret;
-
-	RTE_SET_USED(has_error);
-
-	ret = dpaa2_qdma_dequeue_multi(qdma_vq,
-				jobs, nb_cpls);
-
-	rte_mempool_put_bulk(qdma_vq->job_pool, (void **)jobs, ret);
-
-	if (last_idx != NULL)
-		*last_idx = (uint16_t)(qdma_vq->num_dequeues - 1);
-
-	return ret;
-}
-
-uint16_t
-rte_dpaa2_qdma_completed_multi(int16_t dev_id, uint16_t vchan,
-			       struct rte_dpaa2_qdma_job **jobs,
-			       uint16_t nb_cpls)
-{
-	struct rte_dma_fp_object *obj = &rte_dma_fp_objs[dev_id];
-	struct dpaa2_dpdmai_dev *dpdmai_dev = obj->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vchan];
-
-	return dpaa2_qdma_dequeue_multi(qdma_vq, jobs, nb_cpls);
-}
-
 static int
 dpaa2_qdma_info_get(const struct rte_dma_dev *dev,
 	struct rte_dma_info *dev_info,
@@ -1119,80 +988,22 @@ dpaa2_qdma_configure(struct rte_dma_dev *dev,
 	/* Allocate Virtual Queues */
 	sprintf(name, "qdma_%d_vq", dev->data->dev_id);
 	qdma_dev->vqs = rte_malloc(name,
-			(sizeof(struct qdma_virt_queue) * dev_conf->nb_vchans),
-			RTE_CACHE_LINE_SIZE);
+		(sizeof(struct qdma_virt_queue) * dev_conf->nb_vchans),
+		RTE_CACHE_LINE_SIZE);
 	if (!qdma_dev->vqs) {
-		DPAA2_QDMA_ERR("qdma_virtual_queues allocation failed");
+		DPAA2_QDMA_ERR("%s: VQs(%d) alloc failed.",
+			dev->data->dev_name, dev_conf->nb_vchans);
 		return -ENOMEM;
 	}
 	for (i = 0; i < dev_conf->nb_vchans; i++)
 		qdma_dev->vqs[i].vq_id = i;
 
 	qdma_dev->num_vqs = dev_conf->nb_vchans;
+	qdma_dev->is_silent = dev_conf->enable_silent;
 
 	return 0;
 }
 
-static int
-check_devargs_handler(__rte_unused const char *key,
-		      const char *value,
-		      __rte_unused void *opaque)
-{
-	if (strcmp(value, "1"))
-		return -1;
-
-	return 0;
-}
-
-static int
-dpaa2_qdma_get_devargs(struct rte_devargs *devargs, const char *key)
-{
-	struct rte_kvargs *kvlist;
-
-	if (!devargs)
-		return 0;
-
-	kvlist = rte_kvargs_parse(devargs->args, NULL);
-	if (!kvlist)
-		return 0;
-
-	if (!rte_kvargs_count(kvlist, key)) {
-		rte_kvargs_free(kvlist);
-		return 0;
-	}
-
-	if (rte_kvargs_process(kvlist, key,
-			       check_devargs_handler, NULL) < 0) {
-		rte_kvargs_free(kvlist);
-		return 0;
-	}
-	rte_kvargs_free(kvlist);
-
-	return 1;
-}
-
-/* Enable FD in Ultra Short format */
-void
-rte_dpaa2_qdma_vchan_fd_us_enable(int16_t dev_id, uint16_t vchan)
-{
-	struct rte_dma_fp_object *obj = &rte_dma_fp_objs[dev_id];
-	struct dpaa2_dpdmai_dev *dpdmai_dev = obj->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-
-	qdma_dev->vqs[vchan].flags |= DPAA2_QDMA_VQ_FD_SHORT_FORMAT;
-}
-
-/* Enable internal SG processing */
-void
-rte_dpaa2_qdma_vchan_internal_sg_enable(int16_t dev_id, uint16_t vchan)
-{
-	struct rte_dma_fp_object *obj = &rte_dma_fp_objs[dev_id];
-	struct dpaa2_dpdmai_dev *dpdmai_dev = obj->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-
-	qdma_dev->vqs[vchan].flags |= DPAA2_QDMA_VQ_FD_SG_FORMAT;
-}
-
 static int
 dpaa2_qdma_vchan_rbp_set(struct qdma_virt_queue *vq,
 	const struct rte_dma_vchan_conf *conf)
@@ -1236,8 +1047,8 @@ dpaa2_qdma_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
 	uint32_t pool_size;
 	char pool_name[64];
-	int fd_long_format = 1;
-	int sg_enable = 0, ret;
+	int ret;
+	char *env = NULL;
 
 	DPAA2_QDMA_FUNC_TRACE();
 
@@ -1247,85 +1058,70 @@ dpaa2_qdma_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 	if (ret)
 		return ret;
 
-	if (qdma_dev->vqs[vchan].flags & DPAA2_QDMA_VQ_FD_SG_FORMAT)
-		sg_enable = 1;
-
-	if (qdma_dev->vqs[vchan].flags & DPAA2_QDMA_VQ_FD_SHORT_FORMAT)
-		fd_long_format = 0;
-
-	if (dev->data->dev_conf.enable_silent)
-		qdma_dev->vqs[vchan].flags |= DPAA2_QDMA_VQ_NO_RESPONSE;
+	/**Default enable FLE PRE POPULATE*/
+	env = getenv("DPAA2_QDMA_FLE_PRE_POPULATE");
+	if (env)
+		qdma_dev->vqs[vchan].fle_pre_populate = atoi(env);
+	else
+		qdma_dev->vqs[vchan].fle_pre_populate = 1;
 
-	if (sg_enable) {
-		if (qdma_dev->num_vqs != 1) {
-			DPAA2_QDMA_ERR(
-				"qDMA SG format only supports physical queue!");
-			return -ENODEV;
-		}
-		if (!fd_long_format) {
-			DPAA2_QDMA_ERR(
-				"qDMA SG format only supports long FD format!");
-			return -ENODEV;
-		}
-		pool_size = QDMA_FLE_SG_POOL_SIZE;
-	} else {
-		pool_size = QDMA_FLE_SINGLE_POOL_SIZE;
-	}
+	env = getenv("DPAA2_QDMA_DESC_DEBUG");
+	if (env && atoi(env))
+		qdma_dev->vqs[vchan].flags |= DPAA2_QDMA_DESC_DEBUG_FLAG;
+	else
+		qdma_dev->vqs[vchan].flags &= (~DPAA2_QDMA_DESC_DEBUG_FLAG);
 
 	snprintf(pool_name, sizeof(pool_name),
 		"qdma_fle_pool_dev%d_qid%d", dpdmai_dev->dpdmai_id, vchan);
+	pool_size = RTE_MAX(sizeof(struct qdma_cntx_sg),
+			    sizeof(struct qdma_cntx_long));
+
 	qdma_dev->vqs[vchan].fle_pool = rte_mempool_create(pool_name,
-			conf->nb_desc, pool_size,
-			QDMA_FLE_CACHE_SIZE(conf->nb_desc), 0,
-			NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
+			DPAA2_QDMA_MAX_DESC * 2, pool_size,
+			512, 0, NULL, NULL, NULL, NULL,
+			SOCKET_ID_ANY, 0);
 	if (!qdma_dev->vqs[vchan].fle_pool) {
-		DPAA2_QDMA_ERR("qdma_fle_pool create failed");
-		return -ENOMEM;
-	}
-
-	snprintf(pool_name, sizeof(pool_name),
-		"qdma_job_pool_dev%d_qid%d", dpdmai_dev->dpdmai_id, vchan);
-	qdma_dev->vqs[vchan].job_pool = rte_mempool_create(pool_name,
-			conf->nb_desc, pool_size,
-			QDMA_FLE_CACHE_SIZE(conf->nb_desc), 0,
-			NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
-	if (!qdma_dev->vqs[vchan].job_pool) {
-		DPAA2_QDMA_ERR("qdma_job_pool create failed");
+		DPAA2_QDMA_ERR("%s create failed", pool_name);
 		return -ENOMEM;
 	}
 
-	if (fd_long_format) {
-		if (sg_enable) {
-			qdma_dev->vqs[vchan].set_fd = dpdmai_dev_set_sg_fd_lf;
-			qdma_dev->vqs[vchan].get_job = dpdmai_dev_get_sg_job_lf;
-		} else {
-			if (dev->data->dev_conf.enable_silent)
-				qdma_dev->vqs[vchan].set_fd =
-					dpdmai_dev_set_multi_fd_lf_no_rsp;
-			else
-				qdma_dev->vqs[vchan].set_fd =
-					dpdmai_dev_set_multi_fd_lf;
-			qdma_dev->vqs[vchan].get_job = dpdmai_dev_get_single_job_lf;
+	if (qdma_dev->is_silent) {
+		ret = rte_mempool_get_bulk(qdma_dev->vqs[vchan].fle_pool,
+				(void **)qdma_dev->vqs[vchan].cntx_sg,
+				DPAA2_QDMA_MAX_DESC);
+		if (ret) {
+			DPAA2_QDMA_ERR("sg cntx get from %s for silent mode",
+				       pool_name);
+			return ret;
+		}
+		ret = rte_mempool_get_bulk(qdma_dev->vqs[vchan].fle_pool,
+				(void **)qdma_dev->vqs[vchan].cntx_long,
+				DPAA2_QDMA_MAX_DESC);
+		if (ret) {
+			DPAA2_QDMA_ERR("long cntx get from %s for silent mode",
+				       pool_name);
+			return ret;
 		}
 	} else {
-		qdma_dev->vqs[vchan].set_fd = dpdmai_dev_set_fd_us;
-		qdma_dev->vqs[vchan].get_job = dpdmai_dev_get_job_us;
-	}
-
-	if (dpaa2_qdma_get_devargs(dev->device->devargs,
-			DPAA2_QDMA_PREFETCH)) {
-		/* If no prefetch is configured. */
-		qdma_dev->vqs[vchan].dequeue_job =
-				dpdmai_dev_dequeue_multijob_prefetch;
-		DPAA2_QDMA_INFO("Prefetch RX Mode enabled");
-	} else {
-		qdma_dev->vqs[vchan].dequeue_job =
-			dpdmai_dev_dequeue_multijob_no_prefetch;
+		qdma_dev->vqs[vchan].ring_cntx_idx = rte_malloc(NULL,
+				sizeof(struct qdma_cntx_idx_ring),
+				RTE_CACHE_LINE_SIZE);
+		if (!qdma_dev->vqs[vchan].ring_cntx_idx) {
+			DPAA2_QDMA_ERR("DQ response ring alloc failed.");
+			return -ENOMEM;
+		}
+		qdma_dev->vqs[vchan].ring_cntx_idx->start = 0;
+		qdma_dev->vqs[vchan].ring_cntx_idx->tail = 0;
+		qdma_dev->vqs[vchan].ring_cntx_idx->free_space =
+				QDMA_CNTX_IDX_RING_MAX_FREE;
+		qdma_dev->vqs[vchan].ring_cntx_idx->nb_in_ring = 0;
+		qdma_dev->vqs[vchan].fle_elem = rte_malloc(NULL,
+				sizeof(void *) * DPAA2_QDMA_MAX_DESC,
+				RTE_CACHE_LINE_SIZE);
 	}
 
 	qdma_dev->vqs[vchan].dpdmai_dev = dpdmai_dev;
 	qdma_dev->vqs[vchan].nb_desc = conf->nb_desc;
-	qdma_dev->vqs[vchan].enqueue_job = dpdmai_dev_submit_multi;
 
 	return 0;
 }
@@ -1374,9 +1170,12 @@ dpaa2_qdma_reset(struct rte_dma_dev *dev)
 
 	/* In case there are pending jobs on any VQ, return -EBUSY */
 	for (i = 0; i < qdma_dev->num_vqs; i++) {
-		if (qdma_dev->vqs[i].in_use && (qdma_dev->vqs[i].num_enqueues !=
-		    qdma_dev->vqs[i].num_dequeues)) {
-			DPAA2_QDMA_ERR("Jobs are still pending on VQ: %d", i);
+		if ((qdma_dev->vqs[i].num_enqueues !=
+		    qdma_dev->vqs[i].num_dequeues) &&
+		    !qdma_dev->is_silent) {
+			DPAA2_QDMA_ERR("VQ(%d) pending: eq(%"PRIu64") != dq(%"PRId64")",
+				i, qdma_dev->vqs[i].num_enqueues,
+				qdma_dev->vqs[i].num_dequeues);
 			return -EBUSY;
 		}
 	}
@@ -1618,7 +1417,7 @@ dpaa2_dpdmai_dev_init(struct rte_dma_dev *dev, int dpdmai_id)
 
 static int
 dpaa2_qdma_probe(struct rte_dpaa2_driver *dpaa2_drv,
-		 struct rte_dpaa2_device *dpaa2_dev)
+	struct rte_dpaa2_device *dpaa2_dev)
 {
 	struct rte_dma_dev *dmadev;
 	int ret;
@@ -1628,8 +1427,8 @@ dpaa2_qdma_probe(struct rte_dpaa2_driver *dpaa2_drv,
 	RTE_SET_USED(dpaa2_drv);
 
 	dmadev = rte_dma_pmd_allocate(dpaa2_dev->device.name,
-				      rte_socket_id(),
-				      sizeof(struct dpaa2_dpdmai_dev));
+		rte_socket_id(),
+		sizeof(struct dpaa2_dpdmai_dev));
 	if (!dmadev) {
 		DPAA2_QDMA_ERR("Unable to allocate dmadevice");
 		return -EINVAL;
@@ -1639,10 +1438,10 @@ dpaa2_qdma_probe(struct rte_dpaa2_driver *dpaa2_drv,
 	dmadev->dev_ops = &dpaa2_qdma_ops;
 	dmadev->device = &dpaa2_dev->device;
 	dmadev->fp_obj->dev_private = dmadev->data->dev_private;
-	dmadev->fp_obj->copy = dpaa2_qdma_enqueue;
+	dmadev->fp_obj->copy = dpaa2_qdma_copy;
+	dmadev->fp_obj->copy_sg = dpaa2_qdma_copy_sg;
 	dmadev->fp_obj->submit = dpaa2_qdma_submit;
 	dmadev->fp_obj->completed = dpaa2_qdma_dequeue;
-	dmadev->fp_obj->completed_status = dpaa2_qdma_dequeue_status;
 	dmadev->fp_obj->burst_capacity = dpaa2_qdma_burst_capacity;
 
 	/* Invoke PMD device initialization function */
diff --git a/drivers/dma/dpaa2/dpaa2_qdma.h b/drivers/dma/dpaa2/dpaa2_qdma.h
index 786dcb9308..ee34532408 100644
--- a/drivers/dma/dpaa2/dpaa2_qdma.h
+++ b/drivers/dma/dpaa2/dpaa2_qdma.h
@@ -5,7 +5,7 @@
 #ifndef _DPAA2_QDMA_H_
 #define _DPAA2_QDMA_H_
 
-#define DPAA2_QDMA_MAX_DESC		1024
+#define DPAA2_QDMA_MAX_DESC		4096
 #define DPAA2_QDMA_MIN_DESC		1
 #define DPAA2_QDMA_MAX_VHANS		64
 
@@ -13,48 +13,9 @@
 #define DPAA2_QDMA_VQ_FD_SG_FORMAT		(1ULL << 1)
 #define DPAA2_QDMA_VQ_NO_RESPONSE		(1ULL << 2)
 
-#define DPAA2_QDMA_MAX_FLE 3
-#define DPAA2_QDMA_MAX_SDD 2
-
-#define DPAA2_QDMA_MAX_SG_NB 64
-
 #define DPAA2_DPDMAI_MAX_QUEUES	16
 
-/** FLE single job pool size: job pointer(uint64_t) +
- * 3 Frame list + 2 source/destination descriptor.
- */
-#define QDMA_FLE_SINGLE_POOL_SIZE (sizeof(uint64_t) + \
-			sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE + \
-			sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD)
-
-/** FLE sg jobs pool size: job number(uint64_t) +
- * 3 Frame list + 2 source/destination descriptor  +
- * 64 (src + dst) sg entries + 64 jobs pointers.
- */
-#define QDMA_FLE_SG_POOL_SIZE (sizeof(uint64_t) + \
-		sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE + \
-		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD + \
-		sizeof(struct qdma_sg_entry) * (DPAA2_QDMA_MAX_SG_NB * 2) + \
-		sizeof(struct rte_qdma_job *) * DPAA2_QDMA_MAX_SG_NB)
-
-#define QDMA_FLE_JOB_NB_OFFSET 0
-
-#define QDMA_FLE_SINGLE_JOB_OFFSET 0
-
-#define QDMA_FLE_FLE_OFFSET \
-		(QDMA_FLE_JOB_NB_OFFSET + sizeof(uint64_t))
-
-#define QDMA_FLE_SDD_OFFSET \
-		(QDMA_FLE_FLE_OFFSET + \
-		sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE)
-
-#define QDMA_FLE_SG_ENTRY_OFFSET \
-		(QDMA_FLE_SDD_OFFSET + \
-		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD)
-
-#define QDMA_FLE_SG_JOBS_OFFSET \
-		(QDMA_FLE_SG_ENTRY_OFFSET + \
-		sizeof(struct qdma_sg_entry) * DPAA2_QDMA_MAX_SG_NB * 2)
+#define QDMA_JOB_HW_CNTX_IDX (RTE_DPAA2_QDMA_JOB_USR_CNTX_IDX + 1)
 
 /** FLE pool cache size */
 #define QDMA_FLE_CACHE_SIZE(_num) (_num/(RTE_MAX_LCORE * 2))
@@ -202,6 +163,39 @@ struct dpaa2_qdma_rbp {
 	uint32_t rsv:2;
 };
 
+enum {
+	DPAA2_QDMA_SDD_FLE,
+	DPAA2_QDMA_SRC_FLE,
+	DPAA2_QDMA_DST_FLE,
+	DPAA2_QDMA_MAX_FLE
+};
+
+enum {
+	DPAA2_QDMA_SRC_SDD,
+	DPAA2_QDMA_DST_SDD,
+	DPAA2_QDMA_MAX_SDD
+};
+
+struct qdma_cntx_fle_sdd {
+	struct qbman_fle fle[DPAA2_QDMA_MAX_FLE];
+	struct qdma_sdd sdd[DPAA2_QDMA_MAX_SDD];
+} __rte_packed;
+
+struct qdma_cntx_sg {
+	struct qdma_cntx_fle_sdd fle_sdd;
+	struct qdma_sg_entry sg_src_entry[RTE_DPAA2_QDMA_JOB_SUBMIT_MAX];
+	struct qdma_sg_entry sg_dst_entry[RTE_DPAA2_QDMA_JOB_SUBMIT_MAX];
+	uint16_t cntx_idx[RTE_DPAA2_QDMA_JOB_SUBMIT_MAX];
+	uint16_t job_nb;
+	uint16_t rsv[3];
+} __rte_packed;
+
+struct qdma_cntx_long {
+	struct qdma_cntx_fle_sdd fle_sdd;
+	uint16_t cntx_idx;
+	uint16_t rsv[3];
+} __rte_packed;
+
 /** Represents a DPDMAI device */
 struct dpaa2_dpdmai_dev {
 	/** Pointer to Next device instance */
@@ -221,27 +215,18 @@ struct dpaa2_dpdmai_dev {
 	struct qdma_device *qdma_dev;
 };
 
-struct qdma_virt_queue;
-
-typedef uint16_t (qdma_get_job_t)(struct qdma_virt_queue *qdma_vq,
-					const struct qbman_fd *fd,
-					struct rte_dpaa2_qdma_job **job,
-					uint16_t *nb_jobs);
-typedef int (qdma_set_fd_t)(struct qdma_virt_queue *qdma_vq,
-					struct qbman_fd *fd,
-					struct rte_dpaa2_qdma_job **job,
-					uint16_t nb_jobs);
-
-typedef int (qdma_dequeue_multijob_t)(
-				struct qdma_virt_queue *qdma_vq,
-				uint16_t *vq_id,
-				struct rte_dpaa2_qdma_job **job,
-				uint16_t nb_jobs);
+#define QDMA_CNTX_IDX_RING_EXTRA_SPACE 64
+#define QDMA_CNTX_IDX_RING_MAX_FREE \
+	(DPAA2_QDMA_MAX_DESC - QDMA_CNTX_IDX_RING_EXTRA_SPACE)
+struct qdma_cntx_idx_ring {
+	uint16_t cntx_idx_ring[DPAA2_QDMA_MAX_DESC];
+	uint16_t start;
+	uint16_t tail;
+	uint16_t free_space;
+	uint16_t nb_in_ring;
+};
 
-typedef int (qdma_enqueue_multijob_t)(
-			struct qdma_virt_queue *qdma_vq,
-			struct rte_dpaa2_qdma_job **job,
-			uint16_t nb_jobs);
+#define DPAA2_QDMA_DESC_DEBUG_FLAG (1 << 0)
 
 /** Represents a QDMA virtual queue */
 struct qdma_virt_queue {
@@ -249,10 +234,11 @@ struct qdma_virt_queue {
 	struct dpaa2_dpdmai_dev *dpdmai_dev;
 	/** FLE pool for the queue */
 	struct rte_mempool *fle_pool;
+	void **fle_elem;
 	/** Route by port */
 	struct dpaa2_qdma_rbp rbp;
 	/** States if this vq is in use or not */
-	uint8_t in_use;
+	uint8_t fle_pre_populate;
 	/** Number of descriptor for the virtual DMA channel */
 	uint16_t nb_desc;
 	/* Total number of enqueues on this VQ */
@@ -262,18 +248,17 @@ struct qdma_virt_queue {
 
 	uint16_t vq_id;
 	uint32_t flags;
+	struct qbman_fd fd[DPAA2_QDMA_MAX_DESC];
+	uint16_t fd_idx;
+	struct qdma_cntx_idx_ring *ring_cntx_idx;
+
+	/**Used for silent enabled*/
+	struct qdma_cntx_sg *cntx_sg[DPAA2_QDMA_MAX_DESC];
+	struct qdma_cntx_long *cntx_long[DPAA2_QDMA_MAX_DESC];
 
-	struct rte_dpaa2_qdma_job *job_list[DPAA2_QDMA_MAX_DESC];
-	struct rte_mempool *job_pool;
 	int num_valid_jobs;
 
 	struct rte_dma_stats stats;
-
-	qdma_set_fd_t *set_fd;
-	qdma_get_job_t *get_job;
-
-	qdma_dequeue_multijob_t *dequeue_job;
-	qdma_enqueue_multijob_t *enqueue_job;
 };
 
 /** Represents a QDMA device. */
@@ -284,6 +269,7 @@ struct qdma_device {
 	uint16_t num_vqs;
 	/** Device state - started or stopped */
 	uint8_t state;
+	uint8_t is_silent;
 };
 
 #endif /* _DPAA2_QDMA_H_ */
diff --git a/drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h b/drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h
index b0bf9d8bcc..729bff42bb 100644
--- a/drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h
+++ b/drivers/dma/dpaa2/rte_pmd_dpaa2_qdma.h
@@ -7,118 +7,19 @@
 
 #include <rte_compat.h>
 
-/** States if the source addresses is physical. */
-#define RTE_DPAA2_QDMA_JOB_SRC_PHY		(1ULL << 30)
+#define RTE_DPAA2_QDMA_IDX_SHIFT_POS 20
+#define RTE_DPAA2_QDMA_LEN_MASK \
+	(~((~0u) << RTE_DPAA2_QDMA_IDX_SHIFT_POS))
 
-/** States if the destination addresses is physical. */
-#define RTE_DPAA2_QDMA_JOB_DEST_PHY		(1ULL << 31)
+#define RTE_DPAA2_QDMA_IDX_LEN(idx, len) \
+	((uint32_t)((idx << RTE_DPAA2_QDMA_IDX_SHIFT_POS) | (len & RTE_DPAA2_QDMA_LEN_MASK)))
 
-/** Determines a QDMA job */
-struct rte_dpaa2_qdma_job {
-	/** Source Address from where DMA is (to be) performed */
-	uint64_t src;
-	/** Destination Address where DMA is (to be) done */
-	uint64_t dest;
-	/** Length of the DMA operation in bytes. */
-	uint32_t len;
-	/** See RTE_QDMA_JOB_ flags */
-	uint32_t flags;
-	/**
-	 * Status of the transaction.
-	 * This is filled in the dequeue operation by the driver.
-	 * upper 8bits acc_err for route by port.
-	 * lower 8bits fd error
-	 */
-	uint16_t status;
-	uint16_t vq_id;
-	uint64_t cnxt;
-	/**
-	 * FLE pool element maintained by user, in case no qDMA response.
-	 * Note: the address must be allocated from DPDK memory pool.
-	 */
-	void *usr_elem;
-};
+#define RTE_DPAA2_QDMA_IDX_FROM_LENGTH(length) \
+	((uint16_t)((length) >> RTE_DPAA2_QDMA_IDX_SHIFT_POS))
 
-/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice.
- *
- * Enable FD in Ultra Short format on a channel. This API should be
- * called before calling 'rte_dma_vchan_setup()' API.
- *
- * @param dev_id
- *   The identifier of the device.
- * @param vchan
- *   The identifier of virtual DMA channel.
- */
-__rte_experimental
-void rte_dpaa2_qdma_vchan_fd_us_enable(int16_t dev_id, uint16_t vchan);
-
-/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice.
- *
- * Enable internal SG processing on a channel. This API should be
- * called before calling 'rte_dma_vchan_setup()' API.
- *
- * @param dev_id
- *   The identifier of the device.
- * @param vchan
- *   The identifier of virtual DMA channel.
- */
-__rte_experimental
-void rte_dpaa2_qdma_vchan_internal_sg_enable(int16_t dev_id, uint16_t vchan);
+#define RTE_DPAA2_QDMA_LEN_FROM_LENGTH(length) \
+	((length) & RTE_DPAA2_QDMA_LEN_MASK)
 
-/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice.
- *
- * Enqueue a copy operation onto the virtual DMA channel for silent mode,
- * when dequeue is not required.
- *
- * This queues up a copy operation to be performed by hardware, if the 'flags'
- * parameter contains RTE_DMA_OP_FLAG_SUBMIT then trigger doorbell to begin
- * this operation, otherwise do not trigger doorbell.
- *
- * @param dev_id
- *   The identifier of the device.
- * @param vchan
- *   The identifier of virtual DMA channel.
- * @param jobs
- *   Jobs to be submitted to QDMA.
- * @param nb_cpls
- *   Number of DMA jobs.
- *
- * @return
- *   - >= 0..Number of enqueued job.
- *   - -ENOSPC: if no space left to enqueue.
- *   - other values < 0 on failure.
- */
-__rte_experimental
-int rte_dpaa2_qdma_copy_multi(int16_t dev_id, uint16_t vchan,
-		struct rte_dpaa2_qdma_job **jobs, uint16_t nb_cpls);
-
-/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice.
- *
- * Return the number of operations that have been successfully completed.
- *
- * @param dev_id
- *   The identifier of the device.
- * @param vchan
- *   The identifier of virtual DMA channel.
- * @param jobs
- *   Jobs completed by QDMA.
- * @param nb_cpls
- *   Number of completed DMA jobs.
- *
- * @return
- *   The number of operations that successfully completed. This return value
- *   must be less than or equal to the value of nb_cpls.
- */
-__rte_experimental
-uint16_t rte_dpaa2_qdma_completed_multi(int16_t dev_id, uint16_t vchan,
-		struct rte_dpaa2_qdma_job **jobs, uint16_t nb_cpls);
+#define RTE_DPAA2_QDMA_JOB_SUBMIT_MAX (32 + 8)
 
 #endif /* _RTE_PMD_DPAA2_QDMA_H_ */
diff --git a/drivers/dma/dpaa2/version.map b/drivers/dma/dpaa2/version.map
deleted file mode 100644
index eb012cfbfc..0000000000
--- a/drivers/dma/dpaa2/version.map
+++ /dev/null
@@ -1,13 +0,0 @@
-DPDK_24 {
-	local: *;
-};
-
-EXPERIMENTAL {
-	global:
-
-	# added in 22.07
-	rte_dpaa2_qdma_completed_multi;
-	rte_dpaa2_qdma_copy_multi;
-	rte_dpaa2_qdma_vchan_fd_us_enable;
-	rte_dpaa2_qdma_vchan_internal_sg_enable;
-};
-- 
2.25.1


  parent reply	other threads:[~2024-07-19 10:01 UTC|newest]

Thread overview: 94+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-19 10:00 [PATCH 01/30] dma/dpaa2: configure route by port by PCIe port param Gagandeep Singh
2024-07-19 10:00 ` [PATCH 02/30] dma/dpaa2: support multiple HW queues Gagandeep Singh
2024-07-19 10:00 ` Gagandeep Singh [this message]
2024-07-19 10:01 ` [PATCH 04/30] dma/dpaa2: multiple process support Gagandeep Singh
2024-07-19 10:01 ` [PATCH 05/30] dma/dpaa2: add sanity check for SG entry Gagandeep Singh
2024-07-19 10:01 ` [PATCH 06/30] dma/dpaa2: include DPAA2 specific header files Gagandeep Singh
2024-07-19 10:01 ` [PATCH 07/30] dma/dpaa2: borrow flags of DMA operation to pass job context Gagandeep Singh
2024-07-19 10:01 ` [PATCH 08/30] bus/fslmc: enhance the qbman dq storage logic Gagandeep Singh
2024-07-19 10:01 ` [PATCH 09/30] dma/dpaa2: add short FD support Gagandeep Singh
2024-07-19 10:01 ` [PATCH 10/30] dma/dpaa2: limit the max descriptor number Gagandeep Singh
2024-07-19 10:01 ` [PATCH 11/30] dma/dpaa2: change the DMA copy return value Gagandeep Singh
2024-07-19 10:01 ` [PATCH 12/30] dma/dpaa2: move the qdma header to common place Gagandeep Singh
2024-07-19 10:01 ` [PATCH 13/30] dma/dpaa: support multi channels Gagandeep Singh
2024-07-19 10:01 ` [PATCH 14/30] dma/dpaa: fix job enqueue Gagandeep Singh
2024-07-19 10:01 ` [PATCH 15/30] dma/dpaa: add burst capacity API Gagandeep Singh
2024-07-19 10:01 ` [PATCH 16/30] dma/dpaa: add workaround for ERR050757 Gagandeep Singh
2024-07-19 10:01 ` [PATCH 17/30] dma/dpaa: qdma stall workaround for ERR050265 Gagandeep Singh
2024-07-19 10:01 ` [PATCH 18/30] dma/dpaa: remove unwanted desc Gagandeep Singh
2024-07-19 10:01 ` [PATCH 19/30] dma/dpaa: data path optimization Gagandeep Singh
2024-07-19 10:01 ` [PATCH 20/30] dma/dpaa: refactor driver Gagandeep Singh
2024-07-19 10:01 ` [PATCH 21/30] dma/dpaa: dequeue status queue Gagandeep Singh
2024-07-19 10:01 ` [PATCH 22/30] dma/dpaa: add Scatter Gather support Gagandeep Singh
2024-07-19 10:01 ` [PATCH 23/30] dma/dpaa: block dequeue Gagandeep Singh
2024-07-19 10:01 ` [PATCH 24/30] dma/dpaa: improve congestion handling Gagandeep Singh
2024-07-19 10:01 ` [PATCH 25/30] dma/dpaa: disable SG descriptor as default Gagandeep Singh
2024-07-19 10:01 ` [PATCH 26/30] dma/dpaa: improve ERRATA workaround solution Gagandeep Singh
2024-07-19 10:01 ` [PATCH 27/30] dma/dpaa: improve silent mode support Gagandeep Singh
2024-07-19 10:01 ` [PATCH 28/30] dma/dpaa: support multiple SG copies Gagandeep Singh
2024-07-19 10:01 ` [PATCH 29/30] dma/dpaa: support max SG entry size Gagandeep Singh
2024-07-19 10:01 ` [PATCH 30/30] bus/dpaa: add port bmi stats Gagandeep Singh
2024-07-22 11:58 ` [v2 00/30] NXP DMA driver fixes and Enhancements Gagandeep Singh
2024-07-22 11:58   ` [v2 01/30] dma/dpaa2: configure route by port by PCIe port param Gagandeep Singh
2024-07-22 11:58   ` [v2 02/30] dma/dpaa2: support multiple HW queues Gagandeep Singh
2024-07-22 11:58   ` [v2 03/30] dma/dpaa2: adapt DMA driver API Gagandeep Singh
2024-07-22 11:58   ` [v2 04/30] dma/dpaa2: multiple process support Gagandeep Singh
2024-07-22 11:58   ` [v2 05/30] dma/dpaa2: add sanity check for SG entry Gagandeep Singh
2024-07-22 11:58   ` [v2 06/30] dma/dpaa2: include DPAA2 specific header files Gagandeep Singh
2024-07-22 11:58   ` [v2 07/30] dma/dpaa2: borrow flags of DMA operation to pass job context Gagandeep Singh
2024-07-22 11:58   ` [v2 08/30] bus/fslmc: enhance the qbman dq storage logic Gagandeep Singh
2024-07-22 11:58   ` [v2 09/30] dma/dpaa2: add short FD support Gagandeep Singh
2024-07-22 11:58   ` [v2 10/30] dma/dpaa2: limit the max descriptor number Gagandeep Singh
2024-07-22 11:58   ` [v2 11/30] dma/dpaa2: change the DMA copy return value Gagandeep Singh
2024-07-22 11:58   ` [v2 12/30] dma/dpaa2: move the qdma header to common place Gagandeep Singh
2024-07-22 11:58   ` [v2 13/30] dma/dpaa: support multi channels Gagandeep Singh
2024-07-22 11:58   ` [v2 14/30] dma/dpaa: fix job enqueue Gagandeep Singh
2024-07-22 11:58   ` [v2 15/30] dma/dpaa: add burst capacity API Gagandeep Singh
2024-07-22 11:58   ` [v2 16/30] dma/dpaa: add workaround for ERR050757 Gagandeep Singh
2024-07-22 11:58   ` [v2 17/30] dma/dpaa: qdma stall workaround for ERR050265 Gagandeep Singh
2024-07-22 11:58   ` [v2 18/30] dma/dpaa: remove unwanted desc Gagandeep Singh
2024-07-22 11:58   ` [v2 19/30] dma/dpaa: data path optimization Gagandeep Singh
2024-07-22 11:58   ` [v2 20/30] dma/dpaa: refactor driver Gagandeep Singh
2024-07-22 11:58   ` [v2 21/30] dma/dpaa: dequeue status queue Gagandeep Singh
2024-07-22 11:58   ` [v2 22/30] dma/dpaa: add Scatter Gather support Gagandeep Singh
2024-07-22 11:58   ` [v2 23/30] dma/dpaa: block dequeue Gagandeep Singh
2024-07-22 11:58   ` [v2 24/30] dma/dpaa: improve congestion handling Gagandeep Singh
2024-07-22 11:58   ` [v2 25/30] dma/dpaa: disable SG descriptor as default Gagandeep Singh
2024-07-22 11:58   ` [v2 26/30] dma/dpaa: improve ERRATA workaround solution Gagandeep Singh
2024-07-22 11:58   ` [v2 27/30] dma/dpaa: improve silent mode support Gagandeep Singh
2024-07-22 11:58   ` [v2 28/30] dma/dpaa: support multiple SG copies Gagandeep Singh
2024-07-22 11:58   ` [v2 29/30] dma/dpaa: support max SG entry size Gagandeep Singh
2024-07-22 11:58   ` [v2 30/30] bus/dpaa: add port bmi stats Gagandeep Singh
2024-07-22 16:39   ` [v3 00/30] NXP DMA driver fixes and Enhancements Gagandeep Singh
2024-07-22 16:39     ` [v3 01/30] dma/dpaa2: configure route by port by PCIe port param Gagandeep Singh
2024-07-22 16:39     ` [v3 02/30] dma/dpaa2: support multiple HW queues Gagandeep Singh
2024-07-22 20:19       ` Stephen Hemminger
2024-07-22 16:39     ` [v3 03/30] dma/dpaa2: adapt DMA driver API Gagandeep Singh
2024-07-22 16:39     ` [v3 04/30] dma/dpaa2: multiple process support Gagandeep Singh
2024-07-22 16:39     ` [v3 05/30] dma/dpaa2: add sanity check for SG entry Gagandeep Singh
2024-07-22 20:21       ` Stephen Hemminger
2024-07-22 16:39     ` [v3 06/30] dma/dpaa2: include DPAA2 specific header files Gagandeep Singh
2024-07-22 16:39     ` [v3 07/30] dma/dpaa2: borrow flags of DMA operation to pass job context Gagandeep Singh
2024-07-22 16:39     ` [v3 08/30] bus/fslmc: enhance the qbman dq storage logic Gagandeep Singh
2024-07-22 16:39     ` [v3 09/30] dma/dpaa2: add short FD support Gagandeep Singh
2024-07-22 16:39     ` [v3 10/30] dma/dpaa2: limit the max descriptor number Gagandeep Singh
2024-07-22 16:39     ` [v3 11/30] dma/dpaa2: change the DMA copy return value Gagandeep Singh
2024-07-22 16:39     ` [v3 12/30] dma/dpaa2: move the qdma header to common place Gagandeep Singh
2024-07-22 16:39     ` [v3 13/30] dma/dpaa: support multi channels Gagandeep Singh
2024-07-22 16:39     ` [v3 14/30] dma/dpaa: fix job enqueue Gagandeep Singh
2024-07-22 16:39     ` [v3 15/30] dma/dpaa: add burst capacity API Gagandeep Singh
2024-07-22 16:39     ` [v3 16/30] dma/dpaa: add workaround for ERR050757 Gagandeep Singh
2024-07-22 16:39     ` [v3 17/30] dma/dpaa: qdma stall workaround for ERR050265 Gagandeep Singh
2024-07-22 16:39     ` [v3 18/30] dma/dpaa: remove unwanted desc Gagandeep Singh
2024-07-22 16:39     ` [v3 19/30] dma/dpaa: data path optimization Gagandeep Singh
2024-07-22 16:39     ` [v3 20/30] dma/dpaa: refactor driver Gagandeep Singh
2024-07-22 16:39     ` [v3 21/30] dma/dpaa: dequeue status queue Gagandeep Singh
2024-07-22 16:39     ` [v3 22/30] dma/dpaa: add Scatter Gather support Gagandeep Singh
2024-07-22 16:39     ` [v3 23/30] dma/dpaa: block dequeue Gagandeep Singh
2024-07-22 16:39     ` [v3 24/30] dma/dpaa: improve congestion handling Gagandeep Singh
2024-07-22 16:39     ` [v3 25/30] dma/dpaa: disable SG descriptor as default Gagandeep Singh
2024-07-22 16:39     ` [v3 26/30] dma/dpaa: improve ERRATA workaround solution Gagandeep Singh
2024-07-22 16:39     ` [v3 27/30] dma/dpaa: improve silent mode support Gagandeep Singh
2024-07-22 16:39     ` [v3 28/30] dma/dpaa: support multiple SG copies Gagandeep Singh
2024-07-22 16:39     ` [v3 29/30] dma/dpaa: support max SG entry size Gagandeep Singh
2024-07-22 16:39     ` [v3 30/30] bus/dpaa: add port bmi stats Gagandeep Singh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240719100126.1150373-3-g.singh@nxp.com \
    --to=g.singh@nxp.com \
    --cc=dev@dpdk.org \
    --cc=hemant.agrawal@nxp.com \
    --cc=jun.yang@nxp.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).