DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH 0/7] raw/dpaa2_qdma: driver enhancement
@ 2020-09-07  9:25 Gagandeep Singh
  2020-09-07  9:25 ` [dpdk-dev] [PATCH 1/7] raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops Gagandeep Singh
                   ` (8 more replies)
  0 siblings, 9 replies; 23+ messages in thread
From: Gagandeep Singh @ 2020-09-07  9:25 UTC (permalink / raw)
  To: dev, nipun.gupta, hemant.agrawal; +Cc: thomas.monjalon, Gagandeep Singh

This patchset enhances the dpaa2_qdma driver: it converts the driver
to the rawdev APIs, adds optimizations, adds scatter-gather support on
TX, and adds support for enqueue without waiting for a response.

Gagandeep Singh (2):
  raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops
  raw/dpaa2_qdma: memset to only required memory

Jun Yang (5):
  raw/dpaa2_qdma: refactor the code
  raw/dpaa2_qdma: optimize IOVA conversion
  raw/dpaa2_qdma: support scatter gather in enqueue
  raw/dpaa2_qdma: support FLE pool per queue
  raw/dpaa2_qdma: support enqueue without response wait

 drivers/bus/fslmc/portal/dpaa2_hw_pvt.h     |   18 +-
 drivers/raw/dpaa2_qdma/dpaa2_qdma.c         | 1824 ++++++++++++++++-----------
 drivers/raw/dpaa2_qdma/dpaa2_qdma.h         |  128 +-
 drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h |  231 +---
 4 files changed, 1257 insertions(+), 944 deletions(-)

-- 
2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH 1/7] raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops
  2020-09-07  9:25 [dpdk-dev] [PATCH 0/7] raw/dpaa2_qdma: driver enhancement Gagandeep Singh
@ 2020-09-07  9:25 ` Gagandeep Singh
  2020-09-25 10:53   ` Hemant Agrawal
  2020-10-06 21:36   ` Thomas Monjalon
  2020-09-07  9:25 ` [dpdk-dev] [PATCH 2/7] raw/dpaa2_qdma: memset to only required memory Gagandeep Singh
                   ` (7 subsequent siblings)
  8 siblings, 2 replies; 23+ messages in thread
From: Gagandeep Singh @ 2020-09-07  9:25 UTC (permalink / raw)
  To: dev, nipun.gupta, hemant.agrawal; +Cc: thomas.monjalon, Gagandeep Singh

dpaa2_qdma was partially using direct PMD APIs.
This patch changes that and adapts the driver to use
more of the rawdev APIs.

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 drivers/raw/dpaa2_qdma/dpaa2_qdma.c         | 331 ++++++++++++++--------------
 drivers/raw/dpaa2_qdma/dpaa2_qdma.h         |   3 +-
 drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h | 207 ++---------------
 3 files changed, 187 insertions(+), 354 deletions(-)

diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
index 0b9c4e3..a2ee6cc 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018-2019 NXP
+ * Copyright 2018-2020 NXP
  */
 
 #include <string.h>
@@ -30,7 +30,7 @@ uint32_t dpaa2_coherent_no_alloc_cache;
 uint32_t dpaa2_coherent_alloc_cache;
 
 /* QDMA device */
-static struct qdma_device qdma_dev;
+static struct qdma_device q_dev;
 
 /* QDMA H/W queues list */
 TAILQ_HEAD(qdma_hw_queue_list, qdma_hw_queue);
@@ -51,9 +51,11 @@ typedef int (dpdmai_dev_dequeue_multijob_t)(struct dpaa2_dpdmai_dev *dpdmai_dev,
 
 dpdmai_dev_dequeue_multijob_t *dpdmai_dev_dequeue_multijob;
 
-typedef uint16_t (dpdmai_dev_get_job_t)(const struct qbman_fd *fd,
+typedef uint16_t (dpdmai_dev_get_job_t)(struct qdma_device *qdma_dev,
+					const struct qbman_fd *fd,
 					struct rte_qdma_job **job);
-typedef int (dpdmai_dev_set_fd_t)(struct qbman_fd *fd,
+typedef int (dpdmai_dev_set_fd_t)(struct qdma_device *qdma_dev,
+				  struct qbman_fd *fd,
 				  struct rte_qdma_job *job,
 				  struct rte_qdma_rbp *rbp,
 				  uint16_t vq_id);
@@ -201,10 +203,12 @@ dpaa2_qdma_populate_fle(struct qbman_fle *fle,
 	DPAA2_SET_FLE_FIN(fle);
 }
 
-static inline int dpdmai_dev_set_fd_us(struct qbman_fd *fd,
-					struct rte_qdma_job *job,
-					struct rte_qdma_rbp *rbp,
-					uint16_t vq_id)
+static inline int dpdmai_dev_set_fd_us(
+				struct qdma_device *qdma_dev __rte_unused,
+				struct qbman_fd *fd,
+				struct rte_qdma_job *job,
+				struct rte_qdma_rbp *rbp,
+				uint16_t vq_id)
 {
 	struct rte_qdma_job **ppjob;
 	size_t iova;
@@ -230,7 +234,8 @@ static inline int dpdmai_dev_set_fd_us(struct qbman_fd *fd,
 					   job->len, fd);
 	return ret;
 }
-static inline int dpdmai_dev_set_fd_lf(struct qbman_fd *fd,
+static inline int dpdmai_dev_set_fd_lf(struct qdma_device *qdma_dev,
+					struct qbman_fd *fd,
 					struct rte_qdma_job *job,
 					struct rte_qdma_rbp *rbp,
 					uint16_t vq_id)
@@ -242,7 +247,7 @@ static inline int dpdmai_dev_set_fd_lf(struct qbman_fd *fd,
 	 * Get an FLE/SDD from FLE pool.
 	 * Note: IO metadata is before the FLE and SDD memory.
 	 */
-	ret = rte_mempool_get(qdma_dev.fle_pool, (void **)(&ppjob));
+	ret = rte_mempool_get(qdma_dev->fle_pool, (void **)(&ppjob));
 	if (ret) {
 		DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
 		return ret;
@@ -266,8 +271,10 @@ static inline int dpdmai_dev_set_fd_lf(struct qbman_fd *fd,
 	return 0;
 }
 
-static inline uint16_t dpdmai_dev_get_job_us(const struct qbman_fd *fd,
-					struct rte_qdma_job **job)
+static inline uint16_t dpdmai_dev_get_job_us(
+				struct qdma_device *qdma_dev __rte_unused,
+				const struct qbman_fd *fd,
+				struct rte_qdma_job **job)
 {
 	uint16_t vqid;
 	size_t iova;
@@ -288,8 +295,9 @@ static inline uint16_t dpdmai_dev_get_job_us(const struct qbman_fd *fd,
 	return vqid;
 }
 
-static inline uint16_t dpdmai_dev_get_job_lf(const struct qbman_fd *fd,
-					struct rte_qdma_job **job)
+static inline uint16_t dpdmai_dev_get_job_lf(struct qdma_device *qdma_dev,
+					     const struct qbman_fd *fd,
+					     struct rte_qdma_job **job)
 {
 	struct rte_qdma_job **ppjob;
 	uint16_t vqid;
@@ -307,7 +315,7 @@ static inline uint16_t dpdmai_dev_get_job_lf(const struct qbman_fd *fd,
 	vqid = (*job)->vq_id;
 
 	/* Free FLE to the pool */
-	rte_mempool_put(qdma_dev.fle_pool, (void *)ppjob);
+	rte_mempool_put(qdma_dev->fle_pool, (void *)ppjob);
 
 	return vqid;
 }
@@ -341,7 +349,7 @@ free_hw_queue(struct qdma_hw_queue *queue)
 
 
 static struct qdma_hw_queue *
-get_hw_queue(uint32_t lcore_id)
+get_hw_queue(struct qdma_device *qdma_dev, uint32_t lcore_id)
 {
 	struct qdma_per_core_info *core_info;
 	struct qdma_hw_queue *queue, *temp;
@@ -357,7 +365,7 @@ get_hw_queue(uint32_t lcore_id)
 	 * Allocate a HW queue if there are less queues
 	 * than maximum per core queues configured
 	 */
-	if (num_hw_queues < qdma_dev.max_hw_queues_per_core) {
+	if (num_hw_queues < qdma_dev->max_hw_queues_per_core) {
 		queue = alloc_hw_queue(lcore_id);
 		if (queue) {
 			core_info->hw_queues[num_hw_queues] = queue;
@@ -416,41 +424,41 @@ put_hw_queue(struct qdma_hw_queue *queue)
 	}
 }
 
-int
-rte_qdma_init(void)
+static int
+dpaa2_qdma_attr_get(struct rte_rawdev *rawdev,
+		    __rte_unused const char *attr_name,
+		    uint64_t *attr_value)
 {
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct rte_qdma_attr *qdma_attr = (struct rte_qdma_attr *)attr_value;
+
 	DPAA2_QDMA_FUNC_TRACE();
 
-	rte_spinlock_init(&qdma_dev.lock);
+	qdma_attr->num_hw_queues = qdma_dev->num_hw_queues;
 
 	return 0;
 }
 
-void
-rte_qdma_attr_get(struct rte_qdma_attr *qdma_attr)
-{
-	DPAA2_QDMA_FUNC_TRACE();
-
-	qdma_attr->num_hw_queues = qdma_dev.num_hw_queues;
-}
-
-int
-rte_qdma_reset(void)
+static int
+dpaa2_qdma_reset(struct rte_rawdev *rawdev)
 {
 	struct qdma_hw_queue *queue;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
 	int i;
 
 	DPAA2_QDMA_FUNC_TRACE();
 
 	/* In case QDMA device is not in stopped state, return -EBUSY */
-	if (qdma_dev.state == 1) {
+	if (qdma_dev->state == 1) {
 		DPAA2_QDMA_ERR(
 			"Device is in running state. Stop before reset.");
 		return -EBUSY;
 	}
 
 	/* In case there are pending jobs on any VQ, return -EBUSY */
-	for (i = 0; i < qdma_dev.max_vqs; i++) {
+	for (i = 0; i < qdma_dev->max_vqs; i++) {
 		if (qdma_vqs[i].in_use && (qdma_vqs[i].num_enqueues !=
 		    qdma_vqs[i].num_dequeues))
 			DPAA2_QDMA_ERR("Jobs are still pending on VQ: %d", i);
@@ -462,7 +470,7 @@ rte_qdma_reset(void)
 		queue->num_users = 0;
 
 	/* Reset and free virtual queues */
-	for (i = 0; i < qdma_dev.max_vqs; i++) {
+	for (i = 0; i < qdma_dev->max_vqs; i++) {
 		if (qdma_vqs[i].status_ring)
 			rte_ring_free(qdma_vqs[i].status_ring);
 	}
@@ -475,43 +483,39 @@ rte_qdma_reset(void)
 		sizeof(struct qdma_per_core_info) * RTE_MAX_LCORE);
 
 	/* Free the FLE pool */
-	if (qdma_dev.fle_pool)
-		rte_mempool_free(qdma_dev.fle_pool);
+	if (qdma_dev->fle_pool)
+		rte_mempool_free(qdma_dev->fle_pool);
 
 	/* Reset QDMA device structure */
-	qdma_dev.mode = RTE_QDMA_MODE_HW;
-	qdma_dev.max_hw_queues_per_core = 0;
-	qdma_dev.fle_pool = NULL;
-	qdma_dev.fle_pool_count = 0;
-	qdma_dev.max_vqs = 0;
+	qdma_dev->mode = RTE_QDMA_MODE_HW;
+	qdma_dev->max_hw_queues_per_core = 0;
+	qdma_dev->fle_pool = NULL;
+	qdma_dev->fle_pool_count = 0;
+	qdma_dev->max_vqs = 0;
 
 	return 0;
 }
 
-int
-rte_qdma_configure(struct rte_qdma_config *qdma_config)
+static int
+dpaa2_qdma_configure(const struct rte_rawdev *rawdev,
+			 rte_rawdev_obj_t config)
 {
-	int ret;
 	char fle_pool_name[32]; /* RTE_MEMZONE_NAMESIZE = 32 */
+	struct rte_qdma_config *qdma_config = (struct rte_qdma_config *)config;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
 
 	DPAA2_QDMA_FUNC_TRACE();
 
 	/* In case QDMA device is not in stopped state, return -EBUSY */
-	if (qdma_dev.state == 1) {
+	if (qdma_dev->state == 1) {
 		DPAA2_QDMA_ERR(
 			"Device is in running state. Stop before config.");
 		return -1;
 	}
 
-	/* Reset the QDMA device */
-	ret = rte_qdma_reset();
-	if (ret) {
-		DPAA2_QDMA_ERR("Resetting QDMA failed");
-		return ret;
-	}
-
 	/* Set mode */
-	qdma_dev.mode = qdma_config->mode;
+	qdma_dev->mode = qdma_config->mode;
 
 	/* Set max HW queue per core */
 	if (qdma_config->max_hw_queues_per_core > MAX_HW_QUEUE_PER_CORE) {
@@ -519,7 +523,7 @@ rte_qdma_configure(struct rte_qdma_config *qdma_config)
 			       MAX_HW_QUEUE_PER_CORE);
 		return -EINVAL;
 	}
-	qdma_dev.max_hw_queues_per_core =
+	qdma_dev->max_hw_queues_per_core =
 		qdma_config->max_hw_queues_per_core;
 
 	/* Allocate Virtual Queues */
@@ -530,24 +534,24 @@ rte_qdma_configure(struct rte_qdma_config *qdma_config)
 		DPAA2_QDMA_ERR("qdma_virtual_queues allocation failed");
 		return -ENOMEM;
 	}
-	qdma_dev.max_vqs = qdma_config->max_vqs;
+	qdma_dev->max_vqs = qdma_config->max_vqs;
 
 	/* Allocate FLE pool; just append PID so that in case of
 	 * multiprocess, the pool's don't collide.
 	 */
 	snprintf(fle_pool_name, sizeof(fle_pool_name), "qdma_fle_pool%u",
 		 getpid());
-	qdma_dev.fle_pool = rte_mempool_create(fle_pool_name,
+	qdma_dev->fle_pool = rte_mempool_create(fle_pool_name,
 			qdma_config->fle_pool_count, QDMA_FLE_POOL_SIZE,
 			QDMA_FLE_CACHE_SIZE(qdma_config->fle_pool_count), 0,
 			NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
-	if (!qdma_dev.fle_pool) {
+	if (!qdma_dev->fle_pool) {
 		DPAA2_QDMA_ERR("qdma_fle_pool create failed");
 		rte_free(qdma_vqs);
 		qdma_vqs = NULL;
 		return -ENOMEM;
 	}
-	qdma_dev.fle_pool_count = qdma_config->fle_pool_count;
+	qdma_dev->fle_pool_count = qdma_config->fle_pool_count;
 
 	if (qdma_config->format == RTE_QDMA_ULTRASHORT_FORMAT) {
 		dpdmai_dev_get_job = dpdmai_dev_get_job_us;
@@ -559,57 +563,67 @@ rte_qdma_configure(struct rte_qdma_config *qdma_config)
 	return 0;
 }
 
-int
-rte_qdma_start(void)
+static int
+dpaa2_qdma_start(struct rte_rawdev *rawdev)
 {
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+
 	DPAA2_QDMA_FUNC_TRACE();
 
-	qdma_dev.state = 1;
+	qdma_dev->state = 1;
 
 	return 0;
 }
 
-int
-rte_qdma_vq_create(uint32_t lcore_id, uint32_t flags)
+static int
+dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
+			  __rte_unused uint16_t queue_id,
+			  rte_rawdev_obj_t queue_conf)
 {
 	char ring_name[32];
 	int i;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct rte_qdma_queue_config *q_config =
+		(struct rte_qdma_queue_config *)queue_conf;
 
 	DPAA2_QDMA_FUNC_TRACE();
 
-	rte_spinlock_lock(&qdma_dev.lock);
+	rte_spinlock_lock(&qdma_dev->lock);
 
 	/* Get a free Virtual Queue */
-	for (i = 0; i < qdma_dev.max_vqs; i++) {
+	for (i = 0; i < qdma_dev->max_vqs; i++) {
 		if (qdma_vqs[i].in_use == 0)
 			break;
 	}
 
 	/* Return in case no VQ is free */
-	if (i == qdma_dev.max_vqs) {
-		rte_spinlock_unlock(&qdma_dev.lock);
+	if (i == qdma_dev->max_vqs) {
+		rte_spinlock_unlock(&qdma_dev->lock);
 		DPAA2_QDMA_ERR("Unable to get lock on QDMA device");
 		return -ENODEV;
 	}
 
-	if (qdma_dev.mode == RTE_QDMA_MODE_HW ||
-			(flags & RTE_QDMA_VQ_EXCLUSIVE_PQ)) {
+	if (qdma_dev->mode == RTE_QDMA_MODE_HW ||
+			(q_config->flags & RTE_QDMA_VQ_EXCLUSIVE_PQ)) {
 		/* Allocate HW queue for a VQ */
-		qdma_vqs[i].hw_queue = alloc_hw_queue(lcore_id);
+		qdma_vqs[i].hw_queue = alloc_hw_queue(q_config->lcore_id);
 		qdma_vqs[i].exclusive_hw_queue = 1;
 	} else {
 		/* Allocate a Ring for Virutal Queue in VQ mode */
 		snprintf(ring_name, sizeof(ring_name), "status ring %d", i);
 		qdma_vqs[i].status_ring = rte_ring_create(ring_name,
-			qdma_dev.fle_pool_count, rte_socket_id(), 0);
+			qdma_dev->fle_pool_count, rte_socket_id(), 0);
 		if (!qdma_vqs[i].status_ring) {
 			DPAA2_QDMA_ERR("Status ring creation failed for vq");
-			rte_spinlock_unlock(&qdma_dev.lock);
+			rte_spinlock_unlock(&qdma_dev->lock);
 			return rte_errno;
 		}
 
 		/* Get a HW queue (shared) for a VQ */
-		qdma_vqs[i].hw_queue = get_hw_queue(lcore_id);
+		qdma_vqs[i].hw_queue = get_hw_queue(qdma_dev,
+						    q_config->lcore_id);
 		qdma_vqs[i].exclusive_hw_queue = 0;
 	}
 
@@ -618,28 +632,18 @@ rte_qdma_vq_create(uint32_t lcore_id, uint32_t flags)
 		if (qdma_vqs[i].status_ring)
 			rte_ring_free(qdma_vqs[i].status_ring);
 		qdma_vqs[i].status_ring = NULL;
-		rte_spinlock_unlock(&qdma_dev.lock);
+		rte_spinlock_unlock(&qdma_dev->lock);
 		return -ENODEV;
 	}
 
 	qdma_vqs[i].in_use = 1;
-	qdma_vqs[i].lcore_id = lcore_id;
+	qdma_vqs[i].lcore_id = q_config->lcore_id;
 	memset(&qdma_vqs[i].rbp, 0, sizeof(struct rte_qdma_rbp));
-	rte_spinlock_unlock(&qdma_dev.lock);
-
-	return i;
-}
-
-/*create vq for route-by-port*/
-int
-rte_qdma_vq_create_rbp(uint32_t lcore_id, uint32_t flags,
-			struct rte_qdma_rbp *rbp)
-{
-	int i;
-
-	i = rte_qdma_vq_create(lcore_id, flags);
+	rte_spinlock_unlock(&qdma_dev->lock);
 
-	memcpy(&qdma_vqs[i].rbp, rbp, sizeof(struct rte_qdma_rbp));
+	if (q_config->rbp != NULL)
+		memcpy(&qdma_vqs[i].rbp, q_config->rbp,
+		       sizeof(struct rte_qdma_rbp));
 
 	return i;
 }
@@ -688,7 +692,7 @@ dpdmai_dev_enqueue_multi(struct dpaa2_dpdmai_dev *dpdmai_dev,
 			dpaa2_eqcr_size : nb_jobs;
 
 		for (loop = 0; loop < num_to_send; loop++) {
-			ret = dpdmai_dev_set_fd(&fd[loop],
+			ret = dpdmai_dev_set_fd(dpdmai_dev->qdma_dev, &fd[loop],
 						job[num_tx], rbp, vq_id);
 			if (ret < 0) {
 				/* Set nb_jobs to loop, so outer while loop
@@ -723,12 +727,14 @@ dpdmai_dev_enqueue_multi(struct dpaa2_dpdmai_dev *dpdmai_dev,
 	return num_tx;
 }
 
-int
-rte_qdma_vq_enqueue_multi(uint16_t vq_id,
-			  struct rte_qdma_job **job,
-			  uint16_t nb_jobs)
+static int
+dpaa2_qdma_enqueue(__rte_unused struct rte_rawdev *rawdev,
+		  __rte_unused struct rte_rawdev_buf **buffers,
+		  unsigned int nb_jobs,
+		  rte_rawdev_obj_t context)
 {
-	struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id];
+	struct rte_qdma_enqdeq *e_context = (struct rte_qdma_enqdeq *)context;
+	struct qdma_virt_queue *qdma_vq = &qdma_vqs[e_context->vq_id];
 	struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
 	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
 	int ret;
@@ -736,15 +742,15 @@ rte_qdma_vq_enqueue_multi(uint16_t vq_id,
 	/* Return error in case of wrong lcore_id */
 	if (rte_lcore_id() != qdma_vq->lcore_id) {
 		DPAA2_QDMA_ERR("QDMA enqueue for vqid %d on wrong core",
-				vq_id);
+				e_context->vq_id);
 		return -EINVAL;
 	}
 
 	ret = dpdmai_dev_enqueue_multi(dpdmai_dev,
 				 qdma_pq->queue_id,
-				 vq_id,
+				 e_context->vq_id,
 				 &qdma_vq->rbp,
-				 job,
+				 e_context->job,
 				 nb_jobs);
 	if (ret < 0) {
 		DPAA2_QDMA_ERR("DPDMAI device enqueue failed: %d", ret);
@@ -756,13 +762,6 @@ rte_qdma_vq_enqueue_multi(uint16_t vq_id,
 	return ret;
 }
 
-int
-rte_qdma_vq_enqueue(uint16_t vq_id,
-		    struct rte_qdma_job *job)
-{
-	return rte_qdma_vq_enqueue_multi(vq_id, &job, 1);
-}
-
 /* Function to receive a QDMA job for a given device and queue*/
 static int
 dpdmai_dev_dequeue_multijob_prefetch(
@@ -877,7 +876,8 @@ dpdmai_dev_dequeue_multijob_prefetch(
 		}
 		fd = qbman_result_DQ_fd(dq_storage);
 
-		vqid = dpdmai_dev_get_job(fd, &job[num_rx]);
+		vqid = dpdmai_dev_get_job(dpdmai_dev->qdma_dev, fd,
+					  &job[num_rx]);
 		if (vq_id)
 			vq_id[num_rx] = vqid;
 
@@ -993,7 +993,8 @@ dpdmai_dev_dequeue_multijob_no_prefetch(
 			}
 			fd = qbman_result_DQ_fd(dq_storage);
 
-			vqid = dpdmai_dev_get_job(fd, &job[num_rx]);
+			vqid = dpdmai_dev_get_job(dpdmai_dev->qdma_dev, fd,
+						  &job[num_rx]);
 			if (vq_id)
 				vq_id[num_rx] = vqid;
 
@@ -1008,21 +1009,24 @@ dpdmai_dev_dequeue_multijob_no_prefetch(
 	return num_rx;
 }
 
-int
-rte_qdma_vq_dequeue_multi(uint16_t vq_id,
-			  struct rte_qdma_job **job,
-			  uint16_t nb_jobs)
+static int
+dpaa2_qdma_dequeue(__rte_unused struct rte_rawdev *rawdev,
+		   __rte_unused struct rte_rawdev_buf **buffers,
+		   unsigned int nb_jobs,
+		   rte_rawdev_obj_t cntxt)
 {
-	struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id];
+	struct rte_qdma_enqdeq *context = (struct rte_qdma_enqdeq *)cntxt;
+	struct qdma_virt_queue *qdma_vq = &qdma_vqs[context->vq_id];
 	struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
 	struct qdma_virt_queue *temp_qdma_vq;
 	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
-	int ring_count, ret = 0, i;
+	int ret = 0, i;
+	unsigned int ring_count;
 
 	/* Return error in case of wrong lcore_id */
 	if (rte_lcore_id() != (unsigned int)(qdma_vq->lcore_id)) {
 		DPAA2_QDMA_WARN("QDMA dequeue for vqid %d on wrong core",
-				vq_id);
+				context->vq_id);
 		return -1;
 	}
 
@@ -1036,7 +1040,7 @@ rte_qdma_vq_dequeue_multi(uint16_t vq_id,
 	if (qdma_vq->exclusive_hw_queue) {
 		/* In case of exclusive queue directly fetch from HW queue */
 		ret = dpdmai_dev_dequeue_multijob(dpdmai_dev, qdma_pq->queue_id,
-					 NULL, job, nb_jobs);
+					 NULL, context->job, nb_jobs);
 		if (ret < 0) {
 			DPAA2_QDMA_ERR(
 				"Dequeue from DPDMAI device failed: %d", ret);
@@ -1055,11 +1059,11 @@ rte_qdma_vq_dequeue_multi(uint16_t vq_id,
 			/* TODO - How to have right budget */
 			ret = dpdmai_dev_dequeue_multijob(dpdmai_dev,
 					qdma_pq->queue_id,
-					temp_vq_id, job, nb_jobs);
+					temp_vq_id, context->job, nb_jobs);
 			for (i = 0; i < ret; i++) {
 				temp_qdma_vq = &qdma_vqs[temp_vq_id[i]];
 				rte_ring_enqueue(temp_qdma_vq->status_ring,
-					(void *)(job[i]));
+					(void *)(context->job[i]));
 			}
 			ring_count = rte_ring_count(
 					qdma_vq->status_ring);
@@ -1070,7 +1074,8 @@ rte_qdma_vq_dequeue_multi(uint16_t vq_id,
 			 * to provide to the user
 			 */
 			ret = rte_ring_dequeue_bulk(qdma_vq->status_ring,
-					(void **)job, ring_count, NULL);
+						    (void **)context->job,
+						    ring_count, NULL);
 			if (ret)
 				qdma_vq->num_dequeues += ret;
 		}
@@ -1079,19 +1084,6 @@ rte_qdma_vq_dequeue_multi(uint16_t vq_id,
 	return ret;
 }
 
-struct rte_qdma_job *
-rte_qdma_vq_dequeue(uint16_t vq_id)
-{
-	int ret;
-	struct rte_qdma_job *job = NULL;
-
-	ret = rte_qdma_vq_dequeue_multi(vq_id, &job, 1);
-	if (ret < 0)
-		DPAA2_QDMA_DP_WARN("DPDMAI device dequeue failed: %d", ret);
-
-	return job;
-}
-
 void
 rte_qdma_vq_stats(uint16_t vq_id,
 		  struct rte_qdma_vq_stats *vq_status)
@@ -1108,9 +1100,13 @@ rte_qdma_vq_stats(uint16_t vq_id,
 	}
 }
 
-int
-rte_qdma_vq_destroy(uint16_t vq_id)
+static int
+dpaa2_qdma_queue_release(struct rte_rawdev *rawdev,
+			 uint16_t vq_id)
 {
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+
 	struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id];
 
 	DPAA2_QDMA_FUNC_TRACE();
@@ -1119,7 +1115,7 @@ rte_qdma_vq_destroy(uint16_t vq_id)
 	if (qdma_vq->num_enqueues != qdma_vq->num_dequeues)
 		return -EBUSY;
 
-	rte_spinlock_lock(&qdma_dev.lock);
+	rte_spinlock_lock(&qdma_dev->lock);
 
 	if (qdma_vq->exclusive_hw_queue)
 		free_hw_queue(qdma_vq->hw_queue);
@@ -1132,57 +1128,44 @@ rte_qdma_vq_destroy(uint16_t vq_id)
 
 	memset(qdma_vq, 0, sizeof(struct qdma_virt_queue));
 
-	rte_spinlock_unlock(&qdma_dev.lock);
+	rte_spinlock_unlock(&qdma_dev->lock);
 
 	return 0;
 }
 
-int
-rte_qdma_vq_destroy_rbp(uint16_t vq_id)
+static void
+dpaa2_qdma_stop(struct rte_rawdev *rawdev)
 {
-	struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id];
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
 
 	DPAA2_QDMA_FUNC_TRACE();
 
-	/* In case there are pending jobs on any VQ, return -EBUSY */
-	if (qdma_vq->num_enqueues != qdma_vq->num_dequeues)
-		return -EBUSY;
-
-	rte_spinlock_lock(&qdma_dev.lock);
-
-	if (qdma_vq->exclusive_hw_queue) {
-		free_hw_queue(qdma_vq->hw_queue);
-	} else {
-		if (qdma_vqs->status_ring)
-			rte_ring_free(qdma_vqs->status_ring);
-
-		put_hw_queue(qdma_vq->hw_queue);
-	}
-
-	memset(qdma_vq, 0, sizeof(struct qdma_virt_queue));
-
-	rte_spinlock_unlock(&qdma_dev.lock);
-
-	return 0;
+	qdma_dev->state = 0;
 }
 
-void
-rte_qdma_stop(void)
+static int
+dpaa2_qdma_close(struct rte_rawdev *rawdev)
 {
 	DPAA2_QDMA_FUNC_TRACE();
 
-	qdma_dev.state = 0;
-}
-
-void
-rte_qdma_destroy(void)
-{
-	DPAA2_QDMA_FUNC_TRACE();
+	dpaa2_qdma_reset(rawdev);
 
-	rte_qdma_reset();
+	return 0;
 }
 
-static const struct rte_rawdev_ops dpaa2_qdma_ops;
+static struct rte_rawdev_ops dpaa2_qdma_ops = {
+	.dev_configure            = dpaa2_qdma_configure,
+	.dev_start                = dpaa2_qdma_start,
+	.dev_stop                 = dpaa2_qdma_stop,
+	.dev_reset                = dpaa2_qdma_reset,
+	.dev_close                = dpaa2_qdma_close,
+	.queue_setup		  = dpaa2_qdma_queue_setup,
+	.queue_release		  = dpaa2_qdma_queue_release,
+	.attr_get		  = dpaa2_qdma_attr_get,
+	.enqueue_bufs		  = dpaa2_qdma_enqueue,
+	.dequeue_bufs		  = dpaa2_qdma_dequeue,
+};
 
 static int
 add_hw_queues_to_list(struct dpaa2_dpdmai_dev *dpdmai_dev)
@@ -1204,7 +1187,7 @@ add_hw_queues_to_list(struct dpaa2_dpdmai_dev *dpdmai_dev)
 		queue->queue_id = i;
 
 		TAILQ_INSERT_TAIL(&qdma_queue_list, queue, next);
-		qdma_dev.num_hw_queues++;
+		dpdmai_dev->qdma_dev->num_hw_queues++;
 	}
 
 	return 0;
@@ -1313,6 +1296,7 @@ dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id)
 	/* Open DPDMAI device */
 	dpdmai_dev->dpdmai_id = dpdmai_id;
 	dpdmai_dev->dpdmai.regs = dpaa2_get_mcp_ptr(MC_PORTAL_INDEX);
+	dpdmai_dev->qdma_dev = &q_dev;
 	ret = dpdmai_open(&dpdmai_dev->dpdmai, CMD_PRI_LOW,
 			  dpdmai_dev->dpdmai_id, &dpdmai_dev->token);
 	if (ret) {
@@ -1427,6 +1411,8 @@ dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id)
 
 	DPAA2_QDMA_DEBUG("Initialized dpdmai object successfully");
 
+	rte_spinlock_init(&dpdmai_dev->qdma_dev->lock);
+
 	return 0;
 init_err:
 	dpaa2_dpdmai_dev_uninit(rawdev);
@@ -1462,6 +1448,13 @@ rte_dpaa2_qdma_probe(struct rte_dpaa2_driver *dpaa2_drv,
 		return ret;
 	}
 
+	/* Reset the QDMA device */
+	ret = dpaa2_qdma_reset(rawdev);
+	if (ret) {
+		DPAA2_QDMA_ERR("Resetting QDMA failed");
+		return ret;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
index 0176380..3c112d2 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018-2019 NXP
+ * Copyright 2018-2020 NXP
  */
 
 #ifndef __DPAA2_QDMA_H__
@@ -173,6 +173,7 @@ struct dpaa2_dpdmai_dev {
 	struct dpaa2_queue rx_queue[DPAA2_DPDMAI_MAX_QUEUES];
 	/** TX queues */
 	struct dpaa2_queue tx_queue[DPAA2_DPDMAI_MAX_QUEUES];
+	struct qdma_device *qdma_dev;
 };
 
 #endif /* __DPAA2_QDMA_H__ */
diff --git a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
index 4e1268c..71894d3 100644
--- a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
@@ -1,10 +1,12 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018-2019 NXP
+ * Copyright 2018-2020 NXP
  */
 
 #ifndef __RTE_PMD_DPAA2_QDMA_H__
 #define __RTE_PMD_DPAA2_QDMA_H__
 
+#include <rte_rawdev.h>
+
 /**
  * @file
  *
@@ -154,150 +156,29 @@ struct rte_qdma_job {
 	uint16_t vq_id;
 };
 
-/**
- * Initialize the QDMA device.
- *
- * @returns
- *   - 0: Success.
- *   - <0: Error code.
- */
-int
-rte_qdma_init(void);
-
-/**
- * Get the QDMA attributes.
- *
- * @param qdma_attr
- *   QDMA attributes providing total number of hw queues etc.
- */
-void
-rte_qdma_attr_get(struct rte_qdma_attr *qdma_attr);
-
-/**
- * Reset the QDMA device. This API will completely reset the QDMA
- * device, bringing it to original state as if only rte_qdma_init() API
- * has been called.
- *
- * @returns
- *   - 0: Success.
- *   - <0: Error code.
- */
-int
-rte_qdma_reset(void);
-
-/**
- * Configure the QDMA device.
- *
- * @returns
- *   - 0: Success.
- *   - <0: Error code.
- */
-int
-rte_qdma_configure(struct rte_qdma_config *qdma_config);
-
-/**
- * Start the QDMA device.
- *
- * @returns
- *   - 0: Success.
- *   - <0: Error code.
- */
-int
-rte_qdma_start(void);
-
-/**
- * Create a Virtual Queue on a particular lcore id.
- * This API can be called from any thread/core. User can create/destroy
- * VQ's at runtime.
- *
- * @param lcore_id
- *   LCORE ID on which this particular queue would be associated with.
- * @param flags
- *  RTE_QDMA_VQ_ flags. See macro definitions.
- *
- * @returns
- *   - >= 0: Virtual queue ID.
- *   - <0: Error code.
- */
-int
-rte_qdma_vq_create(uint32_t lcore_id, uint32_t flags);
-
-/*create vq for route-by-port*/
-int
-rte_qdma_vq_create_rbp(uint32_t lcore_id, uint32_t flags,
-			struct rte_qdma_rbp *rbp);
-
-/**
- * Enqueue multiple jobs to a Virtual Queue.
- * If the enqueue is successful, the H/W will perform DMA operations
- * on the basis of the QDMA jobs provided.
- *
- * @param vq_id
- *   Virtual Queue ID.
- * @param job
- *   List of QDMA Jobs containing relevant information related to DMA.
- * @param nb_jobs
- *   Number of QDMA jobs provided by the user.
- *
- * @returns
- *   - >=0: Number of jobs successfully submitted
- *   - <0: Error code.
- */
-int
-rte_qdma_vq_enqueue_multi(uint16_t vq_id,
-			  struct rte_qdma_job **job,
-			  uint16_t nb_jobs);
-
-/**
- * Enqueue a single job to a Virtual Queue.
- * If the enqueue is successful, the H/W will perform DMA operations
- * on the basis of the QDMA job provided.
- *
- * @param vq_id
- *   Virtual Queue ID.
- * @param job
- *   A QDMA Job containing relevant information related to DMA.
- *
- * @returns
- *   - >=0: Number of jobs successfully submitted
- *   - <0: Error code.
- */
-int
-rte_qdma_vq_enqueue(uint16_t vq_id,
-		    struct rte_qdma_job *job);
+struct rte_qdma_enqdeq {
+	uint16_t vq_id;
+	struct rte_qdma_job **job;
+};
 
-/**
- * Dequeue multiple completed jobs from a Virtual Queue.
- * Provides the list of completed jobs capped by nb_jobs.
- *
- * @param vq_id
- *   Virtual Queue ID.
- * @param job
- *   List of QDMA Jobs returned from the API.
- * @param nb_jobs
- *   Number of QDMA jobs requested for dequeue by the user.
- *
- * @returns
- *   - >=0: Number of jobs successfully received
- *   - <0: Error code.
- */
-int
-rte_qdma_vq_dequeue_multi(uint16_t vq_id,
-			  struct rte_qdma_job **job,
-			  uint16_t nb_jobs);
+struct rte_qdma_queue_config {
+	uint32_t lcore_id;
+	uint32_t flags;
+	struct rte_qdma_rbp *rbp;
+};
 
-/**
- * Dequeue a single completed jobs from a Virtual Queue.
- *
- * @param vq_id
- *   Virtual Queue ID.
- *
- * @returns
- *   - A completed job or NULL if no job is there.
- */
-struct rte_qdma_job *
-rte_qdma_vq_dequeue(uint16_t vq_id);
+#define rte_qdma_info rte_rawdev_info
+#define rte_qdma_start(id) rte_rawdev_start(id)
+#define rte_qdma_reset(id) rte_rawdev_reset(id)
+#define rte_qdma_configure(id, cf) rte_rawdev_configure(id, cf)
+#define rte_qdma_dequeue_buffers(id, buf, num, ctxt) \
+	rte_rawdev_dequeue_buffers(id, buf, num, ctxt)
+#define rte_qdma_enqueue_buffers(id, buf, num, ctxt) \
+	rte_rawdev_enqueue_buffers(id, buf, num, ctxt)
+#define rte_qdma_queue_setup(id, qid, cfg) \
+	rte_rawdev_queue_setup(id, qid, cfg)
 
+/* TODO: introduce per-queue stats API in rawdev */
 /**
  * Get a Virtual Queue statistics.
  *
@@ -310,46 +191,4 @@ void
 rte_qdma_vq_stats(uint16_t vq_id,
 		  struct rte_qdma_vq_stats *vq_stats);
 
-/**
- * Destroy the Virtual Queue specified by vq_id.
- * This API can be called from any thread/core. User can create/destroy
- * VQ's at runtime.
- *
- * @param vq_id
- *   Virtual Queue ID which needs to be uninitialized.
- *
- * @returns
- *   - 0: Success.
- *   - <0: Error code.
- */
-int
-rte_qdma_vq_destroy(uint16_t vq_id);
-
-/**
- * Destroy the RBP specific Virtual Queue specified by vq_id.
- * This API can be called from any thread/core. User can create/destroy
- * VQ's at runtime.
- *
- * @param vq_id
- *   RBP based Virtual Queue ID which needs to be uninitialized.
- *
- * @returns
- *   - 0: Success.
- *   - <0: Error code.
- */
-
-int
-rte_qdma_vq_destroy_rbp(uint16_t vq_id);
-/**
- * Stop QDMA device.
- */
-void
-rte_qdma_stop(void);
-
-/**
- * Destroy the QDMA device.
- */
-void
-rte_qdma_destroy(void);
-
 #endif /* __RTE_PMD_DPAA2_QDMA_H__*/
-- 
2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH 2/7] raw/dpaa2_qdma: memset to only required memory
  2020-09-07  9:25 [dpdk-dev] [PATCH 0/7] raw/dpaa2_qdma: driver enhancement Gagandeep Singh
  2020-09-07  9:25 ` [dpdk-dev] [PATCH 1/7] raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops Gagandeep Singh
@ 2020-09-07  9:25 ` Gagandeep Singh
  2020-09-07  9:26 ` [dpdk-dev] [PATCH 3/7] raw/dpaa2_qdma: refactor the code Gagandeep Singh
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 23+ messages in thread
From: Gagandeep Singh @ 2020-09-07  9:25 UTC (permalink / raw)
  To: dev, nipun.gupta, hemant.agrawal; +Cc: thomas.monjalon, Gagandeep Singh

Performance improvement: memset only the memory that is required,
i.e. the nb_jobs frame descriptors that will be used, instead of
all RTE_QDMA_BURST_NB_MAX entries.

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 drivers/raw/dpaa2_qdma/dpaa2_qdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
index a2ee6cc..57194db 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
@@ -683,7 +683,7 @@ dpdmai_dev_enqueue_multi(struct dpaa2_dpdmai_dev *dpdmai_dev,
 	qbman_eq_desc_set_no_orp(&eqdesc, 0);
 	qbman_eq_desc_set_response(&eqdesc, 0, 0);
 
-	memset(fd, 0, RTE_QDMA_BURST_NB_MAX * sizeof(struct qbman_fd));
+	memset(fd, 0, nb_jobs * sizeof(struct qbman_fd));
 
 	while (nb_jobs > 0) {
 		uint32_t loop;
-- 
2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH 3/7] raw/dpaa2_qdma: refactor the code
  2020-09-07  9:25 [dpdk-dev] [PATCH 0/7] raw/dpaa2_qdma: driver enhancement Gagandeep Singh
  2020-09-07  9:25 ` [dpdk-dev] [PATCH 1/7] raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops Gagandeep Singh
  2020-09-07  9:25 ` [dpdk-dev] [PATCH 2/7] raw/dpaa2_qdma: memset to only required memory Gagandeep Singh
@ 2020-09-07  9:26 ` Gagandeep Singh
  2020-09-07  9:26 ` [dpdk-dev] [PATCH 4/7] raw/dpaa2_qdma: optimize IOVA conversion Gagandeep Singh
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 23+ messages in thread
From: Gagandeep Singh @ 2020-09-07  9:26 UTC (permalink / raw)
  To: dev, nipun.gupta, hemant.agrawal; +Cc: thomas.monjalon, Jun Yang

From: Jun Yang <jun.yang@nxp.com>

This patch moves qdma queue specific configurations from driver
global configuration to per-queue setup. This is required
as each queue can be configured differently.

Signed-off-by: Jun Yang <jun.yang@nxp.com>
---
 drivers/raw/dpaa2_qdma/dpaa2_qdma.c         | 1259 +++++++++++++--------------
 drivers/raw/dpaa2_qdma/dpaa2_qdma.h         |   39 +-
 drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h |   13 +-
 3 files changed, 670 insertions(+), 641 deletions(-)

diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
index 57194db..ae45ec3 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
@@ -26,6 +26,9 @@
 
 #define DPAA2_QDMA_NO_PREFETCH "no_prefetch"
 
+/* Dynamic log type identifier */
+int dpaa2_qdma_logtype;
+
 uint32_t dpaa2_coherent_no_alloc_cache;
 uint32_t dpaa2_coherent_alloc_cache;
 
@@ -37,31 +40,9 @@ TAILQ_HEAD(qdma_hw_queue_list, qdma_hw_queue);
 static struct qdma_hw_queue_list qdma_queue_list
 	= TAILQ_HEAD_INITIALIZER(qdma_queue_list);
 
-/* QDMA Virtual Queues */
-static struct qdma_virt_queue *qdma_vqs;
-
 /* QDMA per core data */
 static struct qdma_per_core_info qdma_core_info[RTE_MAX_LCORE];
 
-typedef int (dpdmai_dev_dequeue_multijob_t)(struct dpaa2_dpdmai_dev *dpdmai_dev,
-					    uint16_t rxq_id,
-					    uint16_t *vq_id,
-					    struct rte_qdma_job **job,
-					    uint16_t nb_jobs);
-
-dpdmai_dev_dequeue_multijob_t *dpdmai_dev_dequeue_multijob;
-
-typedef uint16_t (dpdmai_dev_get_job_t)(struct qdma_device *qdma_dev,
-					const struct qbman_fd *fd,
-					struct rte_qdma_job **job);
-typedef int (dpdmai_dev_set_fd_t)(struct qdma_device *qdma_dev,
-				  struct qbman_fd *fd,
-				  struct rte_qdma_job *job,
-				  struct rte_qdma_rbp *rbp,
-				  uint16_t vq_id);
-dpdmai_dev_get_job_t *dpdmai_dev_get_job;
-dpdmai_dev_set_fd_t *dpdmai_dev_set_fd;
-
 static inline int
 qdma_populate_fd_pci(phys_addr_t src, phys_addr_t dest,
 			uint32_t len, struct qbman_fd *fd,
@@ -114,7 +95,7 @@ qdma_populate_fd_ddr(phys_addr_t src, phys_addr_t dest,
 	/**
 	 * src If RBP=0 {NS,RDTTYPE[3:0]}: 0_1011
 	 * Coherent copy of cacheable memory,
-	 * lookup in downstream cache, no allocate
+	* lookup in downstream cache, no allocate
 	 * on miss
 	 */
 	fd->simple_ddr.rns = 0;
@@ -204,12 +185,11 @@ dpaa2_qdma_populate_fle(struct qbman_fle *fle,
 }
 
 static inline int dpdmai_dev_set_fd_us(
-				struct qdma_device *qdma_dev __rte_unused,
-				struct qbman_fd *fd,
-				struct rte_qdma_job *job,
-				struct rte_qdma_rbp *rbp,
-				uint16_t vq_id)
+		struct qdma_virt_queue *qdma_vq,
+		struct qbman_fd *fd,
+		struct rte_qdma_job *job)
 {
+	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
 	struct rte_qdma_job **ppjob;
 	size_t iova;
 	int ret = 0;
@@ -220,7 +200,7 @@ static inline int dpdmai_dev_set_fd_us(
 		iova = (size_t)job->src;
 
 	/* Set the metadata */
-	job->vq_id = vq_id;
+	job->vq_id = qdma_vq->vq_id;
 	ppjob = (struct rte_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1;
 	*ppjob = job;
 
@@ -234,15 +214,17 @@ static inline int dpdmai_dev_set_fd_us(
 					   job->len, fd);
 	return ret;
 }
-static inline int dpdmai_dev_set_fd_lf(struct qdma_device *qdma_dev,
-					struct qbman_fd *fd,
-					struct rte_qdma_job *job,
-					struct rte_qdma_rbp *rbp,
-					uint16_t vq_id)
+static inline int dpdmai_dev_set_fd_lf(
+		struct qdma_virt_queue *qdma_vq,
+		struct qbman_fd *fd,
+		struct rte_qdma_job *job)
 {
+	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
 	struct rte_qdma_job **ppjob;
 	struct qbman_fle *fle;
 	int ret = 0;
+	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
+
 	/*
 	 * Get an FLE/SDD from FLE pool.
 	 * Note: IO metadata is before the FLE and SDD memory.
@@ -254,7 +236,7 @@ static inline int dpdmai_dev_set_fd_lf(struct qdma_device *qdma_dev,
 	}
 
 	/* Set the metadata */
-	job->vq_id = vq_id;
+	job->vq_id = qdma_vq->vq_id;
 	*ppjob = job;
 
 	fle = (struct qbman_fle *)(ppjob + 1);
@@ -272,7 +254,7 @@ static inline int dpdmai_dev_set_fd_lf(struct qdma_device *qdma_dev,
 }
 
 static inline uint16_t dpdmai_dev_get_job_us(
-				struct qdma_device *qdma_dev __rte_unused,
+				struct qdma_virt_queue *qdma_vq __rte_unused,
 				const struct qbman_fd *fd,
 				struct rte_qdma_job **job)
 {
@@ -281,7 +263,7 @@ static inline uint16_t dpdmai_dev_get_job_us(
 	struct rte_qdma_job **ppjob;
 
 	if (fd->simple_pci.saddr_hi & (QDMA_RBP_UPPER_ADDRESS_MASK >> 32))
-		iova = (size_t) (((uint64_t)fd->simple_pci.daddr_hi) << 32
+		iova = (size_t)(((uint64_t)fd->simple_pci.daddr_hi) << 32
 				| (uint64_t)fd->simple_pci.daddr_lo);
 	else
 		iova = (size_t)(((uint64_t)fd->simple_pci.saddr_hi) << 32
@@ -289,18 +271,22 @@ static inline uint16_t dpdmai_dev_get_job_us(
 
 	ppjob = (struct rte_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1;
 	*job = (struct rte_qdma_job *)*ppjob;
-	(*job)->status = (fd->simple_pci.acc_err << 8) | (fd->simple_pci.error);
+	(*job)->status = (fd->simple_pci.acc_err << 8) |
+					(fd->simple_pci.error);
 	vqid = (*job)->vq_id;
 
 	return vqid;
 }
 
-static inline uint16_t dpdmai_dev_get_job_lf(struct qdma_device *qdma_dev,
-					     const struct qbman_fd *fd,
-					     struct rte_qdma_job **job)
+static inline uint16_t dpdmai_dev_get_job_lf(
+						struct qdma_virt_queue *qdma_vq,
+						const struct qbman_fd *fd,
+						struct rte_qdma_job **job)
 {
 	struct rte_qdma_job **ppjob;
 	uint16_t vqid;
+	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
+
 	/*
 	 * Fetch metadata from FLE. job and vq_id were set
 	 * in metadata in the enqueue operation.
@@ -320,342 +306,268 @@ static inline uint16_t dpdmai_dev_get_job_lf(struct qdma_device *qdma_dev,
 	return vqid;
 }
 
-static struct qdma_hw_queue *
-alloc_hw_queue(uint32_t lcore_id)
+/* Function to receive a QDMA job for a given device and queue*/
+static int
+dpdmai_dev_dequeue_multijob_prefetch(
+			struct qdma_virt_queue *qdma_vq,
+			uint16_t *vq_id,
+			struct rte_qdma_job **job,
+			uint16_t nb_jobs)
 {
-	struct qdma_hw_queue *queue = NULL;
+	struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
+	uint16_t rxq_id = qdma_pq->queue_id;
 
-	DPAA2_QDMA_FUNC_TRACE();
+	struct dpaa2_queue *rxq;
+	struct qbman_result *dq_storage, *dq_storage1 = NULL;
+	struct qbman_pull_desc pulldesc;
+	struct qbman_swp *swp;
+	struct queue_storage_info_t *q_storage;
+	uint32_t fqid;
+	uint8_t status, pending;
+	uint8_t num_rx = 0;
+	const struct qbman_fd *fd;
+	uint16_t vqid;
+	int ret, pull_size;
 
-	/* Get a free queue from the list */
-	TAILQ_FOREACH(queue, &qdma_queue_list, next) {
-		if (queue->num_users == 0) {
-			queue->lcore_id = lcore_id;
-			queue->num_users++;
-			break;
+	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
+		ret = dpaa2_affine_qbman_swp();
+		if (ret) {
+			DPAA2_QDMA_ERR(
+				"Failed to allocate IO portal, tid: %d\n",
+				rte_gettid());
+			return 0;
 		}
 	}
+	swp = DPAA2_PER_LCORE_PORTAL;
 
-	return queue;
-}
-
-static void
-free_hw_queue(struct qdma_hw_queue *queue)
-{
-	DPAA2_QDMA_FUNC_TRACE();
+	pull_size = (nb_jobs > dpaa2_dqrr_size) ? dpaa2_dqrr_size : nb_jobs;
+	rxq = &(dpdmai_dev->rx_queue[rxq_id]);
+	fqid = rxq->fqid;
+	q_storage = rxq->q_storage;
 
-	queue->num_users--;
-}
+	if (unlikely(!q_storage->active_dqs)) {
+		q_storage->toggle = 0;
+		dq_storage = q_storage->dq_storage[q_storage->toggle];
+		q_storage->last_num_pkts = pull_size;
+		qbman_pull_desc_clear(&pulldesc);
+		qbman_pull_desc_set_numframes(&pulldesc,
+					      q_storage->last_num_pkts);
+		qbman_pull_desc_set_fq(&pulldesc, fqid);
+		qbman_pull_desc_set_storage(&pulldesc, dq_storage,
+				(size_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
+		if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
+			while (!qbman_check_command_complete(
+			       get_swp_active_dqs(
+			       DPAA2_PER_LCORE_DPIO->index)))
+				;
+			clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
+		}
+		while (1) {
+			if (qbman_swp_pull(swp, &pulldesc)) {
+				DPAA2_QDMA_DP_WARN(
+					"VDQ command not issued.QBMAN busy\n");
+					/* Portal was busy, try again */
+				continue;
+			}
+			break;
+		}
+		q_storage->active_dqs = dq_storage;
+		q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
+		set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index,
+				   dq_storage);
+	}
 
+	dq_storage = q_storage->active_dqs;
+	rte_prefetch0((void *)(size_t)(dq_storage));
+	rte_prefetch0((void *)(size_t)(dq_storage + 1));
 
-static struct qdma_hw_queue *
-get_hw_queue(struct qdma_device *qdma_dev, uint32_t lcore_id)
-{
-	struct qdma_per_core_info *core_info;
-	struct qdma_hw_queue *queue, *temp;
-	uint32_t least_num_users;
-	int num_hw_queues, i;
+	/* Prepare next pull descriptor. This will give space for the
+	 * prefetching done on DQRR entries
+	 */
+	q_storage->toggle ^= 1;
+	dq_storage1 = q_storage->dq_storage[q_storage->toggle];
+	qbman_pull_desc_clear(&pulldesc);
+	qbman_pull_desc_set_numframes(&pulldesc, pull_size);
+	qbman_pull_desc_set_fq(&pulldesc, fqid);
+	qbman_pull_desc_set_storage(&pulldesc, dq_storage1,
+		(size_t)(DPAA2_VADDR_TO_IOVA(dq_storage1)), 1);
 
-	DPAA2_QDMA_FUNC_TRACE();
+	/* Check if the previous issued command is completed.
+	 * Also seems like the SWP is shared between the Ethernet Driver
+	 * and the SEC driver.
+	 */
+	while (!qbman_check_command_complete(dq_storage))
+		;
+	if (dq_storage == get_swp_active_dqs(q_storage->active_dpio_id))
+		clear_swp_active_dqs(q_storage->active_dpio_id);
 
-	core_info = &qdma_core_info[lcore_id];
-	num_hw_queues = core_info->num_hw_queues;
+	pending = 1;
 
-	/*
-	 * Allocate a HW queue if there are less queues
-	 * than maximum per core queues configured
-	 */
-	if (num_hw_queues < qdma_dev->max_hw_queues_per_core) {
-		queue = alloc_hw_queue(lcore_id);
-		if (queue) {
-			core_info->hw_queues[num_hw_queues] = queue;
-			core_info->num_hw_queues++;
-			return queue;
+	do {
+		/* Loop until the dq_storage is updated with
+		 * new token by QBMAN
+		 */
+		while (!qbman_check_new_result(dq_storage))
+			;
+		rte_prefetch0((void *)((size_t)(dq_storage + 2)));
+		/* Check whether Last Pull command is Expired and
+		 * setting Condition for Loop termination
+		 */
+		if (qbman_result_DQ_is_pull_complete(dq_storage)) {
+			pending = 0;
+			/* Check for valid frame. */
+			status = qbman_result_DQ_flags(dq_storage);
+			if (unlikely((status & QBMAN_DQ_STAT_VALIDFRAME) == 0))
+				continue;
 		}
-	}
+		fd = qbman_result_DQ_fd(dq_storage);
 
-	queue = core_info->hw_queues[0];
-	/* In case there is no queue associated with the core return NULL */
-	if (!queue)
-		return NULL;
+		vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx]);
+		if (vq_id)
+			vq_id[num_rx] = vqid;
 
-	/* Fetch the least loaded H/W queue */
-	least_num_users = core_info->hw_queues[0]->num_users;
-	for (i = 0; i < num_hw_queues; i++) {
-		temp = core_info->hw_queues[i];
-		if (temp->num_users < least_num_users)
-			queue = temp;
+		dq_storage++;
+		num_rx++;
+	} while (pending);
+
+	if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
+		while (!qbman_check_command_complete(
+		       get_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)))
+			;
+		clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
+	}
+	/* issue a volatile dequeue command for next pull */
+	while (1) {
+		if (qbman_swp_pull(swp, &pulldesc)) {
+			DPAA2_QDMA_DP_WARN(
+				"VDQ command is not issued. QBMAN is busy (2)\n");
+			continue;
+		}
+		break;
 	}
 
-	if (queue)
-		queue->num_users++;
+	q_storage->active_dqs = dq_storage1;
+	q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
+	set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index, dq_storage1);
 
-	return queue;
+	return num_rx;
 }
 
-static void
-put_hw_queue(struct qdma_hw_queue *queue)
+static int
+dpdmai_dev_dequeue_multijob_no_prefetch(
+		struct qdma_virt_queue *qdma_vq,
+		uint16_t *vq_id,
+		struct rte_qdma_job **job,
+		uint16_t nb_jobs)
 {
-	struct qdma_per_core_info *core_info;
-	int lcore_id, num_hw_queues, i;
-
-	DPAA2_QDMA_FUNC_TRACE();
+	struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
+	uint16_t rxq_id = qdma_pq->queue_id;
 
-	/*
-	 * If this is the last user of the queue free it.
-	 * Also remove it from QDMA core info.
-	 */
-	if (queue->num_users == 1) {
-		free_hw_queue(queue);
+	struct dpaa2_queue *rxq;
+	struct qbman_result *dq_storage;
+	struct qbman_pull_desc pulldesc;
+	struct qbman_swp *swp;
+	uint32_t fqid;
+	uint8_t status, pending;
+	uint8_t num_rx = 0;
+	const struct qbman_fd *fd;
+	uint16_t vqid;
+	int ret, next_pull = nb_jobs, num_pulled = 0;
 
-		/* Remove the physical queue from core info */
-		lcore_id = queue->lcore_id;
-		core_info = &qdma_core_info[lcore_id];
-		num_hw_queues = core_info->num_hw_queues;
-		for (i = 0; i < num_hw_queues; i++) {
-			if (queue == core_info->hw_queues[i])
-				break;
+	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
+		ret = dpaa2_affine_qbman_swp();
+		if (ret) {
+			DPAA2_QDMA_ERR(
+				"Failed to allocate IO portal, tid: %d\n",
+				rte_gettid());
+			return 0;
 		}
-		for (; i < num_hw_queues - 1; i++)
-			core_info->hw_queues[i] = core_info->hw_queues[i + 1];
-		core_info->hw_queues[i] = NULL;
-	} else {
-		queue->num_users--;
 	}
-}
-
-static int
-dpaa2_qdma_attr_get(struct rte_rawdev *rawdev,
-		    __rte_unused const char *attr_name,
-		    uint64_t *attr_value)
-{
-	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	struct rte_qdma_attr *qdma_attr = (struct rte_qdma_attr *)attr_value;
-
-	DPAA2_QDMA_FUNC_TRACE();
+	swp = DPAA2_PER_LCORE_PORTAL;
 
-	qdma_attr->num_hw_queues = qdma_dev->num_hw_queues;
+	rxq = &(dpdmai_dev->rx_queue[rxq_id]);
+	fqid = rxq->fqid;
 
-	return 0;
-}
+	do {
+		dq_storage = rxq->q_storage->dq_storage[0];
+		/* Prepare dequeue descriptor */
+		qbman_pull_desc_clear(&pulldesc);
+		qbman_pull_desc_set_fq(&pulldesc, fqid);
+		qbman_pull_desc_set_storage(&pulldesc, dq_storage,
+			(uint64_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
 
-static int
-dpaa2_qdma_reset(struct rte_rawdev *rawdev)
-{
-	struct qdma_hw_queue *queue;
-	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	int i;
-
-	DPAA2_QDMA_FUNC_TRACE();
-
-	/* In case QDMA device is not in stopped state, return -EBUSY */
-	if (qdma_dev->state == 1) {
-		DPAA2_QDMA_ERR(
-			"Device is in running state. Stop before reset.");
-		return -EBUSY;
-	}
-
-	/* In case there are pending jobs on any VQ, return -EBUSY */
-	for (i = 0; i < qdma_dev->max_vqs; i++) {
-		if (qdma_vqs[i].in_use && (qdma_vqs[i].num_enqueues !=
-		    qdma_vqs[i].num_dequeues))
-			DPAA2_QDMA_ERR("Jobs are still pending on VQ: %d", i);
-			return -EBUSY;
-	}
-
-	/* Reset HW queues */
-	TAILQ_FOREACH(queue, &qdma_queue_list, next)
-		queue->num_users = 0;
-
-	/* Reset and free virtual queues */
-	for (i = 0; i < qdma_dev->max_vqs; i++) {
-		if (qdma_vqs[i].status_ring)
-			rte_ring_free(qdma_vqs[i].status_ring);
-	}
-	if (qdma_vqs)
-		rte_free(qdma_vqs);
-	qdma_vqs = NULL;
-
-	/* Reset per core info */
-	memset(&qdma_core_info, 0,
-		sizeof(struct qdma_per_core_info) * RTE_MAX_LCORE);
-
-	/* Free the FLE pool */
-	if (qdma_dev->fle_pool)
-		rte_mempool_free(qdma_dev->fle_pool);
-
-	/* Reset QDMA device structure */
-	qdma_dev->mode = RTE_QDMA_MODE_HW;
-	qdma_dev->max_hw_queues_per_core = 0;
-	qdma_dev->fle_pool = NULL;
-	qdma_dev->fle_pool_count = 0;
-	qdma_dev->max_vqs = 0;
-
-	return 0;
-}
-
-static int
-dpaa2_qdma_configure(const struct rte_rawdev *rawdev,
-			 rte_rawdev_obj_t config)
-{
-	char fle_pool_name[32]; /* RTE_MEMZONE_NAMESIZE = 32 */
-	struct rte_qdma_config *qdma_config = (struct rte_qdma_config *)config;
-	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-
-	DPAA2_QDMA_FUNC_TRACE();
-
-	/* In case QDMA device is not in stopped state, return -EBUSY */
-	if (qdma_dev->state == 1) {
-		DPAA2_QDMA_ERR(
-			"Device is in running state. Stop before config.");
-		return -1;
-	}
-
-	/* Set mode */
-	qdma_dev->mode = qdma_config->mode;
-
-	/* Set max HW queue per core */
-	if (qdma_config->max_hw_queues_per_core > MAX_HW_QUEUE_PER_CORE) {
-		DPAA2_QDMA_ERR("H/W queues per core is more than: %d",
-			       MAX_HW_QUEUE_PER_CORE);
-		return -EINVAL;
-	}
-	qdma_dev->max_hw_queues_per_core =
-		qdma_config->max_hw_queues_per_core;
-
-	/* Allocate Virtual Queues */
-	qdma_vqs = rte_malloc("qdma_virtual_queues",
-			(sizeof(struct qdma_virt_queue) * qdma_config->max_vqs),
-			RTE_CACHE_LINE_SIZE);
-	if (!qdma_vqs) {
-		DPAA2_QDMA_ERR("qdma_virtual_queues allocation failed");
-		return -ENOMEM;
-	}
-	qdma_dev->max_vqs = qdma_config->max_vqs;
-
-	/* Allocate FLE pool; just append PID so that in case of
-	 * multiprocess, the pool's don't collide.
-	 */
-	snprintf(fle_pool_name, sizeof(fle_pool_name), "qdma_fle_pool%u",
-		 getpid());
-	qdma_dev->fle_pool = rte_mempool_create(fle_pool_name,
-			qdma_config->fle_pool_count, QDMA_FLE_POOL_SIZE,
-			QDMA_FLE_CACHE_SIZE(qdma_config->fle_pool_count), 0,
-			NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
-	if (!qdma_dev->fle_pool) {
-		DPAA2_QDMA_ERR("qdma_fle_pool create failed");
-		rte_free(qdma_vqs);
-		qdma_vqs = NULL;
-		return -ENOMEM;
-	}
-	qdma_dev->fle_pool_count = qdma_config->fle_pool_count;
-
-	if (qdma_config->format == RTE_QDMA_ULTRASHORT_FORMAT) {
-		dpdmai_dev_get_job = dpdmai_dev_get_job_us;
-		dpdmai_dev_set_fd = dpdmai_dev_set_fd_us;
-	} else {
-		dpdmai_dev_get_job = dpdmai_dev_get_job_lf;
-		dpdmai_dev_set_fd = dpdmai_dev_set_fd_lf;
-	}
-	return 0;
-}
-
-static int
-dpaa2_qdma_start(struct rte_rawdev *rawdev)
-{
-	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-
-	DPAA2_QDMA_FUNC_TRACE();
-
-	qdma_dev->state = 1;
-
-	return 0;
-}
-
-static int
-dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
-			  __rte_unused uint16_t queue_id,
-			  rte_rawdev_obj_t queue_conf)
-{
-	char ring_name[32];
-	int i;
-	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	struct rte_qdma_queue_config *q_config =
-		(struct rte_qdma_queue_config *)queue_conf;
-
-	DPAA2_QDMA_FUNC_TRACE();
-
-	rte_spinlock_lock(&qdma_dev->lock);
+		if (next_pull > dpaa2_dqrr_size) {
+			qbman_pull_desc_set_numframes(&pulldesc,
+					dpaa2_dqrr_size);
+			next_pull -= dpaa2_dqrr_size;
+		} else {
+			qbman_pull_desc_set_numframes(&pulldesc, next_pull);
+			next_pull = 0;
+		}
 
-	/* Get a free Virtual Queue */
-	for (i = 0; i < qdma_dev->max_vqs; i++) {
-		if (qdma_vqs[i].in_use == 0)
+		while (1) {
+			if (qbman_swp_pull(swp, &pulldesc)) {
+				DPAA2_QDMA_DP_WARN(
+					"VDQ command not issued. QBMAN busy");
+				/* Portal was busy, try again */
+				continue;
+			}
 			break;
-	}
+		}
 
-	/* Return in case no VQ is free */
-	if (i == qdma_dev->max_vqs) {
-		rte_spinlock_unlock(&qdma_dev->lock);
-		DPAA2_QDMA_ERR("Unable to get lock on QDMA device");
-		return -ENODEV;
-	}
+		rte_prefetch0((void *)((size_t)(dq_storage + 1)));
+		/* Check if the previous issued command is completed. */
+		while (!qbman_check_command_complete(dq_storage))
+			;
 
-	if (qdma_dev->mode == RTE_QDMA_MODE_HW ||
-			(q_config->flags & RTE_QDMA_VQ_EXCLUSIVE_PQ)) {
-		/* Allocate HW queue for a VQ */
-		qdma_vqs[i].hw_queue = alloc_hw_queue(q_config->lcore_id);
-		qdma_vqs[i].exclusive_hw_queue = 1;
-	} else {
-		/* Allocate a Ring for Virutal Queue in VQ mode */
-		snprintf(ring_name, sizeof(ring_name), "status ring %d", i);
-		qdma_vqs[i].status_ring = rte_ring_create(ring_name,
-			qdma_dev->fle_pool_count, rte_socket_id(), 0);
-		if (!qdma_vqs[i].status_ring) {
-			DPAA2_QDMA_ERR("Status ring creation failed for vq");
-			rte_spinlock_unlock(&qdma_dev->lock);
-			return rte_errno;
-		}
+		num_pulled = 0;
+		pending = 1;
 
-		/* Get a HW queue (shared) for a VQ */
-		qdma_vqs[i].hw_queue = get_hw_queue(qdma_dev,
-						    q_config->lcore_id);
-		qdma_vqs[i].exclusive_hw_queue = 0;
-	}
+		do {
+			/* Loop until dq_storage is updated
+			 * with new token by QBMAN
+			 */
+			while (!qbman_check_new_result(dq_storage))
+				;
+			rte_prefetch0((void *)((size_t)(dq_storage + 2)));
 
-	if (qdma_vqs[i].hw_queue == NULL) {
-		DPAA2_QDMA_ERR("No H/W queue available for VQ");
-		if (qdma_vqs[i].status_ring)
-			rte_ring_free(qdma_vqs[i].status_ring);
-		qdma_vqs[i].status_ring = NULL;
-		rte_spinlock_unlock(&qdma_dev->lock);
-		return -ENODEV;
-	}
+			if (qbman_result_DQ_is_pull_complete(dq_storage)) {
+				pending = 0;
+				/* Check for valid frame. */
+				status = qbman_result_DQ_flags(dq_storage);
+				if (unlikely((status &
+					QBMAN_DQ_STAT_VALIDFRAME) == 0))
+					continue;
+			}
+			fd = qbman_result_DQ_fd(dq_storage);
 
-	qdma_vqs[i].in_use = 1;
-	qdma_vqs[i].lcore_id = q_config->lcore_id;
-	memset(&qdma_vqs[i].rbp, 0, sizeof(struct rte_qdma_rbp));
-	rte_spinlock_unlock(&qdma_dev->lock);
+			vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx]);
+			if (vq_id)
+				vq_id[num_rx] = vqid;
 
-	if (q_config->rbp != NULL)
-		memcpy(&qdma_vqs[i].rbp, q_config->rbp,
-		       sizeof(struct rte_qdma_rbp));
+			dq_storage++;
+			num_rx++;
+			num_pulled++;
 
-	return i;
+		} while (pending);
+	/* Last VDQ provided all packets and more packets are requested */
+	} while (next_pull && num_pulled == dpaa2_dqrr_size);
+
+	return num_rx;
 }
 
 static int
-dpdmai_dev_enqueue_multi(struct dpaa2_dpdmai_dev *dpdmai_dev,
-			uint16_t txq_id,
-			uint16_t vq_id,
-			struct rte_qdma_rbp *rbp,
+dpdmai_dev_enqueue_multi(
+			struct qdma_virt_queue *qdma_vq,
 			struct rte_qdma_job **job,
 			uint16_t nb_jobs)
 {
+	struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
+	uint16_t txq_id = qdma_pq->queue_id;
+
 	struct qbman_fd fd[RTE_QDMA_BURST_NB_MAX];
 	struct dpaa2_queue *txq;
 	struct qbman_eq_desc eqdesc;
@@ -692,8 +604,7 @@ dpdmai_dev_enqueue_multi(struct dpaa2_dpdmai_dev *dpdmai_dev,
 			dpaa2_eqcr_size : nb_jobs;
 
 		for (loop = 0; loop < num_to_send; loop++) {
-			ret = dpdmai_dev_set_fd(dpdmai_dev->qdma_dev, &fd[loop],
-						job[num_tx], rbp, vq_id);
+			ret = qdma_vq->set_fd(qdma_vq, &fd[loop], job[num_tx]);
 			if (ret < 0) {
 				/* Set nb_jobs to loop, so outer while loop
 				 * breaks out.
@@ -707,6 +618,7 @@ dpdmai_dev_enqueue_multi(struct dpaa2_dpdmai_dev *dpdmai_dev,
 
 		/* Enqueue the packet to the QBMAN */
 		uint32_t enqueue_loop = 0, retry_count = 0;
+
 		while (enqueue_loop < loop) {
 			ret = qbman_swp_enqueue_multiple(swp,
 						&eqdesc,
@@ -727,299 +639,426 @@ dpdmai_dev_enqueue_multi(struct dpaa2_dpdmai_dev *dpdmai_dev,
 	return num_tx;
 }
 
-static int
-dpaa2_qdma_enqueue(__rte_unused struct rte_rawdev *rawdev,
-		  __rte_unused struct rte_rawdev_buf **buffers,
-		  unsigned int nb_jobs,
-		  rte_rawdev_obj_t context)
+static struct qdma_hw_queue *
+alloc_hw_queue(uint32_t lcore_id)
 {
-	struct rte_qdma_enqdeq *e_context = (struct rte_qdma_enqdeq *)context;
-	struct qdma_virt_queue *qdma_vq = &qdma_vqs[e_context->vq_id];
-	struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
-	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
-	int ret;
+	struct qdma_hw_queue *queue = NULL;
 
-	/* Return error in case of wrong lcore_id */
-	if (rte_lcore_id() != qdma_vq->lcore_id) {
-		DPAA2_QDMA_ERR("QDMA enqueue for vqid %d on wrong core",
-				e_context->vq_id);
-		return -EINVAL;
-	}
+	DPAA2_QDMA_FUNC_TRACE();
 
-	ret = dpdmai_dev_enqueue_multi(dpdmai_dev,
-				 qdma_pq->queue_id,
-				 e_context->vq_id,
-				 &qdma_vq->rbp,
-				 e_context->job,
-				 nb_jobs);
-	if (ret < 0) {
-		DPAA2_QDMA_ERR("DPDMAI device enqueue failed: %d", ret);
-		return ret;
+	/* Get a free queue from the list */
+	TAILQ_FOREACH(queue, &qdma_queue_list, next) {
+		if (queue->num_users == 0) {
+			queue->lcore_id = lcore_id;
+			queue->num_users++;
+			break;
+		}
 	}
 
-	qdma_vq->num_enqueues += ret;
+	return queue;
+}
 
-	return ret;
+static void
+free_hw_queue(struct qdma_hw_queue *queue)
+{
+	DPAA2_QDMA_FUNC_TRACE();
+
+	queue->num_users--;
 }
 
-/* Function to receive a QDMA job for a given device and queue*/
-static int
-dpdmai_dev_dequeue_multijob_prefetch(
-			struct dpaa2_dpdmai_dev *dpdmai_dev,
-			uint16_t rxq_id,
-			uint16_t *vq_id,
-			struct rte_qdma_job **job,
-			uint16_t nb_jobs)
+
+static struct qdma_hw_queue *
+get_hw_queue(struct qdma_device *qdma_dev, uint32_t lcore_id)
 {
-	struct dpaa2_queue *rxq;
-	struct qbman_result *dq_storage, *dq_storage1 = NULL;
-	struct qbman_pull_desc pulldesc;
-	struct qbman_swp *swp;
-	struct queue_storage_info_t *q_storage;
-	uint32_t fqid;
-	uint8_t status, pending;
-	uint8_t num_rx = 0;
-	const struct qbman_fd *fd;
-	uint16_t vqid;
-	int ret, pull_size;
+	struct qdma_per_core_info *core_info;
+	struct qdma_hw_queue *queue, *temp;
+	uint32_t least_num_users;
+	int num_hw_queues, i;
 
-	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
-		ret = dpaa2_affine_qbman_swp();
-		if (ret) {
-			DPAA2_QDMA_ERR(
-				"Failed to allocate IO portal, tid: %d\n",
-				rte_gettid());
-			return 0;
+	DPAA2_QDMA_FUNC_TRACE();
+
+	core_info = &qdma_core_info[lcore_id];
+	num_hw_queues = core_info->num_hw_queues;
+
+	/*
+	 * Allocate a HW queue if there are less queues
+	 * than maximum per core queues configured
+	 */
+	if (num_hw_queues < qdma_dev->max_hw_queues_per_core) {
+		queue = alloc_hw_queue(lcore_id);
+		if (queue) {
+			core_info->hw_queues[num_hw_queues] = queue;
+			core_info->num_hw_queues++;
+			return queue;
 		}
 	}
-	swp = DPAA2_PER_LCORE_PORTAL;
 
-	pull_size = (nb_jobs > dpaa2_dqrr_size) ? dpaa2_dqrr_size : nb_jobs;
-	rxq = &(dpdmai_dev->rx_queue[rxq_id]);
-	fqid = rxq->fqid;
-	q_storage = rxq->q_storage;
+	queue = core_info->hw_queues[0];
+	/* In case there is no queue associated with the core return NULL */
+	if (!queue)
+		return NULL;
 
-	if (unlikely(!q_storage->active_dqs)) {
-		q_storage->toggle = 0;
-		dq_storage = q_storage->dq_storage[q_storage->toggle];
-		q_storage->last_num_pkts = pull_size;
-		qbman_pull_desc_clear(&pulldesc);
-		qbman_pull_desc_set_numframes(&pulldesc,
-					      q_storage->last_num_pkts);
-		qbman_pull_desc_set_fq(&pulldesc, fqid);
-		qbman_pull_desc_set_storage(&pulldesc, dq_storage,
-				(size_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
-		if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
-			while (!qbman_check_command_complete(
-			       get_swp_active_dqs(
-			       DPAA2_PER_LCORE_DPIO->index)))
-				;
-			clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
+	/* Fetch the least loaded H/W queue */
+	least_num_users = core_info->hw_queues[0]->num_users;
+	for (i = 0; i < num_hw_queues; i++) {
+		temp = core_info->hw_queues[i];
+		if (temp->num_users < least_num_users)
+			queue = temp;
+	}
+
+	if (queue)
+		queue->num_users++;
+
+	return queue;
+}
+
+static void
+put_hw_queue(struct qdma_hw_queue *queue)
+{
+	struct qdma_per_core_info *core_info;
+	int lcore_id, num_hw_queues, i;
+
+	DPAA2_QDMA_FUNC_TRACE();
+
+	/*
+	 * If this is the last user of the queue free it.
+	 * Also remove it from QDMA core info.
+	 */
+	if (queue->num_users == 1) {
+		free_hw_queue(queue);
+
+		/* Remove the physical queue from core info */
+		lcore_id = queue->lcore_id;
+		core_info = &qdma_core_info[lcore_id];
+		num_hw_queues = core_info->num_hw_queues;
+		for (i = 0; i < num_hw_queues; i++) {
+			if (queue == core_info->hw_queues[i])
+				break;
 		}
-		while (1) {
-			if (qbman_swp_pull(swp, &pulldesc)) {
-				DPAA2_QDMA_DP_WARN(
-					"VDQ command not issued.QBMAN busy\n");
-					/* Portal was busy, try again */
-				continue;
-			}
-			break;
+		for (; i < num_hw_queues - 1; i++)
+			core_info->hw_queues[i] = core_info->hw_queues[i + 1];
+		core_info->hw_queues[i] = NULL;
+	} else {
+		queue->num_users--;
+	}
+}
+
+static int
+dpaa2_qdma_attr_get(struct rte_rawdev *rawdev,
+		    __rte_unused const char *attr_name,
+		    uint64_t *attr_value)
+{
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct rte_qdma_attr *qdma_attr = (struct rte_qdma_attr *)attr_value;
+
+	DPAA2_QDMA_FUNC_TRACE();
+
+	qdma_attr->num_hw_queues = qdma_dev->num_hw_queues;
+
+	return 0;
+}
+
+static int
+dpaa2_qdma_reset(struct rte_rawdev *rawdev)
+{
+	struct qdma_hw_queue *queue;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	int i;
+
+	DPAA2_QDMA_FUNC_TRACE();
+
+	/* In case QDMA device is not in stopped state, return -EBUSY */
+	if (qdma_dev->state == 1) {
+		DPAA2_QDMA_ERR(
+			"Device is in running state. Stop before reset.");
+		return -EBUSY;
+	}
+
+	/* In case there are pending jobs on any VQ, return -EBUSY */
+	for (i = 0; i < qdma_dev->max_vqs; i++) {
+		if (qdma_dev->vqs[i].in_use && (qdma_dev->vqs[i].num_enqueues !=
+		    qdma_dev->vqs[i].num_dequeues)) {
+			DPAA2_QDMA_ERR("Jobs are still pending on VQ: %d", i);
+			return -EBUSY;
 		}
-		q_storage->active_dqs = dq_storage;
-		q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
-		set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index,
-				   dq_storage);
 	}
 
-	dq_storage = q_storage->active_dqs;
-	rte_prefetch0((void *)(size_t)(dq_storage));
-	rte_prefetch0((void *)(size_t)(dq_storage + 1));
+	/* Reset HW queues */
+	TAILQ_FOREACH(queue, &qdma_queue_list, next)
+		queue->num_users = 0;
 
-	/* Prepare next pull descriptor. This will give space for the
-	 * prefething done on DQRR entries
-	 */
-	q_storage->toggle ^= 1;
-	dq_storage1 = q_storage->dq_storage[q_storage->toggle];
-	qbman_pull_desc_clear(&pulldesc);
-	qbman_pull_desc_set_numframes(&pulldesc, pull_size);
-	qbman_pull_desc_set_fq(&pulldesc, fqid);
-	qbman_pull_desc_set_storage(&pulldesc, dq_storage1,
-		(size_t)(DPAA2_VADDR_TO_IOVA(dq_storage1)), 1);
+	/* Reset and free virtual queues */
+	for (i = 0; i < qdma_dev->max_vqs; i++) {
+		if (qdma_dev->vqs[i].status_ring)
+			rte_ring_free(qdma_dev->vqs[i].status_ring);
+	}
+	if (qdma_dev->vqs)
+		rte_free(qdma_dev->vqs);
+	qdma_dev->vqs = NULL;
 
-	/* Check if the previous issued command is completed.
-	 * Also seems like the SWP is shared between the Ethernet Driver
-	 * and the SEC driver.
+	/* Reset per core info */
+	memset(&qdma_core_info, 0,
+		sizeof(struct qdma_per_core_info) * RTE_MAX_LCORE);
+
+	/* Free the FLE pool */
+	if (qdma_dev->fle_pool)
+		rte_mempool_free(qdma_dev->fle_pool);
+
+	/* Reset QDMA device structure */
+	qdma_dev->max_hw_queues_per_core = 0;
+	qdma_dev->fle_pool = NULL;
+	qdma_dev->fle_pool_count = 0;
+	qdma_dev->max_vqs = 0;
+
+	return 0;
+}
+
+static int
+dpaa2_qdma_configure(const struct rte_rawdev *rawdev,
+			 rte_rawdev_obj_t config)
+{
+	char name[32]; /* RTE_MEMZONE_NAMESIZE = 32 */
+	struct rte_qdma_config *qdma_config = (struct rte_qdma_config *)config;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+
+	DPAA2_QDMA_FUNC_TRACE();
+
+	/* In case QDMA device is not in stopped state, return -EBUSY */
+	if (qdma_dev->state == 1) {
+		DPAA2_QDMA_ERR(
+			"Device is in running state. Stop before config.");
+		return -1;
+	}
+
+	/* Set max HW queue per core */
+	if (qdma_config->max_hw_queues_per_core > MAX_HW_QUEUE_PER_CORE) {
+		DPAA2_QDMA_ERR("H/W queues per core is more than: %d",
+			       MAX_HW_QUEUE_PER_CORE);
+		return -EINVAL;
+	}
+	qdma_dev->max_hw_queues_per_core =
+		qdma_config->max_hw_queues_per_core;
+
+	/* Allocate Virtual Queues */
+	sprintf(name, "qdma_%d_vq", rawdev->dev_id);
+	qdma_dev->vqs = rte_malloc(name,
+			(sizeof(struct qdma_virt_queue) * qdma_config->max_vqs),
+			RTE_CACHE_LINE_SIZE);
+	if (!qdma_dev->vqs) {
+		DPAA2_QDMA_ERR("qdma_virtual_queues allocation failed");
+		return -ENOMEM;
+	}
+	qdma_dev->max_vqs = qdma_config->max_vqs;
+
+	/* Allocate FLE pool; just append PID so that in case of
+	 * multiprocess, the pool's don't collide.
 	 */
-	while (!qbman_check_command_complete(dq_storage))
-		;
-	if (dq_storage == get_swp_active_dqs(q_storage->active_dpio_id))
-		clear_swp_active_dqs(q_storage->active_dpio_id);
+	snprintf(name, sizeof(name), "qdma_fle_pool%u",
+		 getpid());
+	qdma_dev->fle_pool = rte_mempool_create(name,
+			qdma_config->fle_pool_count, QDMA_FLE_POOL_SIZE,
+			QDMA_FLE_CACHE_SIZE(qdma_config->fle_pool_count), 0,
+			NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
+	if (!qdma_dev->fle_pool) {
+		DPAA2_QDMA_ERR("qdma_fle_pool create failed");
+		rte_free(qdma_dev->vqs);
+		qdma_dev->vqs = NULL;
+		return -ENOMEM;
+	}
+	qdma_dev->fle_pool_count = qdma_config->fle_pool_count;
 
-	pending = 1;
+	return 0;
+}
 
-	do {
-		/* Loop until the dq_storage is updated with
-		 * new token by QBMAN
-		 */
-		while (!qbman_check_new_result(dq_storage))
-			;
-		rte_prefetch0((void *)((size_t)(dq_storage + 2)));
-		/* Check whether Last Pull command is Expired and
-		 * setting Condition for Loop termination
-		 */
-		if (qbman_result_DQ_is_pull_complete(dq_storage)) {
-			pending = 0;
-			/* Check for valid frame. */
-			status = qbman_result_DQ_flags(dq_storage);
-			if (unlikely((status & QBMAN_DQ_STAT_VALIDFRAME) == 0))
-				continue;
-		}
-		fd = qbman_result_DQ_fd(dq_storage);
+static int
+dpaa2_qdma_start(struct rte_rawdev *rawdev)
+{
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+
+	DPAA2_QDMA_FUNC_TRACE();
 
-		vqid = dpdmai_dev_get_job(dpdmai_dev->qdma_dev, fd,
-					  &job[num_rx]);
-		if (vq_id)
-			vq_id[num_rx] = vqid;
+	qdma_dev->state = 1;
 
-		dq_storage++;
-		num_rx++;
-	} while (pending);
+	return 0;
+}
 
-	if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
-		while (!qbman_check_command_complete(
-		       get_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)))
-			;
-		clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
-	}
-	/* issue a volatile dequeue command for next pull */
-	while (1) {
-		if (qbman_swp_pull(swp, &pulldesc)) {
-			DPAA2_QDMA_DP_WARN("VDQ command is not issued."
-					  "QBMAN is busy (2)\n");
-			continue;
-		}
-		break;
+static int
+check_devargs_handler(__rte_unused const char *key, const char *value,
+		      __rte_unused void *opaque)
+{
+	if (strcmp(value, "1"))
+		return -1;
+
+	return 0;
+}
+
+static int
+dpaa2_get_devargs(struct rte_devargs *devargs, const char *key)
+{
+	struct rte_kvargs *kvlist;
+
+	if (!devargs)
+		return 0;
+
+	kvlist = rte_kvargs_parse(devargs->args, NULL);
+	if (!kvlist)
+		return 0;
+
+	if (!rte_kvargs_count(kvlist, key)) {
+		rte_kvargs_free(kvlist);
+		return 0;
 	}
 
-	q_storage->active_dqs = dq_storage1;
-	q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
-	set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index, dq_storage1);
+	if (rte_kvargs_process(kvlist, key,
+			       check_devargs_handler, NULL) < 0) {
+		rte_kvargs_free(kvlist);
+		return 0;
+	}
+	rte_kvargs_free(kvlist);
 
-	return num_rx;
+	return 1;
 }
 
 static int
-dpdmai_dev_dequeue_multijob_no_prefetch(
-		struct dpaa2_dpdmai_dev *dpdmai_dev,
-		uint16_t rxq_id,
-		uint16_t *vq_id,
-		struct rte_qdma_job **job,
-		uint16_t nb_jobs)
+dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
+			  __rte_unused uint16_t queue_id,
+			  rte_rawdev_obj_t queue_conf)
 {
-	struct dpaa2_queue *rxq;
-	struct qbman_result *dq_storage;
-	struct qbman_pull_desc pulldesc;
-	struct qbman_swp *swp;
-	uint32_t fqid;
-	uint8_t status, pending;
-	uint8_t num_rx = 0;
-	const struct qbman_fd *fd;
-	uint16_t vqid;
-	int ret, next_pull = nb_jobs, num_pulled = 0;
+	char ring_name[32];
+	int i;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct rte_qdma_queue_config *q_config =
+		(struct rte_qdma_queue_config *)queue_conf;
 
-	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
-		ret = dpaa2_affine_qbman_swp();
-		if (ret) {
-			DPAA2_QDMA_ERR(
-				"Failed to allocate IO portal, tid: %d\n",
-				rte_gettid());
-			return 0;
+	DPAA2_QDMA_FUNC_TRACE();
+
+	rte_spinlock_lock(&qdma_dev->lock);
+
+	/* Get a free Virtual Queue */
+	for (i = 0; i < qdma_dev->max_vqs; i++) {
+		if (qdma_dev->vqs[i].in_use == 0)
+			break;
+	}
+
+	/* Return in case no VQ is free */
+	if (i == qdma_dev->max_vqs) {
+		rte_spinlock_unlock(&qdma_dev->lock);
+		DPAA2_QDMA_ERR("Unable to get lock on QDMA device");
+		return -ENODEV;
+	}
+
+	if (q_config->flags & RTE_QDMA_VQ_EXCLUSIVE_PQ) {
+		/* Allocate HW queue for a VQ */
+		qdma_dev->vqs[i].hw_queue = alloc_hw_queue(q_config->lcore_id);
+		qdma_dev->vqs[i].exclusive_hw_queue = 1;
+	} else {
+		/* Allocate a Ring for Virtual Queue in VQ mode */
+		snprintf(ring_name, sizeof(ring_name), "status ring %d", i);
+		qdma_dev->vqs[i].status_ring = rte_ring_create(ring_name,
+			qdma_dev->fle_pool_count, rte_socket_id(), 0);
+		if (!qdma_dev->vqs[i].status_ring) {
+			DPAA2_QDMA_ERR("Status ring creation failed for vq");
+			rte_spinlock_unlock(&qdma_dev->lock);
+			return rte_errno;
 		}
+
+		/* Get a HW queue (shared) for a VQ */
+		qdma_dev->vqs[i].hw_queue = get_hw_queue(qdma_dev,
+						    q_config->lcore_id);
+		qdma_dev->vqs[i].exclusive_hw_queue = 0;
 	}
-	swp = DPAA2_PER_LCORE_PORTAL;
 
-	rxq = &(dpdmai_dev->rx_queue[rxq_id]);
-	fqid = rxq->fqid;
+	if (qdma_dev->vqs[i].hw_queue == NULL) {
+		DPAA2_QDMA_ERR("No H/W queue available for VQ");
+		if (qdma_dev->vqs[i].status_ring)
+			rte_ring_free(qdma_dev->vqs[i].status_ring);
+		qdma_dev->vqs[i].status_ring = NULL;
+		rte_spinlock_unlock(&qdma_dev->lock);
+		return -ENODEV;
+	}
 
-	do {
-		dq_storage = rxq->q_storage->dq_storage[0];
-		/* Prepare dequeue descriptor */
-		qbman_pull_desc_clear(&pulldesc);
-		qbman_pull_desc_set_fq(&pulldesc, fqid);
-		qbman_pull_desc_set_storage(&pulldesc, dq_storage,
-			(uint64_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
+	qdma_dev->vqs[i].in_use = 1;
+	qdma_dev->vqs[i].lcore_id = q_config->lcore_id;
+	memset(&qdma_dev->vqs[i].rbp, 0, sizeof(struct rte_qdma_rbp));
 
-		if (next_pull > dpaa2_dqrr_size) {
-			qbman_pull_desc_set_numframes(&pulldesc,
-					dpaa2_dqrr_size);
-			next_pull -= dpaa2_dqrr_size;
-		} else {
-			qbman_pull_desc_set_numframes(&pulldesc, next_pull);
-			next_pull = 0;
-		}
+	if (q_config->flags & RTE_QDMA_VQ_FD_LONG_FORMAT) {
+		qdma_dev->vqs[i].set_fd = dpdmai_dev_set_fd_lf;
+		qdma_dev->vqs[i].get_job = dpdmai_dev_get_job_lf;
+	} else {
+		qdma_dev->vqs[i].set_fd = dpdmai_dev_set_fd_us;
+		qdma_dev->vqs[i].get_job = dpdmai_dev_get_job_us;
+	}
+	if (dpaa2_get_devargs(rawdev->device->devargs,
+			DPAA2_QDMA_NO_PREFETCH) ||
+			(getenv("DPAA2_NO_QDMA_PREFETCH_RX"))) {
+		/* If no prefetch is configured. */
+		qdma_dev->vqs[i].dequeue_job =
+				dpdmai_dev_dequeue_multijob_no_prefetch;
+		DPAA2_QDMA_INFO("No Prefetch RX Mode enabled");
+	} else {
+		qdma_dev->vqs[i].dequeue_job =
+			dpdmai_dev_dequeue_multijob_prefetch;
+	}
 
-		while (1) {
-			if (qbman_swp_pull(swp, &pulldesc)) {
-				DPAA2_QDMA_DP_WARN("VDQ command not issued. QBMAN busy");
-				/* Portal was busy, try again */
-				continue;
-			}
-			break;
-		}
+	qdma_dev->vqs[i].enqueue_job = dpdmai_dev_enqueue_multi;
 
-		rte_prefetch0((void *)((size_t)(dq_storage + 1)));
-		/* Check if the previous issued command is completed. */
-		while (!qbman_check_command_complete(dq_storage))
-			;
+	if (q_config->rbp != NULL)
+		memcpy(&qdma_dev->vqs[i].rbp, q_config->rbp,
+				sizeof(struct rte_qdma_rbp));
 
-		num_pulled = 0;
-		pending = 1;
+	rte_spinlock_unlock(&qdma_dev->lock);
 
-		do {
-			/* Loop until dq_storage is updated
-			 * with new token by QBMAN
-			 */
-			while (!qbman_check_new_result(dq_storage))
-				;
-			rte_prefetch0((void *)((size_t)(dq_storage + 2)));
+	return i;
+}
 
-			if (qbman_result_DQ_is_pull_complete(dq_storage)) {
-				pending = 0;
-				/* Check for valid frame. */
-				status = qbman_result_DQ_flags(dq_storage);
-				if (unlikely((status &
-					QBMAN_DQ_STAT_VALIDFRAME) == 0))
-					continue;
-			}
-			fd = qbman_result_DQ_fd(dq_storage);
+static int
+dpaa2_qdma_enqueue(struct rte_rawdev *rawdev,
+		  __rte_unused struct rte_rawdev_buf **buffers,
+		  unsigned int nb_jobs,
+		  rte_rawdev_obj_t context)
+{
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct rte_qdma_enqdeq *e_context =
+		(struct rte_qdma_enqdeq *)context;
+	struct qdma_virt_queue *qdma_vq =
+		&dpdmai_dev->qdma_dev->vqs[e_context->vq_id];
+	int ret;
 
-			vqid = dpdmai_dev_get_job(dpdmai_dev->qdma_dev, fd,
-						  &job[num_rx]);
-			if (vq_id)
-				vq_id[num_rx] = vqid;
+	/* Return error in case of wrong lcore_id */
+	if (rte_lcore_id() != qdma_vq->lcore_id) {
+		DPAA2_QDMA_ERR("QDMA enqueue for vqid %d on wrong core",
+				e_context->vq_id);
+		return -EINVAL;
+	}
 
-			dq_storage++;
-			num_rx++;
-			num_pulled++;
+	ret = qdma_vq->enqueue_job(qdma_vq, e_context->job, nb_jobs);
+	if (ret < 0) {
+		DPAA2_QDMA_ERR("DPDMAI device enqueue failed: %d", ret);
+		return ret;
+	}
 
-		} while (pending);
-	/* Last VDQ provided all packets and more packets are requested */
-	} while (next_pull && num_pulled == dpaa2_dqrr_size);
+	qdma_vq->num_enqueues += ret;
 
-	return num_rx;
+	return ret;
 }
 
 static int
-dpaa2_qdma_dequeue(__rte_unused struct rte_rawdev *rawdev,
+dpaa2_qdma_dequeue(struct rte_rawdev *rawdev,
 		   __rte_unused struct rte_rawdev_buf **buffers,
 		   unsigned int nb_jobs,
 		   rte_rawdev_obj_t cntxt)
 {
-	struct rte_qdma_enqdeq *context = (struct rte_qdma_enqdeq *)cntxt;
-	struct qdma_virt_queue *qdma_vq = &qdma_vqs[context->vq_id];
-	struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct rte_qdma_enqdeq *context =
+		(struct rte_qdma_enqdeq *)cntxt;
+	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[context->vq_id];
 	struct qdma_virt_queue *temp_qdma_vq;
-	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
 	int ret = 0, i;
 	unsigned int ring_count;
 
@@ -1035,12 +1074,12 @@ dpaa2_qdma_dequeue(__rte_unused struct rte_rawdev *rawdev,
 		return 0;
 
 	if (qdma_vq->num_enqueues < (qdma_vq->num_dequeues + nb_jobs))
-		nb_jobs = (qdma_vq->num_enqueues -  qdma_vq->num_dequeues);
+		nb_jobs = (qdma_vq->num_enqueues - qdma_vq->num_dequeues);
 
 	if (qdma_vq->exclusive_hw_queue) {
 		/* In case of exclusive queue directly fetch from HW queue */
-		ret = dpdmai_dev_dequeue_multijob(dpdmai_dev, qdma_pq->queue_id,
-					 NULL, context->job, nb_jobs);
+		ret = qdma_vq->dequeue_job(qdma_vq, NULL,
+					context->job, nb_jobs);
 		if (ret < 0) {
 			DPAA2_QDMA_ERR(
 				"Dequeue from DPDMAI device failed: %d", ret);
@@ -1057,11 +1096,10 @@ dpaa2_qdma_dequeue(__rte_unused struct rte_rawdev *rawdev,
 		ring_count = rte_ring_count(qdma_vq->status_ring);
 		if (ring_count < nb_jobs) {
 			/* TODO - How to have right budget */
-			ret = dpdmai_dev_dequeue_multijob(dpdmai_dev,
-					qdma_pq->queue_id,
+			ret = qdma_vq->dequeue_job(qdma_vq,
 					temp_vq_id, context->job, nb_jobs);
 			for (i = 0; i < ret; i++) {
-				temp_qdma_vq = &qdma_vqs[temp_vq_id[i]];
+				temp_qdma_vq = &qdma_dev->vqs[temp_vq_id[i]];
 				rte_ring_enqueue(temp_qdma_vq->status_ring,
 					(void *)(context->job[i]));
 			}
@@ -1085,10 +1123,13 @@ dpaa2_qdma_dequeue(__rte_unused struct rte_rawdev *rawdev,
 }
 
 void
-rte_qdma_vq_stats(uint16_t vq_id,
-		  struct rte_qdma_vq_stats *vq_status)
+rte_qdma_vq_stats(struct rte_rawdev *rawdev,
+		uint16_t vq_id,
+		struct rte_qdma_vq_stats *vq_status)
 {
-	struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id];
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vq_id];
 
 	if (qdma_vq->in_use) {
 		vq_status->exclusive_hw_queue = qdma_vq->exclusive_hw_queue;
@@ -1107,7 +1148,7 @@ dpaa2_qdma_queue_release(struct rte_rawdev *rawdev,
 	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
 	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
 
-	struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id];
+	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vq_id];
 
 	DPAA2_QDMA_FUNC_TRACE();
 
@@ -1120,8 +1161,8 @@ dpaa2_qdma_queue_release(struct rte_rawdev *rawdev,
 	if (qdma_vq->exclusive_hw_queue)
 		free_hw_queue(qdma_vq->hw_queue);
 	else {
-		if (qdma_vqs->status_ring)
-			rte_ring_free(qdma_vqs->status_ring);
+		if (qdma_vq->status_ring)
+			rte_ring_free(qdma_vq->status_ring);
 
 		put_hw_queue(qdma_vq->hw_queue);
 	}
@@ -1245,43 +1286,6 @@ dpaa2_dpdmai_dev_uninit(struct rte_rawdev *rawdev)
 }
 
 static int
-check_devargs_handler(__rte_unused const char *key, const char *value,
-		      __rte_unused void *opaque)
-{
-	if (strcmp(value, "1"))
-		return -1;
-
-	return 0;
-}
-
-static int
-dpaa2_get_devargs(struct rte_devargs *devargs, const char *key)
-{
-	struct rte_kvargs *kvlist;
-
-	if (!devargs)
-		return 0;
-
-	kvlist = rte_kvargs_parse(devargs->args, NULL);
-	if (!kvlist)
-		return 0;
-
-	if (!rte_kvargs_count(kvlist, key)) {
-		rte_kvargs_free(kvlist);
-		return 0;
-	}
-
-	if (rte_kvargs_process(kvlist, key,
-			       check_devargs_handler, NULL) < 0) {
-		rte_kvargs_free(kvlist);
-		return 0;
-	}
-	rte_kvargs_free(kvlist);
-
-	return 1;
-}
-
-static int
 dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id)
 {
 	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
@@ -1384,17 +1388,6 @@ dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id)
 		goto init_err;
 	}
 
-	if (dpaa2_get_devargs(rawdev->device->devargs,
-		DPAA2_QDMA_NO_PREFETCH)) {
-		/* If no prefetch is configured. */
-		dpdmai_dev_dequeue_multijob =
-				dpdmai_dev_dequeue_multijob_no_prefetch;
-		DPAA2_QDMA_INFO("No Prefetch RX Mode enabled");
-	} else {
-		dpdmai_dev_dequeue_multijob =
-			dpdmai_dev_dequeue_multijob_prefetch;
-	}
-
 	if (!dpaa2_coherent_no_alloc_cache) {
 		if (dpaa2_svr_family == SVR_LX2160A) {
 			dpaa2_coherent_no_alloc_cache =
diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
index 3c112d2..4265ee8 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
@@ -52,10 +52,11 @@ struct qdma_device {
 	 * This is limited by MAX_HW_QUEUE_PER_CORE
 	 */
 	uint16_t max_hw_queues_per_core;
+
+	/** VQ's of this device */
+	struct qdma_virt_queue *vqs;
 	/** Maximum number of VQ's */
 	uint16_t max_vqs;
-	/** mode of operation - physical(h/w) or virtual */
-	uint8_t mode;
 	/** Device state - started or stopped */
 	uint8_t state;
 	/** FLE pool for the device */
@@ -80,6 +81,26 @@ struct qdma_hw_queue {
 	uint32_t num_users;
 };
 
+struct qdma_virt_queue;
+
+typedef uint16_t (qdma_get_job_t)(struct qdma_virt_queue *qdma_vq,
+					const struct qbman_fd *fd,
+					struct rte_qdma_job **job);
+typedef int (qdma_set_fd_t)(struct qdma_virt_queue *qdma_vq,
+					struct qbman_fd *fd,
+					struct rte_qdma_job *job);
+
+typedef int (qdma_dequeue_multijob_t)(
+				struct qdma_virt_queue *qdma_vq,
+				uint16_t *vq_id,
+				struct rte_qdma_job **job,
+				uint16_t nb_jobs);
+
+typedef int (qdma_enqueue_multijob_t)(
+			struct qdma_virt_queue *qdma_vq,
+			struct rte_qdma_job **job,
+			uint16_t nb_jobs);
+
 /** Represents a QDMA virtual queue */
 struct qdma_virt_queue {
 	/** Status ring of the virtual queue */
@@ -98,6 +119,14 @@ struct qdma_virt_queue {
 	uint64_t num_enqueues;
 	/* Total number of dequeues from this VQ */
 	uint64_t num_dequeues;
+
+	uint16_t vq_id;
+
+	qdma_set_fd_t *set_fd;
+	qdma_get_job_t *get_job;
+
+	qdma_dequeue_multijob_t *dequeue_job;
+	qdma_enqueue_multijob_t *enqueue_job;
 };
 
 /** Represents a QDMA per core hw queues allocation in virtual mode */
@@ -176,4 +205,10 @@ struct dpaa2_dpdmai_dev {
 	struct qdma_device *qdma_dev;
 };
 
+static inline struct qdma_device *
+QDMA_DEV_OF_VQ(struct qdma_virt_queue *vq)
+{
+	return vq->hw_queue->dpdmai_dev->qdma_dev;
+}
+
 #endif /* __DPAA2_QDMA_H__ */
diff --git a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
index 71894d3..ff4fc1d 100644
--- a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
@@ -50,6 +50,8 @@ enum {
  */
 #define RTE_QDMA_VQ_EXCLUSIVE_PQ	(1ULL)
 
+#define RTE_QDMA_VQ_FD_LONG_FORMAT		(1ULL << 1)
+
 /** States if the source addresses is physical. */
 #define RTE_QDMA_JOB_SRC_PHY		(1ULL)
 
@@ -68,10 +70,6 @@ struct rte_qdma_config {
 	uint16_t max_hw_queues_per_core;
 	/** Maximum number of VQ's to be used. */
 	uint16_t max_vqs;
-	/** mode of operation - physical(h/w) or virtual */
-	uint8_t mode;
-	/** FD format */
-	uint8_t format;
 	/**
 	 * User provides this as input to the driver as a size of the FLE pool.
 	 * FLE's (and corresponding source/destination descriptors) are
@@ -182,13 +180,16 @@ struct rte_qdma_queue_config {
 /**
  * Get a Virtual Queue statistics.
  *
+ * @param rawdev
+ *   Raw Device.
  * @param vq_id
  *   Virtual Queue ID.
  * @param vq_stats
  *   VQ statistics structure which will be filled in by the driver.
  */
 void
-rte_qdma_vq_stats(uint16_t vq_id,
-		  struct rte_qdma_vq_stats *vq_stats);
+rte_qdma_vq_stats(struct rte_rawdev *rawdev,
+		uint16_t vq_id,
+		struct rte_qdma_vq_stats *vq_stats);
 
 #endif /* __RTE_PMD_DPAA2_QDMA_H__*/
-- 
2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH 4/7] raw/dpaa2_qdma: optimize IOVA conversion
  2020-09-07  9:25 [dpdk-dev] [PATCH 0/7] raw/dpaa2_qdma: driver enhancement Gagandeep Singh
                   ` (2 preceding siblings ...)
  2020-09-07  9:26 ` [dpdk-dev] [PATCH 3/7] raw/dpaa2_qdma: refactor the code Gagandeep Singh
@ 2020-09-07  9:26 ` Gagandeep Singh
  2020-09-07  9:26 ` [dpdk-dev] [PATCH 5/7] raw/dpaa2_qdma: support scatter gather in enqueue Gagandeep Singh
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 23+ messages in thread
From: Gagandeep Singh @ 2020-09-07  9:26 UTC (permalink / raw)
  To: dev, nipun.gupta, hemant.agrawal; +Cc: thomas.monjalon, Jun Yang

From: Jun Yang <jun.yang@nxp.com>

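rte_mempool_virt2iova is now used to convert FLE pool element addresses
to IOVA when RTE_LIBRTE_DPAA2_USE_PHYS_IOVA is defined; otherwise
DPAA2_VADDR_TO_IOVA is still used.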

Signed-off-by: Jun Yang <jun.yang@nxp.com>
---
 drivers/raw/dpaa2_qdma/dpaa2_qdma.c | 29 +++++++++++++++++++++++------
 drivers/raw/dpaa2_qdma/dpaa2_qdma.h |  5 +++++
 2 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
index ae45ec3..6b4d080 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
@@ -221,7 +221,9 @@ static inline int dpdmai_dev_set_fd_lf(
 {
 	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
 	struct rte_qdma_job **ppjob;
+	void *elem;
 	struct qbman_fle *fle;
+	uint64_t elem_iova, fle_iova;
 	int ret = 0;
 	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
 
@@ -229,19 +231,29 @@ static inline int dpdmai_dev_set_fd_lf(
 	 * Get an FLE/SDD from FLE pool.
 	 * Note: IO metadata is before the FLE and SDD memory.
 	 */
-	ret = rte_mempool_get(qdma_dev->fle_pool, (void **)(&ppjob));
+	ret = rte_mempool_get(qdma_dev->fle_pool, (void **)(&elem));
 	if (ret) {
 		DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
 		return ret;
 	}
 
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+	elem_iova = rte_mempool_virt2iova(elem);
+#else
+	elem_iova = DPAA2_VADDR_TO_IOVA(elem);
+#endif
+
 	/* Set the metadata */
 	job->vq_id = qdma_vq->vq_id;
+	ppjob = (struct rte_qdma_job **)
+			((uint64_t)elem + QDMA_FLE_JOB_OFFSET);
 	*ppjob = job;
 
-	fle = (struct qbman_fle *)(ppjob + 1);
+	fle = (struct qbman_fle *)
+			((uint64_t)elem + QDMA_FLE_FLE_OFFSET);
+	fle_iova = elem_iova + QDMA_FLE_FLE_OFFSET;
 
-	DPAA2_SET_FD_ADDR(fd, DPAA2_VADDR_TO_IOVA(fle));
+	DPAA2_SET_FD_ADDR(fd, fle_iova);
 	DPAA2_SET_FD_COMPOUND_FMT(fd);
 	DPAA2_SET_FD_FRC(fd, QDMA_SER_CTX);
 
@@ -283,6 +295,8 @@ static inline uint16_t dpdmai_dev_get_job_lf(
 						const struct qbman_fd *fd,
 						struct rte_qdma_job **job)
 {
+	void *elem;
+	struct qbman_fle *fle;
 	struct rte_qdma_job **ppjob;
 	uint16_t vqid;
 	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
@@ -291,9 +305,12 @@ static inline uint16_t dpdmai_dev_get_job_lf(
 	 * Fetch metadata from FLE. job and vq_id were set
 	 * in metadata in the enqueue operation.
 	 */
-	ppjob = (struct rte_qdma_job **)
+	fle = (struct qbman_fle *)
 			DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd));
-	ppjob -= 1;
+	elem = (void *)((uint64_t)fle - QDMA_FLE_FLE_OFFSET);
+
+	ppjob = (struct rte_qdma_job **)
+			((uint64_t)elem + QDMA_FLE_JOB_OFFSET);
 
 	*job = (struct rte_qdma_job *)*ppjob;
 	(*job)->status = (DPAA2_GET_FD_ERR(fd) << 8) |
@@ -301,7 +318,7 @@ static inline uint16_t dpdmai_dev_get_job_lf(
 	vqid = (*job)->vq_id;
 
 	/* Free FLE to the pool */
-	rte_mempool_put(qdma_dev->fle_pool, (void *)ppjob);
+	rte_mempool_put(qdma_dev->fle_pool, elem);
 
 	return vqid;
 }
diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
index 4265ee8..ff7743f 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
@@ -17,6 +17,11 @@ struct rte_qdma_job;
 #define QDMA_FLE_POOL_SIZE (sizeof(struct rte_qdma_job *) + \
 		sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE + \
 		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD)
+
+#define QDMA_FLE_JOB_OFFSET 0
+#define QDMA_FLE_FLE_OFFSET \
+		(QDMA_FLE_JOB_OFFSET + sizeof(struct rte_qdma_job *))
+
 /** FLE pool cache size */
 #define QDMA_FLE_CACHE_SIZE(_num) (_num/(RTE_MAX_LCORE * 2))
 
-- 
2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH 5/7] raw/dpaa2_qdma: support scatter gather in enqueue
  2020-09-07  9:25 [dpdk-dev] [PATCH 0/7] raw/dpaa2_qdma: driver enhancement Gagandeep Singh
                   ` (3 preceding siblings ...)
  2020-09-07  9:26 ` [dpdk-dev] [PATCH 4/7] raw/dpaa2_qdma: optimize IOVA conversion Gagandeep Singh
@ 2020-09-07  9:26 ` Gagandeep Singh
  2020-09-07  9:26 ` [dpdk-dev] [PATCH 6/7] raw/dpaa2_qdma: support FLE pool per queue Gagandeep Singh
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 23+ messages in thread
From: Gagandeep Singh @ 2020-09-07  9:26 UTC (permalink / raw)
  To: dev, nipun.gupta, hemant.agrawal; +Cc: thomas.monjalon, Jun Yang

From: Jun Yang <jun.yang@nxp.com>

This patch adds Scatter Gather support for jobs on qdma queues.
It also supports gathering multiple enqueue jobs into SG enqueue job(s).

Signed-off-by: Jun Yang <jun.yang@nxp.com>
---
 drivers/bus/fslmc/portal/dpaa2_hw_pvt.h     |  18 +-
 drivers/raw/dpaa2_qdma/dpaa2_qdma.c         | 397 +++++++++++++++++++++++-----
 drivers/raw/dpaa2_qdma/dpaa2_qdma.h         |  75 +++++-
 drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h |   2 +
 4 files changed, 411 insertions(+), 81 deletions(-)

diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
index 35423df..e540759 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
@@ -210,12 +210,28 @@ struct dpaa2_dpcon_dev {
 };
 
 /* Refer to Table 7-3 in SEC BG */
+#define QBMAN_FLE_WORD4_FMT_SBF 0x0    /* Single buffer frame */
+#define QBMAN_FLE_WORD4_FMT_SGE 0x2 /* Scatter gather frame */
+
+struct qbman_fle_word4 {
+	uint32_t bpid:14; /* Frame buffer pool ID */
+	uint32_t ivp:1; /* Invalid Pool ID. */
+	uint32_t bmt:1; /* Bypass Memory Translation */
+	uint32_t offset:12; /* Frame offset */
+	uint32_t fmt:2; /* Frame Format */
+	uint32_t sl:1; /* Short Length */
+	uint32_t f:1; /* Final bit */
+};
+
 struct qbman_fle {
 	uint32_t addr_lo;
 	uint32_t addr_hi;
 	uint32_t length;
 	/* FMT must be 00, MSB is final bit  */
-	uint32_t fin_bpid_offset;
+	union {
+		uint32_t fin_bpid_offset;
+		struct qbman_fle_word4 word4;
+	};
 	uint32_t frc;
 	uint32_t reserved[3]; /* Not used currently */
 };
diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
index 6b4d080..0c56a04 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
@@ -116,17 +116,21 @@ qdma_populate_fd_ddr(phys_addr_t src, phys_addr_t dest,
 
 static void
 dpaa2_qdma_populate_fle(struct qbman_fle *fle,
+			uint64_t fle_iova,
 			struct rte_qdma_rbp *rbp,
 			uint64_t src, uint64_t dest,
-			size_t len, uint32_t flags)
+			size_t len, uint32_t flags, uint32_t fmt)
 {
 	struct qdma_sdd *sdd;
+	uint64_t sdd_iova;
 
-	sdd = (struct qdma_sdd *)((uint8_t *)(fle) +
-		(DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle)));
+	sdd = (struct qdma_sdd *)
+			((uint64_t)fle - QDMA_FLE_FLE_OFFSET +
+			QDMA_FLE_SDD_OFFSET);
+	sdd_iova = fle_iova - QDMA_FLE_FLE_OFFSET + QDMA_FLE_SDD_OFFSET;
 
 	/* first frame list to source descriptor */
-	DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(sdd));
+	DPAA2_SET_FLE_ADDR(fle, sdd_iova);
 	DPAA2_SET_FLE_LEN(fle, (2 * (sizeof(struct qdma_sdd))));
 
 	/* source and destination descriptor */
@@ -164,20 +168,26 @@ dpaa2_qdma_populate_fle(struct qbman_fle *fle,
 	/* source frame list to source buffer */
 	if (flags & RTE_QDMA_JOB_SRC_PHY) {
 		DPAA2_SET_FLE_ADDR(fle, src);
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
 		DPAA2_SET_FLE_BMT(fle);
+#endif
 	} else {
 		DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(src));
 	}
+	fle->word4.fmt = fmt;
 	DPAA2_SET_FLE_LEN(fle, len);
 
 	fle++;
 	/* destination frame list to destination buffer */
 	if (flags & RTE_QDMA_JOB_DEST_PHY) {
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
 		DPAA2_SET_FLE_BMT(fle);
+#endif
 		DPAA2_SET_FLE_ADDR(fle, dest);
 	} else {
 		DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(dest));
 	}
+	fle->word4.fmt = fmt;
 	DPAA2_SET_FLE_LEN(fle, len);
 
 	/* Final bit: 1, for last frame list */
@@ -187,44 +197,169 @@ dpaa2_qdma_populate_fle(struct qbman_fle *fle,
 static inline int dpdmai_dev_set_fd_us(
 		struct qdma_virt_queue *qdma_vq,
 		struct qbman_fd *fd,
-		struct rte_qdma_job *job)
+		struct rte_qdma_job **job,
+		uint16_t nb_jobs)
 {
 	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
 	struct rte_qdma_job **ppjob;
 	size_t iova;
-	int ret = 0;
+	int ret = 0, loop;
+
+	for (loop = 0; loop < nb_jobs; loop++) {
+		if (job[loop]->src & QDMA_RBP_UPPER_ADDRESS_MASK)
+			iova = (size_t)job[loop]->dest;
+		else
+			iova = (size_t)job[loop]->src;
+
+		/* Set the metadata */
+		job[loop]->vq_id = qdma_vq->vq_id;
+		ppjob = (struct rte_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1;
+		*ppjob = job[loop];
+
+		if ((rbp->drbp == 1) || (rbp->srbp == 1))
+			ret = qdma_populate_fd_pci((phys_addr_t)job[loop]->src,
+						(phys_addr_t)job[loop]->dest,
+						job[loop]->len, &fd[loop], rbp);
+		else
+			ret = qdma_populate_fd_ddr((phys_addr_t)job[loop]->src,
+						(phys_addr_t)job[loop]->dest,
+						job[loop]->len, &fd[loop]);
+	}
 
-	if (job->src & QDMA_RBP_UPPER_ADDRESS_MASK)
-		iova = (size_t)job->dest;
-	else
-		iova = (size_t)job->src;
+	return ret;
+}
 
-	/* Set the metadata */
-	job->vq_id = qdma_vq->vq_id;
-	ppjob = (struct rte_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1;
-	*ppjob = job;
+static uint32_t qdma_populate_sg_entry(
+		struct rte_qdma_job **jobs,
+		struct qdma_sg_entry *src_sge,
+		struct qdma_sg_entry *dst_sge,
+		uint16_t nb_jobs)
+{
+	uint16_t i;
+	uint32_t total_len = 0;
+	uint64_t iova;
+
+	for (i = 0; i < nb_jobs; i++) {
+		/* source SG */
+		if (likely(jobs[i]->flags & RTE_QDMA_JOB_SRC_PHY)) {
+			src_sge->addr_lo = (uint32_t)jobs[i]->src;
+			src_sge->addr_hi = (jobs[i]->src >> 32);
+		} else {
+			iova = DPAA2_VADDR_TO_IOVA(jobs[i]->src);
+			src_sge->addr_lo = (uint32_t)iova;
+			src_sge->addr_hi = iova >> 32;
+		}
+		src_sge->data_len.data_len_sl0 = jobs[i]->len;
+		src_sge->ctrl.sl = QDMA_SG_SL_LONG;
+		src_sge->ctrl.fmt = QDMA_SG_FMT_SDB;
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+		src_sge->ctrl.bmt = QDMA_SG_BMT_ENABLE;
+#else
+		src_sge->ctrl.bmt = QDMA_SG_BMT_DISABLE;
+#endif
+		/* destination SG */
+		if (likely(jobs[i]->flags & RTE_QDMA_JOB_DEST_PHY)) {
+			dst_sge->addr_lo = (uint32_t)jobs[i]->dest;
+			dst_sge->addr_hi = (jobs[i]->dest >> 32);
+		} else {
+			iova = DPAA2_VADDR_TO_IOVA(jobs[i]->dest);
+			dst_sge->addr_lo = (uint32_t)iova;
+			dst_sge->addr_hi = iova >> 32;
+		}
+		dst_sge->data_len.data_len_sl0 = jobs[i]->len;
+		dst_sge->ctrl.sl = QDMA_SG_SL_LONG;
+		dst_sge->ctrl.fmt = QDMA_SG_FMT_SDB;
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+		dst_sge->ctrl.bmt = QDMA_SG_BMT_ENABLE;
+#else
+		dst_sge->ctrl.bmt = QDMA_SG_BMT_DISABLE;
+#endif
+		total_len += jobs[i]->len;
 
-	if ((rbp->drbp == 1) || (rbp->srbp == 1))
-		ret = qdma_populate_fd_pci((phys_addr_t) job->src,
-					   (phys_addr_t) job->dest,
-					   job->len, fd, rbp);
-	else
-		ret = qdma_populate_fd_ddr((phys_addr_t) job->src,
-					   (phys_addr_t) job->dest,
-					   job->len, fd);
-	return ret;
+		if (i == (nb_jobs - 1)) {
+			src_sge->ctrl.f = QDMA_SG_F;
+			dst_sge->ctrl.f = QDMA_SG_F;
+		} else {
+			src_sge->ctrl.f = 0;
+			dst_sge->ctrl.f = 0;
+		}
+		src_sge++;
+		dst_sge++;
+	}
+
+	return total_len;
 }
-static inline int dpdmai_dev_set_fd_lf(
+
+static inline int dpdmai_dev_set_multi_fd_lf(
 		struct qdma_virt_queue *qdma_vq,
 		struct qbman_fd *fd,
-		struct rte_qdma_job *job)
+		struct rte_qdma_job **job,
+		uint16_t nb_jobs)
 {
 	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
 	struct rte_qdma_job **ppjob;
-	void *elem;
+	uint16_t i;
+	int ret;
+	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
+	void *elem[RTE_QDMA_BURST_NB_MAX];
 	struct qbman_fle *fle;
 	uint64_t elem_iova, fle_iova;
-	int ret = 0;
+
+	ret = rte_mempool_get_bulk(qdma_dev->fle_pool, elem, nb_jobs);
+	if (ret) {
+		DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
+		return ret;
+	}
+
+	for (i = 0; i < nb_jobs; i++) {
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+		elem_iova = rte_mempool_virt2iova(elem[i]);
+#else
+		elem_iova = DPAA2_VADDR_TO_IOVA(elem[i]);
+#endif
+
+		*((uint16_t *)
+		((uint64_t)elem[i] + QDMA_FLE_JOB_NB_OFFSET)) = 1;
+
+		ppjob = (struct rte_qdma_job **)
+				((uint64_t)elem[i] + QDMA_FLE_JOBS_OFFSET);
+		*ppjob = job[i];
+
+		job[i]->vq_id = qdma_vq->vq_id;
+
+		fle = (struct qbman_fle *)
+				((uint64_t)elem[i] + QDMA_FLE_FLE_OFFSET);
+		fle_iova = elem_iova + QDMA_FLE_FLE_OFFSET;
+
+		DPAA2_SET_FD_ADDR(&fd[i], fle_iova);
+		DPAA2_SET_FD_COMPOUND_FMT(&fd[i]);
+		DPAA2_SET_FD_FRC(&fd[i], QDMA_SER_CTX);
+
+		memset(fle, 0, DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle) +
+			DPAA2_QDMA_MAX_SDD * sizeof(struct qdma_sdd));
+
+		dpaa2_qdma_populate_fle(fle, fle_iova, rbp,
+				job[i]->src, job[i]->dest, job[i]->len,
+				job[i]->flags, QBMAN_FLE_WORD4_FMT_SBF);
+	}
+
+	return 0;
+}
+
+static inline int dpdmai_dev_set_sg_fd_lf(
+		struct qdma_virt_queue *qdma_vq,
+		struct qbman_fd *fd,
+		struct rte_qdma_job **job,
+		uint16_t nb_jobs)
+{
+	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
+	struct rte_qdma_job **ppjob;
+	void *elem;
+	struct qbman_fle *fle;
+	uint64_t elem_iova, fle_iova, src, dst;
+	int ret = 0, i;
+	struct qdma_sg_entry *src_sge, *dst_sge;
+	uint32_t len, fmt, flags;
 	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
 
 	/*
@@ -244,10 +379,14 @@ static inline int dpdmai_dev_set_fd_lf(
 #endif
 
 	/* Set the metadata */
-	job->vq_id = qdma_vq->vq_id;
+	/* Save job context. */
+	*((uint16_t *)((uint64_t)elem + QDMA_FLE_JOB_NB_OFFSET)) = nb_jobs;
 	ppjob = (struct rte_qdma_job **)
-			((uint64_t)elem + QDMA_FLE_JOB_OFFSET);
-	*ppjob = job;
+			((uint64_t)elem + QDMA_FLE_JOBS_OFFSET);
+	for (i = 0; i < nb_jobs; i++)
+		ppjob[i] = job[i];
+
+	ppjob[0]->vq_id = qdma_vq->vq_id;
 
 	fle = (struct qbman_fle *)
 			((uint64_t)elem + QDMA_FLE_FLE_OFFSET);
@@ -258,9 +397,29 @@ static inline int dpdmai_dev_set_fd_lf(
 	DPAA2_SET_FD_FRC(fd, QDMA_SER_CTX);
 
 	/* Populate FLE */
-	memset(fle, 0, QDMA_FLE_POOL_SIZE);
-	dpaa2_qdma_populate_fle(fle, rbp, job->src, job->dest,
-				job->len, job->flags);
+	if (likely(nb_jobs > 1)) {
+		src_sge = (struct qdma_sg_entry *)
+				((uint64_t)elem + QDMA_FLE_SG_ENTRY_OFFSET);
+		dst_sge = src_sge + DPAA2_QDMA_MAX_SG_NB;
+		src = elem_iova + QDMA_FLE_SG_ENTRY_OFFSET;
+		dst = src +
+			DPAA2_QDMA_MAX_SG_NB * sizeof(struct qdma_sg_entry);
+		len = qdma_populate_sg_entry(job, src_sge, dst_sge, nb_jobs);
+		fmt = QBMAN_FLE_WORD4_FMT_SGE;
+		flags = RTE_QDMA_JOB_SRC_PHY | RTE_QDMA_JOB_DEST_PHY;
+	} else {
+		src = job[0]->src;
+		dst = job[0]->dest;
+		len = job[0]->len;
+		fmt = QBMAN_FLE_WORD4_FMT_SBF;
+		flags = job[0]->flags;
+	}
+
+	memset(fle, 0, DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle) +
+			DPAA2_QDMA_MAX_SDD * sizeof(struct qdma_sdd));
+
+	dpaa2_qdma_populate_fle(fle, fle_iova, rbp,
+					src, dst, len, flags, fmt);
 
 	return 0;
 }
@@ -268,7 +427,7 @@ static inline int dpdmai_dev_set_fd_lf(
 static inline uint16_t dpdmai_dev_get_job_us(
 				struct qdma_virt_queue *qdma_vq __rte_unused,
 				const struct qbman_fd *fd,
-				struct rte_qdma_job **job)
+				struct rte_qdma_job **job, uint16_t *nb_jobs)
 {
 	uint16_t vqid;
 	size_t iova;
@@ -286,6 +445,7 @@ static inline uint16_t dpdmai_dev_get_job_us(
 	(*job)->status = (fd->simple_pci.acc_err << 8) |
 					(fd->simple_pci.error);
 	vqid = (*job)->vq_id;
+	*nb_jobs = 1;
 
 	return vqid;
 }
@@ -293,12 +453,12 @@ static inline uint16_t dpdmai_dev_get_job_us(
 static inline uint16_t dpdmai_dev_get_job_lf(
 						struct qdma_virt_queue *qdma_vq,
 						const struct qbman_fd *fd,
-						struct rte_qdma_job **job)
+						struct rte_qdma_job **job,
+						uint16_t *nb_jobs)
 {
-	void *elem;
 	struct qbman_fle *fle;
-	struct rte_qdma_job **ppjob;
-	uint16_t vqid;
+	struct rte_qdma_job **ppjob = NULL;
+	uint16_t i, status;
 	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
 
 	/*
@@ -307,20 +467,24 @@ static inline uint16_t dpdmai_dev_get_job_lf(
 	 */
 	fle = (struct qbman_fle *)
 			DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd));
-	elem = (void *)((uint64_t)fle - QDMA_FLE_FLE_OFFSET);
 
-	ppjob = (struct rte_qdma_job **)
-			((uint64_t)elem + QDMA_FLE_JOB_OFFSET);
+	*nb_jobs = *((uint16_t *)((uint64_t)fle -
+				QDMA_FLE_FLE_OFFSET + QDMA_FLE_JOB_NB_OFFSET));
+	status = (DPAA2_GET_FD_ERR(fd) << 8) | (DPAA2_GET_FD_FRC(fd) & 0xFF);
 
-	*job = (struct rte_qdma_job *)*ppjob;
-	(*job)->status = (DPAA2_GET_FD_ERR(fd) << 8) |
-			 (DPAA2_GET_FD_FRC(fd) & 0xFF);
-	vqid = (*job)->vq_id;
+	ppjob = (struct rte_qdma_job **)((uint64_t)fle -
+				QDMA_FLE_FLE_OFFSET + QDMA_FLE_JOBS_OFFSET);
+
+	for (i = 0; i < (*nb_jobs); i++) {
+		job[i] = ppjob[i];
+		job[i]->status = status;
+	}
 
 	/* Free FLE to the pool */
-	rte_mempool_put(qdma_dev->fle_pool, elem);
+	rte_mempool_put(qdma_dev->fle_pool,
+				(void *)((uint64_t)fle - QDMA_FLE_FLE_OFFSET));
 
-	return vqid;
+	return job[0]->vq_id;
 }
 
 /* Function to receive a QDMA job for a given device and queue*/
@@ -344,9 +508,16 @@ dpdmai_dev_dequeue_multijob_prefetch(
 	uint8_t status, pending;
 	uint8_t num_rx = 0;
 	const struct qbman_fd *fd;
-	uint16_t vqid;
+	uint16_t vqid, num_rx_ret;
 	int ret, pull_size;
 
+	if (qdma_vq->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
+		/** Make sure there are enough space to get jobs.*/
+		if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
+			return -EINVAL;
+		nb_jobs = 1;
+	}
+
 	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
 		ret = dpaa2_affine_qbman_swp();
 		if (ret) {
@@ -440,12 +611,13 @@ dpdmai_dev_dequeue_multijob_prefetch(
 		}
 		fd = qbman_result_DQ_fd(dq_storage);
 
-		vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx]);
+		vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx],
+								&num_rx_ret);
 		if (vq_id)
 			vq_id[num_rx] = vqid;
 
 		dq_storage++;
-		num_rx++;
+		num_rx += num_rx_ret;
 	} while (pending);
 
 	if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
@@ -490,8 +662,17 @@ dpdmai_dev_dequeue_multijob_no_prefetch(
 	uint8_t status, pending;
 	uint8_t num_rx = 0;
 	const struct qbman_fd *fd;
-	uint16_t vqid;
-	int ret, next_pull = nb_jobs, num_pulled = 0;
+	uint16_t vqid, num_rx_ret;
+	int ret, next_pull, num_pulled = 0;
+
+	if (qdma_vq->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
+		/** Make sure there are enough space to get jobs.*/
+		if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
+			return -EINVAL;
+		nb_jobs = 1;
+	}
+
+	next_pull = nb_jobs;
 
 	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
 		ret = dpaa2_affine_qbman_swp();
@@ -560,12 +741,13 @@ dpdmai_dev_dequeue_multijob_no_prefetch(
 			}
 			fd = qbman_result_DQ_fd(dq_storage);
 
-			vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx]);
+			vqid = qdma_vq->get_job(qdma_vq, fd,
+						&job[num_rx], &num_rx_ret);
 			if (vq_id)
 				vq_id[num_rx] = vqid;
 
 			dq_storage++;
-			num_rx++;
+			num_rx += num_rx_ret;
 			num_pulled++;
 
 		} while (pending);
@@ -592,6 +774,7 @@ dpdmai_dev_enqueue_multi(
 	int ret;
 	uint32_t num_to_send = 0;
 	uint16_t num_tx = 0;
+	uint32_t enqueue_loop, retry_count, loop;
 
 	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
 		ret = dpaa2_affine_qbman_swp();
@@ -612,45 +795,87 @@ dpdmai_dev_enqueue_multi(
 	qbman_eq_desc_set_no_orp(&eqdesc, 0);
 	qbman_eq_desc_set_response(&eqdesc, 0, 0);
 
+	if (qdma_vq->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
+		uint16_t fd_nb;
+		uint16_t sg_entry_nb = nb_jobs > DPAA2_QDMA_MAX_SG_NB ?
+						DPAA2_QDMA_MAX_SG_NB : nb_jobs;
+		uint16_t job_idx = 0;
+		uint16_t fd_sg_nb[8];
+		uint16_t nb_jobs_ret = 0;
+
+		if (nb_jobs % DPAA2_QDMA_MAX_SG_NB)
+			fd_nb = nb_jobs / DPAA2_QDMA_MAX_SG_NB + 1;
+		else
+			fd_nb = nb_jobs / DPAA2_QDMA_MAX_SG_NB;
+
+		memset(&fd[0], 0, sizeof(struct qbman_fd) * fd_nb);
+
+		for (loop = 0; loop < fd_nb; loop++) {
+			ret = qdma_vq->set_fd(qdma_vq, &fd[loop], &job[job_idx],
+					      sg_entry_nb);
+			if (unlikely(ret < 0))
+				return 0;
+			fd_sg_nb[loop] = sg_entry_nb;
+			nb_jobs -= sg_entry_nb;
+			job_idx += sg_entry_nb;
+			sg_entry_nb = nb_jobs > DPAA2_QDMA_MAX_SG_NB ?
+						DPAA2_QDMA_MAX_SG_NB : nb_jobs;
+		}
+
+		/* Enqueue the packet to the QBMAN */
+		enqueue_loop = 0; retry_count = 0;
+
+		while (enqueue_loop < fd_nb) {
+			ret = qbman_swp_enqueue_multiple(swp,
+					&eqdesc, &fd[enqueue_loop],
+					NULL, fd_nb - enqueue_loop);
+			if (unlikely(ret < 0)) {
+				retry_count++;
+				if (retry_count > DPAA2_MAX_TX_RETRY_COUNT)
+					return nb_jobs_ret;
+			} else {
+				for (loop = 0; loop < (uint32_t)ret; loop++)
+					nb_jobs_ret +=
+						fd_sg_nb[enqueue_loop + loop];
+				enqueue_loop += ret;
+				retry_count = 0;
+			}
+		}
+
+		return nb_jobs_ret;
+	}
+
 	memset(fd, 0, nb_jobs * sizeof(struct qbman_fd));
 
 	while (nb_jobs > 0) {
-		uint32_t loop;
-
 		num_to_send = (nb_jobs > dpaa2_eqcr_size) ?
 			dpaa2_eqcr_size : nb_jobs;
 
-		for (loop = 0; loop < num_to_send; loop++) {
-			ret = qdma_vq->set_fd(qdma_vq, &fd[loop], job[num_tx]);
-			if (ret < 0) {
-				/* Set nb_jobs to loop, so outer while loop
-				 * breaks out.
-				 */
-				nb_jobs = loop;
-				break;
-			}
-
-			num_tx++;
-		}
+		ret = qdma_vq->set_fd(qdma_vq, &fd[num_tx],
+						&job[num_tx], num_to_send);
+		if (unlikely(ret < 0))
+			break;
 
 		/* Enqueue the packet to the QBMAN */
-		uint32_t enqueue_loop = 0, retry_count = 0;
+		enqueue_loop = 0; retry_count = 0;
+		loop = num_to_send;
 
 		while (enqueue_loop < loop) {
 			ret = qbman_swp_enqueue_multiple(swp,
 						&eqdesc,
-						&fd[enqueue_loop],
+						&fd[num_tx + enqueue_loop],
 						NULL,
 						loop - enqueue_loop);
 			if (unlikely(ret < 0)) {
 				retry_count++;
 				if (retry_count > DPAA2_MAX_TX_RETRY_COUNT)
-					return num_tx - (loop - enqueue_loop);
+					return num_tx;
 			} else {
 				enqueue_loop += ret;
 				retry_count = 0;
 			}
 		}
+		num_tx += num_to_send;
 		nb_jobs -= loop;
 	}
 	return num_tx;
@@ -969,6 +1194,21 @@ dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
 		return -ENODEV;
 	}
 
+	if (q_config->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
+		if (!(q_config->flags & RTE_QDMA_VQ_EXCLUSIVE_PQ)) {
+			DPAA2_QDMA_ERR(
+				"qDMA SG format only supports physical queue!");
+			rte_spinlock_unlock(&qdma_dev->lock);
+			return -ENODEV;
+		}
+		if (!(q_config->flags & RTE_QDMA_VQ_FD_LONG_FORMAT)) {
+			DPAA2_QDMA_ERR(
+				"qDMA SG format only supports long FD format!");
+			rte_spinlock_unlock(&qdma_dev->lock);
+			return -ENODEV;
+		}
+	}
+
 	if (q_config->flags & RTE_QDMA_VQ_EXCLUSIVE_PQ) {
 		/* Allocate HW queue for a VQ */
 		qdma_dev->vqs[i].hw_queue = alloc_hw_queue(q_config->lcore_id);
@@ -999,12 +1239,16 @@ dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
 		return -ENODEV;
 	}
 
+	qdma_dev->vqs[i].flags = q_config->flags;
 	qdma_dev->vqs[i].in_use = 1;
 	qdma_dev->vqs[i].lcore_id = q_config->lcore_id;
 	memset(&qdma_dev->vqs[i].rbp, 0, sizeof(struct rte_qdma_rbp));
 
 	if (q_config->flags & RTE_QDMA_VQ_FD_LONG_FORMAT) {
-		qdma_dev->vqs[i].set_fd = dpdmai_dev_set_fd_lf;
+		if (q_config->flags & RTE_QDMA_VQ_FD_SG_FORMAT)
+			qdma_dev->vqs[i].set_fd = dpdmai_dev_set_sg_fd_lf;
+		else
+			qdma_dev->vqs[i].set_fd = dpdmai_dev_set_multi_fd_lf;
 		qdma_dev->vqs[i].get_job = dpdmai_dev_get_job_lf;
 	} else {
 		qdma_dev->vqs[i].set_fd = dpdmai_dev_set_fd_us;
@@ -1079,6 +1323,12 @@ dpaa2_qdma_dequeue(struct rte_rawdev *rawdev,
 	int ret = 0, i;
 	unsigned int ring_count;
 
+	if (qdma_vq->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
+		/** Make sure there are enough space to get jobs.*/
+		if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
+			return -EINVAL;
+	}
+
 	/* Return error in case of wrong lcore_id */
 	if (rte_lcore_id() != (unsigned int)(qdma_vq->lcore_id)) {
 		DPAA2_QDMA_WARN("QDMA dequeue for vqid %d on wrong core",
@@ -1090,7 +1340,8 @@ dpaa2_qdma_dequeue(struct rte_rawdev *rawdev,
 	if (qdma_vq->num_enqueues == qdma_vq->num_dequeues)
 		return 0;
 
-	if (qdma_vq->num_enqueues < (qdma_vq->num_dequeues + nb_jobs))
+	if (!(qdma_vq->flags & RTE_QDMA_VQ_FD_SG_FORMAT) &&
+		qdma_vq->num_enqueues < (qdma_vq->num_dequeues + nb_jobs))
 		nb_jobs = (qdma_vq->num_enqueues - qdma_vq->num_dequeues);
 
 	if (qdma_vq->exclusive_hw_queue) {
diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
index ff7743f..43a01d5 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
@@ -11,16 +11,37 @@ struct rte_qdma_job;
 #define DPAA2_QDMA_MAX_FLE 3
 #define DPAA2_QDMA_MAX_SDD 2
 
+#define DPAA2_QDMA_MAX_SG_NB 64
+
 #define DPAA2_DPDMAI_MAX_QUEUES	8
 
-/** FLE pool size: 3 Frame list + 2 source/destination descriptor */
-#define QDMA_FLE_POOL_SIZE (sizeof(struct rte_qdma_job *) + \
+/** FLE pool size: job number(uint64_t) +
+ * 3 Frame list + 2 source/destination descriptor  +
+ * 32 (src + dst) sg entries + 32 jobs pointers.
+ */
+
+#define QDMA_FLE_POOL_SIZE (sizeof(uint64_t) + \
 		sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE + \
-		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD)
+		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD + \
+		sizeof(struct qdma_sg_entry) * DPAA2_QDMA_MAX_SG_NB * 2 + \
+		sizeof(struct rte_qdma_job *) * DPAA2_QDMA_MAX_SG_NB)
+
+#define QDMA_FLE_JOB_NB_OFFSET 0
 
-#define QDMA_FLE_JOB_OFFSET 0
 #define QDMA_FLE_FLE_OFFSET \
-		(QDMA_FLE_JOB_OFFSET + sizeof(struct rte_qdma_job *))
+		(QDMA_FLE_JOB_NB_OFFSET + sizeof(uint64_t))
+
+#define QDMA_FLE_SDD_OFFSET \
+		(QDMA_FLE_FLE_OFFSET + \
+		sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE)
+
+#define QDMA_FLE_SG_ENTRY_OFFSET \
+		(QDMA_FLE_SDD_OFFSET + \
+		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD)
+
+#define QDMA_FLE_JOBS_OFFSET \
+		(QDMA_FLE_SG_ENTRY_OFFSET + \
+		sizeof(struct qdma_sg_entry) * DPAA2_QDMA_MAX_SG_NB * 2)
 
 /** FLE pool cache size */
 #define QDMA_FLE_CACHE_SIZE(_num) (_num/(RTE_MAX_LCORE * 2))
@@ -90,10 +111,12 @@ struct qdma_virt_queue;
 
 typedef uint16_t (qdma_get_job_t)(struct qdma_virt_queue *qdma_vq,
 					const struct qbman_fd *fd,
-					struct rte_qdma_job **job);
+					struct rte_qdma_job **job,
+					uint16_t *nb_jobs);
 typedef int (qdma_set_fd_t)(struct qdma_virt_queue *qdma_vq,
 					struct qbman_fd *fd,
-					struct rte_qdma_job *job);
+					struct rte_qdma_job **job,
+					uint16_t nb_jobs);
 
 typedef int (qdma_dequeue_multijob_t)(
 				struct qdma_virt_queue *qdma_vq,
@@ -126,6 +149,7 @@ struct qdma_virt_queue {
 	uint64_t num_dequeues;
 
 	uint16_t vq_id;
+	uint32_t flags;
 
 	qdma_set_fd_t *set_fd;
 	qdma_get_job_t *get_job;
@@ -191,6 +215,43 @@ struct qdma_sdd {
 	};
 } __rte_packed;
 
+#define QDMA_SG_FMT_SDB	0x0 /* single data buffer */
+#define QDMA_SG_FMT_FDS	0x1 /* frame data section */
+#define QDMA_SG_FMT_SGTE	0x2 /* SGT extension */
+#define QDMA_SG_SL_SHORT	0x1 /* short length */
+#define QDMA_SG_SL_LONG	0x0 /* long length */
+#define QDMA_SG_F	0x1 /* last sg entry */
+#define QDMA_SG_BMT_ENABLE 0x1
+#define QDMA_SG_BMT_DISABLE 0x0
+
+struct qdma_sg_entry {
+	uint32_t addr_lo;		/* address 0:31 */
+	uint32_t addr_hi:17;	/* address 32:48 */
+	uint32_t rsv:15;
+	union {
+		uint32_t data_len_sl0;	/* SL=0, the long format */
+		struct {
+			uint32_t len:17;	/* SL=1, the short format */
+			uint32_t reserve:3;
+			uint32_t sf:1;
+			uint32_t sr:1;
+			uint32_t size:10;	/* buff size */
+		} data_len_sl1;
+	} data_len;					/* AVAIL_LENGTH */
+	union {
+		uint32_t ctrl_fields;
+		struct {
+			uint32_t bpid:14;
+			uint32_t ivp:1;
+			uint32_t bmt:1;
+			uint32_t offset:12;
+			uint32_t fmt:2;
+			uint32_t sl:1;
+			uint32_t f:1;
+		} ctrl;
+	};
+} __attribute__((__packed__));
+
 /** Represents a DPDMAI raw device */
 struct dpaa2_dpdmai_dev {
 	/** Pointer to Next device instance */
diff --git a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
index ff4fc1d..cfec303 100644
--- a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
@@ -52,6 +52,8 @@ enum {
 
 #define RTE_QDMA_VQ_FD_LONG_FORMAT		(1ULL << 1)
 
+#define RTE_QDMA_VQ_FD_SG_FORMAT		(1ULL << 2)
+
 /** States if the source addresses is physical. */
 #define RTE_QDMA_JOB_SRC_PHY		(1ULL)
 
-- 
2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH 6/7] raw/dpaa2_qdma: support FLE pool per queue
  2020-09-07  9:25 [dpdk-dev] [PATCH 0/7] raw/dpaa2_qdma: driver enhancement Gagandeep Singh
                   ` (4 preceding siblings ...)
  2020-09-07  9:26 ` [dpdk-dev] [PATCH 5/7] raw/dpaa2_qdma: support scatter gather in enqueue Gagandeep Singh
@ 2020-09-07  9:26 ` Gagandeep Singh
  2020-09-07  9:26 ` [dpdk-dev] [PATCH 7/7] raw/dpaa2_qdma: support enqueue without response wait Gagandeep Singh
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 23+ messages in thread
From: Gagandeep Singh @ 2020-09-07  9:26 UTC (permalink / raw)
  To: dev, nipun.gupta, hemant.agrawal; +Cc: thomas.monjalon, Jun Yang

From: Jun Yang <jun.yang@nxp.com>

Don't mix SG and non-SG jobs in the same FLE pool format;
otherwise, non-SG performance is impacted.

In order to support SG queues and non-SG queues
with different FLE pool element formats, associate
the FLE pool with the queue instead of the device.

Signed-off-by: Jun Yang <jun.yang@nxp.com>
---
 drivers/raw/dpaa2_qdma/dpaa2_qdma.c         | 111 +++++++++++++++++-----------
 drivers/raw/dpaa2_qdma/dpaa2_qdma.h         |  28 ++++---
 drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h |   2 +-
 3 files changed, 88 insertions(+), 53 deletions(-)

diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
index 0c56a04..ba46ed0 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
@@ -300,12 +300,11 @@ static inline int dpdmai_dev_set_multi_fd_lf(
 	struct rte_qdma_job **ppjob;
 	uint16_t i;
 	int ret;
-	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
 	void *elem[RTE_QDMA_BURST_NB_MAX];
 	struct qbman_fle *fle;
 	uint64_t elem_iova, fle_iova;
 
-	ret = rte_mempool_get_bulk(qdma_dev->fle_pool, elem, nb_jobs);
+	ret = rte_mempool_get_bulk(qdma_vq->fle_pool, elem, nb_jobs);
 	if (ret) {
 		DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
 		return ret;
@@ -318,11 +317,8 @@ static inline int dpdmai_dev_set_multi_fd_lf(
 		elem_iova = DPAA2_VADDR_TO_IOVA(elem[i]);
 #endif
 
-		*((uint16_t *)
-		((uint64_t)elem[i] + QDMA_FLE_JOB_NB_OFFSET)) = 1;
-
 		ppjob = (struct rte_qdma_job **)
-				((uint64_t)elem[i] + QDMA_FLE_JOBS_OFFSET);
+			((uint64_t)elem[i] + QDMA_FLE_SINGLE_JOB_OFFSET);
 		*ppjob = job[i];
 
 		job[i]->vq_id = qdma_vq->vq_id;
@@ -360,13 +356,12 @@ static inline int dpdmai_dev_set_sg_fd_lf(
 	int ret = 0, i;
 	struct qdma_sg_entry *src_sge, *dst_sge;
 	uint32_t len, fmt, flags;
-	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
 
 	/*
 	 * Get an FLE/SDD from FLE pool.
 	 * Note: IO metadata is before the FLE and SDD memory.
 	 */
-	ret = rte_mempool_get(qdma_dev->fle_pool, (void **)(&elem));
+	ret = rte_mempool_get(qdma_vq->fle_pool, (void **)(&elem));
 	if (ret) {
 		DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
 		return ret;
@@ -382,7 +377,7 @@ static inline int dpdmai_dev_set_sg_fd_lf(
 	/* Save job context. */
 	*((uint16_t *)((uint64_t)elem + QDMA_FLE_JOB_NB_OFFSET)) = nb_jobs;
 	ppjob = (struct rte_qdma_job **)
-			((uint64_t)elem + QDMA_FLE_JOBS_OFFSET);
+			((uint64_t)elem + QDMA_FLE_SG_JOBS_OFFSET);
 	for (i = 0; i < nb_jobs; i++)
 		ppjob[i] = job[i];
 
@@ -450,7 +445,40 @@ static inline uint16_t dpdmai_dev_get_job_us(
 	return vqid;
 }
 
-static inline uint16_t dpdmai_dev_get_job_lf(
+static inline uint16_t dpdmai_dev_get_single_job_lf(
+						struct qdma_virt_queue *qdma_vq,
+						const struct qbman_fd *fd,
+						struct rte_qdma_job **job,
+						uint16_t *nb_jobs)
+{
+	struct qbman_fle *fle;
+	struct rte_qdma_job **ppjob = NULL;
+	uint16_t status;
+
+	/*
+	 * Fetch metadata from FLE. job and vq_id were set
+	 * in metadata in the enqueue operation.
+	 */
+	fle = (struct qbman_fle *)
+			DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd));
+
+	*nb_jobs = 1;
+	ppjob = (struct rte_qdma_job **)((uint64_t)fle -
+			QDMA_FLE_FLE_OFFSET + QDMA_FLE_SINGLE_JOB_OFFSET);
+
+	status = (DPAA2_GET_FD_ERR(fd) << 8) | (DPAA2_GET_FD_FRC(fd) & 0xFF);
+
+	*job = *ppjob;
+	(*job)->status = status;
+
+	/* Free FLE to the pool */
+	rte_mempool_put(qdma_vq->fle_pool,
+				(void *)((uint64_t)fle - QDMA_FLE_FLE_OFFSET));
+
+	return (*job)->vq_id;
+}
+
+static inline uint16_t dpdmai_dev_get_sg_job_lf(
 						struct qdma_virt_queue *qdma_vq,
 						const struct qbman_fd *fd,
 						struct rte_qdma_job **job,
@@ -459,7 +487,6 @@ static inline uint16_t dpdmai_dev_get_job_lf(
 	struct qbman_fle *fle;
 	struct rte_qdma_job **ppjob = NULL;
 	uint16_t i, status;
-	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
 
 	/*
 	 * Fetch metadata from FLE. job and vq_id were set
@@ -470,10 +497,9 @@ static inline uint16_t dpdmai_dev_get_job_lf(
 
 	*nb_jobs = *((uint16_t *)((uint64_t)fle -
 				QDMA_FLE_FLE_OFFSET + QDMA_FLE_JOB_NB_OFFSET));
-	status = (DPAA2_GET_FD_ERR(fd) << 8) | (DPAA2_GET_FD_FRC(fd) & 0xFF);
-
 	ppjob = (struct rte_qdma_job **)((uint64_t)fle -
-				QDMA_FLE_FLE_OFFSET + QDMA_FLE_JOBS_OFFSET);
+				QDMA_FLE_FLE_OFFSET + QDMA_FLE_SG_JOBS_OFFSET);
+	status = (DPAA2_GET_FD_ERR(fd) << 8) | (DPAA2_GET_FD_FRC(fd) & 0xFF);
 
 	for (i = 0; i < (*nb_jobs); i++) {
 		job[i] = ppjob[i];
@@ -481,7 +507,7 @@ static inline uint16_t dpdmai_dev_get_job_lf(
 	}
 
 	/* Free FLE to the pool */
-	rte_mempool_put(qdma_dev->fle_pool,
+	rte_mempool_put(qdma_vq->fle_pool,
 				(void *)((uint64_t)fle - QDMA_FLE_FLE_OFFSET));
 
 	return job[0]->vq_id;
@@ -1044,14 +1070,9 @@ dpaa2_qdma_reset(struct rte_rawdev *rawdev)
 	memset(&qdma_core_info, 0,
 		sizeof(struct qdma_per_core_info) * RTE_MAX_LCORE);
 
-	/* Free the FLE pool */
-	if (qdma_dev->fle_pool)
-		rte_mempool_free(qdma_dev->fle_pool);
-
 	/* Reset QDMA device structure */
 	qdma_dev->max_hw_queues_per_core = 0;
-	qdma_dev->fle_pool = NULL;
-	qdma_dev->fle_pool_count = 0;
+	qdma_dev->fle_queue_pool_cnt = 0;
 	qdma_dev->max_vqs = 0;
 
 	return 0;
@@ -1094,23 +1115,7 @@ dpaa2_qdma_configure(const struct rte_rawdev *rawdev,
 		return -ENOMEM;
 	}
 	qdma_dev->max_vqs = qdma_config->max_vqs;
-
-	/* Allocate FLE pool; just append PID so that in case of
-	 * multiprocess, the pool's don't collide.
-	 */
-	snprintf(name, sizeof(name), "qdma_fle_pool%u",
-		 getpid());
-	qdma_dev->fle_pool = rte_mempool_create(name,
-			qdma_config->fle_pool_count, QDMA_FLE_POOL_SIZE,
-			QDMA_FLE_CACHE_SIZE(qdma_config->fle_pool_count), 0,
-			NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
-	if (!qdma_dev->fle_pool) {
-		DPAA2_QDMA_ERR("qdma_fle_pool create failed");
-		rte_free(qdma_dev->vqs);
-		qdma_dev->vqs = NULL;
-		return -ENOMEM;
-	}
-	qdma_dev->fle_pool_count = qdma_config->fle_pool_count;
+	qdma_dev->fle_queue_pool_cnt = qdma_config->fle_queue_pool_cnt;
 
 	return 0;
 }
@@ -1171,11 +1176,13 @@ dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
 			  rte_rawdev_obj_t queue_conf)
 {
 	char ring_name[32];
+	char pool_name[64];
 	int i;
 	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
 	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
 	struct rte_qdma_queue_config *q_config =
 		(struct rte_qdma_queue_config *)queue_conf;
+	uint32_t pool_size;
 
 	DPAA2_QDMA_FUNC_TRACE();
 
@@ -1207,6 +1214,9 @@ dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
 			rte_spinlock_unlock(&qdma_dev->lock);
 			return -ENODEV;
 		}
+		pool_size = QDMA_FLE_SG_POOL_SIZE;
+	} else {
+		pool_size = QDMA_FLE_SINGLE_POOL_SIZE;
 	}
 
 	if (q_config->flags & RTE_QDMA_VQ_EXCLUSIVE_PQ) {
@@ -1217,7 +1227,7 @@ dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
 		/* Allocate a Ring for Virtual Queue in VQ mode */
 		snprintf(ring_name, sizeof(ring_name), "status ring %d", i);
 		qdma_dev->vqs[i].status_ring = rte_ring_create(ring_name,
-			qdma_dev->fle_pool_count, rte_socket_id(), 0);
+			qdma_dev->fle_queue_pool_cnt, rte_socket_id(), 0);
 		if (!qdma_dev->vqs[i].status_ring) {
 			DPAA2_QDMA_ERR("Status ring creation failed for vq");
 			rte_spinlock_unlock(&qdma_dev->lock);
@@ -1239,17 +1249,31 @@ dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
 		return -ENODEV;
 	}
 
+	snprintf(pool_name, sizeof(pool_name),
+		"qdma_fle_pool%u_queue%d", getpid(), i);
+	qdma_dev->vqs[i].fle_pool = rte_mempool_create(pool_name,
+			qdma_dev->fle_queue_pool_cnt, pool_size,
+			QDMA_FLE_CACHE_SIZE(qdma_dev->fle_queue_pool_cnt), 0,
+			NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
+	if (!qdma_dev->vqs[i].fle_pool) {
+		DPAA2_QDMA_ERR("qdma_fle_pool create failed");
+		rte_spinlock_unlock(&qdma_dev->lock);
+		return -ENOMEM;
+	}
+
 	qdma_dev->vqs[i].flags = q_config->flags;
 	qdma_dev->vqs[i].in_use = 1;
 	qdma_dev->vqs[i].lcore_id = q_config->lcore_id;
 	memset(&qdma_dev->vqs[i].rbp, 0, sizeof(struct rte_qdma_rbp));
 
 	if (q_config->flags & RTE_QDMA_VQ_FD_LONG_FORMAT) {
-		if (q_config->flags & RTE_QDMA_VQ_FD_SG_FORMAT)
+		if (q_config->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
 			qdma_dev->vqs[i].set_fd = dpdmai_dev_set_sg_fd_lf;
-		else
+			qdma_dev->vqs[i].get_job = dpdmai_dev_get_sg_job_lf;
+		} else {
 			qdma_dev->vqs[i].set_fd = dpdmai_dev_set_multi_fd_lf;
-		qdma_dev->vqs[i].get_job = dpdmai_dev_get_job_lf;
+			qdma_dev->vqs[i].get_job = dpdmai_dev_get_single_job_lf;
+		}
 	} else {
 		qdma_dev->vqs[i].set_fd = dpdmai_dev_set_fd_us;
 		qdma_dev->vqs[i].get_job = dpdmai_dev_get_job_us;
@@ -1435,6 +1459,9 @@ dpaa2_qdma_queue_release(struct rte_rawdev *rawdev,
 		put_hw_queue(qdma_vq->hw_queue);
 	}
 
+	if (qdma_vq->fle_pool)
+		rte_mempool_free(qdma_vq->fle_pool);
+
 	memset(qdma_vq, 0, sizeof(struct qdma_virt_queue));
 
 	rte_spinlock_unlock(&qdma_dev->lock);
diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
index 43a01d5..0892a19 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
@@ -15,19 +15,27 @@ struct rte_qdma_job;
 
 #define DPAA2_DPDMAI_MAX_QUEUES	8
 
-/** FLE pool size: job number(uint64_t) +
- * 3 Frame list + 2 source/destination descriptor  +
- * 32 (src + dst) sg entries + 32 jobs pointers.
+/** FLE single job pool size: job pointer(uint64_t) +
+ * 3 Frame list + 2 source/destination descriptor.
  */
+#define QDMA_FLE_SINGLE_POOL_SIZE (sizeof(uint64_t) + \
+			sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE + \
+			sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD)
 
-#define QDMA_FLE_POOL_SIZE (sizeof(uint64_t) + \
+/** FLE sg jobs pool size: job number(uint64_t) +
+ * 3 Frame list + 2 source/destination descriptor  +
+ * 64 (src + dst) sg entries + 64 jobs pointers.
+ */
+#define QDMA_FLE_SG_POOL_SIZE (sizeof(uint64_t) + \
 		sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE + \
 		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD + \
-		sizeof(struct qdma_sg_entry) * DPAA2_QDMA_MAX_SG_NB * 2 + \
+		sizeof(struct qdma_sg_entry) * (DPAA2_QDMA_MAX_SG_NB * 2) + \
 		sizeof(struct rte_qdma_job *) * DPAA2_QDMA_MAX_SG_NB)
 
 #define QDMA_FLE_JOB_NB_OFFSET 0
 
+#define QDMA_FLE_SINGLE_JOB_OFFSET 0
+
 #define QDMA_FLE_FLE_OFFSET \
 		(QDMA_FLE_JOB_NB_OFFSET + sizeof(uint64_t))
 
@@ -39,7 +47,7 @@ struct rte_qdma_job;
 		(QDMA_FLE_SDD_OFFSET + \
 		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD)
 
-#define QDMA_FLE_JOBS_OFFSET \
+#define QDMA_FLE_SG_JOBS_OFFSET \
 		(QDMA_FLE_SG_ENTRY_OFFSET + \
 		sizeof(struct qdma_sg_entry) * DPAA2_QDMA_MAX_SG_NB * 2)
 
@@ -85,10 +93,8 @@ struct qdma_device {
 	uint16_t max_vqs;
 	/** Device state - started or stopped */
 	uint8_t state;
-	/** FLE pool for the device */
-	struct rte_mempool *fle_pool;
-	/** FLE pool size */
-	int fle_pool_count;
+	/** FLE queue pool size */
+	int fle_queue_pool_cnt;
 	/** A lock to QDMA device whenever required */
 	rte_spinlock_t lock;
 };
@@ -135,6 +141,8 @@ struct qdma_virt_queue {
 	struct rte_ring *status_ring;
 	/** Associated hw queue */
 	struct qdma_hw_queue *hw_queue;
+	/** FLE pool for the queue */
+	struct rte_mempool *fle_pool;
 	/** Route by port */
 	struct rte_qdma_rbp rbp;
 	/** Associated lcore id */
diff --git a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
index cfec303..3cd4167 100644
--- a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
@@ -80,7 +80,7 @@ struct rte_qdma_config {
 	 * maximum number of inflight jobs on the QDMA device. This should
 	 * be power of 2.
 	 */
-	int fle_pool_count;
+	int fle_queue_pool_cnt;
 };
 
 struct rte_qdma_rbp {
-- 
2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH 7/7] raw/dpaa2_qdma: support enqueue without response wait
  2020-09-07  9:25 [dpdk-dev] [PATCH 0/7] raw/dpaa2_qdma: driver enhancement Gagandeep Singh
                   ` (5 preceding siblings ...)
  2020-09-07  9:26 ` [dpdk-dev] [PATCH 6/7] raw/dpaa2_qdma: support FLE pool per queue Gagandeep Singh
@ 2020-09-07  9:26 ` Gagandeep Singh
  2020-09-25 10:54 ` [dpdk-dev] [PATCH 0/7] raw/dpaa2_qdma: driver enhancement Hemant Agrawal
  2020-10-15  9:47 ` [dpdk-dev] [PATCH v2 " Gagandeep Singh
  8 siblings, 0 replies; 23+ messages in thread
From: Gagandeep Singh @ 2020-09-07  9:26 UTC (permalink / raw)
  To: dev, nipun.gupta, hemant.agrawal; +Cc: thomas.monjalon, Jun Yang

From: Jun Yang <jun.yang@nxp.com>

When enqueuing without waiting for a response, the user must check
whether the DMA transfer has completed using their own logic.

The qDMA FLE pool is not used in this mode, since there is no opportunity
to put the FLE back into the pool without a dequeue response.

The user application is responsible for passing the FLE memory to the
qDMA driver through the qDMA job descriptor, and for maintaining it as well.

Signed-off-by: Jun Yang <jun.yang@nxp.com>
---
 drivers/raw/dpaa2_qdma/dpaa2_qdma.c         | 85 ++++++++++++++++++++++++-----
 drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h |  7 +++
 2 files changed, 78 insertions(+), 14 deletions(-)

diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
index ba46ed0..845aa53 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
@@ -46,7 +46,7 @@ static struct qdma_per_core_info qdma_core_info[RTE_MAX_LCORE];
 static inline int
 qdma_populate_fd_pci(phys_addr_t src, phys_addr_t dest,
 			uint32_t len, struct qbman_fd *fd,
-			struct rte_qdma_rbp *rbp)
+			struct rte_qdma_rbp *rbp, int ser)
 {
 	fd->simple_pci.saddr_lo = lower_32_bits((uint64_t) (src));
 	fd->simple_pci.saddr_hi = upper_32_bits((uint64_t) (src));
@@ -56,7 +56,7 @@ qdma_populate_fd_pci(phys_addr_t src, phys_addr_t dest,
 	fd->simple_pci.bmt = 1;
 	fd->simple_pci.fmt = 3;
 	fd->simple_pci.sl = 1;
-	fd->simple_pci.ser = 1;
+	fd->simple_pci.ser = ser;
 
 	fd->simple_pci.sportid = rbp->sportid;	/*pcie 3 */
 	fd->simple_pci.srbp = rbp->srbp;
@@ -81,7 +81,7 @@ qdma_populate_fd_pci(phys_addr_t src, phys_addr_t dest,
 
 static inline int
 qdma_populate_fd_ddr(phys_addr_t src, phys_addr_t dest,
-			uint32_t len, struct qbman_fd *fd)
+			uint32_t len, struct qbman_fd *fd, int ser)
 {
 	fd->simple_ddr.saddr_lo = lower_32_bits((uint64_t) (src));
 	fd->simple_ddr.saddr_hi = upper_32_bits((uint64_t) (src));
@@ -91,7 +91,7 @@ qdma_populate_fd_ddr(phys_addr_t src, phys_addr_t dest,
 	fd->simple_ddr.bmt = 1;
 	fd->simple_ddr.fmt = 3;
 	fd->simple_ddr.sl = 1;
-	fd->simple_ddr.ser = 1;
+	fd->simple_ddr.ser = ser;
 	/**
 	 * src If RBP=0 {NS,RDTTYPE[3:0]}: 0_1011
 	 * Coherent copy of cacheable memory,
@@ -204,6 +204,8 @@ static inline int dpdmai_dev_set_fd_us(
 	struct rte_qdma_job **ppjob;
 	size_t iova;
 	int ret = 0, loop;
+	int ser = (qdma_vq->flags & RTE_QDMA_VQ_NO_RESPONSE) ?
+				0 : 1;
 
 	for (loop = 0; loop < nb_jobs; loop++) {
 		if (job[loop]->src & QDMA_RBP_UPPER_ADDRESS_MASK)
@@ -218,12 +220,12 @@ static inline int dpdmai_dev_set_fd_us(
 
 		if ((rbp->drbp == 1) || (rbp->srbp == 1))
 			ret = qdma_populate_fd_pci((phys_addr_t)job[loop]->src,
-						(phys_addr_t)job[loop]->dest,
-						job[loop]->len, &fd[loop], rbp);
+					(phys_addr_t)job[loop]->dest,
+					job[loop]->len, &fd[loop], rbp, ser);
 		else
 			ret = qdma_populate_fd_ddr((phys_addr_t)job[loop]->src,
-						(phys_addr_t)job[loop]->dest,
-						job[loop]->len, &fd[loop]);
+					(phys_addr_t)job[loop]->dest,
+					job[loop]->len, &fd[loop], ser);
 	}
 
 	return ret;
@@ -290,6 +292,51 @@ static uint32_t qdma_populate_sg_entry(
 	return total_len;
 }
 
+static inline int dpdmai_dev_set_multi_fd_lf_no_rsp(
+		struct qdma_virt_queue *qdma_vq,
+		struct qbman_fd *fd,
+		struct rte_qdma_job **job,
+		uint16_t nb_jobs)
+{
+	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
+	struct rte_qdma_job **ppjob;
+	uint16_t i;
+	void *elem;
+	struct qbman_fle *fle;
+	uint64_t elem_iova, fle_iova;
+
+	for (i = 0; i < nb_jobs; i++) {
+		elem = job[i]->usr_elem;
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+		elem_iova = rte_mempool_virt2iova(elem);
+#else
+		elem_iova = DPAA2_VADDR_TO_IOVA(elem);
+#endif
+
+		ppjob = (struct rte_qdma_job **)
+				((uint64_t)elem + QDMA_FLE_SINGLE_JOB_OFFSET);
+		*ppjob = job[i];
+
+		job[i]->vq_id = qdma_vq->vq_id;
+
+		fle = (struct qbman_fle *)
+				((uint64_t)elem + QDMA_FLE_FLE_OFFSET);
+		fle_iova = elem_iova + QDMA_FLE_FLE_OFFSET;
+
+		DPAA2_SET_FD_ADDR(&fd[i], fle_iova);
+		DPAA2_SET_FD_COMPOUND_FMT(&fd[i]);
+
+		memset(fle, 0, DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle) +
+				DPAA2_QDMA_MAX_SDD * sizeof(struct qdma_sdd));
+
+		dpaa2_qdma_populate_fle(fle, fle_iova, rbp,
+			job[i]->src, job[i]->dest, job[i]->len,
+			job[i]->flags, QBMAN_FLE_WORD4_FMT_SBF);
+	}
+
+	return 0;
+}
+
 static inline int dpdmai_dev_set_multi_fd_lf(
 		struct qdma_virt_queue *qdma_vq,
 		struct qbman_fd *fd,
@@ -361,10 +408,14 @@ static inline int dpdmai_dev_set_sg_fd_lf(
 	 * Get an FLE/SDD from FLE pool.
 	 * Note: IO metadata is before the FLE and SDD memory.
 	 */
-	ret = rte_mempool_get(qdma_vq->fle_pool, (void **)(&elem));
-	if (ret) {
-		DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
-		return ret;
+	if (qdma_vq->flags & RTE_QDMA_VQ_NO_RESPONSE) {
+		elem = job[0]->usr_elem;
+	} else {
+		ret = rte_mempool_get(qdma_vq->fle_pool, &elem);
+		if (ret) {
+			DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
+			return ret;
+		}
 	}
 
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
@@ -389,7 +440,8 @@ static inline int dpdmai_dev_set_sg_fd_lf(
 
 	DPAA2_SET_FD_ADDR(fd, fle_iova);
 	DPAA2_SET_FD_COMPOUND_FMT(fd);
-	DPAA2_SET_FD_FRC(fd, QDMA_SER_CTX);
+	if (!(qdma_vq->flags & RTE_QDMA_VQ_NO_RESPONSE))
+		DPAA2_SET_FD_FRC(fd, QDMA_SER_CTX);
 
 	/* Populate FLE */
 	if (likely(nb_jobs > 1)) {
@@ -1271,7 +1323,12 @@ dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
 			qdma_dev->vqs[i].set_fd = dpdmai_dev_set_sg_fd_lf;
 			qdma_dev->vqs[i].get_job = dpdmai_dev_get_sg_job_lf;
 		} else {
-			qdma_dev->vqs[i].set_fd = dpdmai_dev_set_multi_fd_lf;
+			if (q_config->flags & RTE_QDMA_VQ_NO_RESPONSE)
+				qdma_dev->vqs[i].set_fd =
+					dpdmai_dev_set_multi_fd_lf_no_rsp;
+			else
+				qdma_dev->vqs[i].set_fd =
+					dpdmai_dev_set_multi_fd_lf;
 			qdma_dev->vqs[i].get_job = dpdmai_dev_get_single_job_lf;
 		}
 	} else {
diff --git a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
index 3cd4167..cc1ac25 100644
--- a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
@@ -54,6 +54,8 @@ enum {
 
 #define RTE_QDMA_VQ_FD_SG_FORMAT		(1ULL << 2)
 
+#define RTE_QDMA_VQ_NO_RESPONSE			(1ULL << 3)
+
 /** States if the source addresses is physical. */
 #define RTE_QDMA_JOB_SRC_PHY		(1ULL)
 
@@ -154,6 +156,11 @@ struct rte_qdma_job {
 	 */
 	uint16_t status;
 	uint16_t vq_id;
+	/**
+	 * FLE pool element maintained by the user if no qDMA response is used.
+	 * Note: the address must be allocated from a DPDK memory pool.
+	 */
+	void *usr_elem;
 };
 
 struct rte_qdma_enqdeq {
-- 
2.7.4


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [dpdk-dev] [PATCH 1/7] raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops
  2020-09-07  9:25 ` [dpdk-dev] [PATCH 1/7] raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops Gagandeep Singh
@ 2020-09-25 10:53   ` Hemant Agrawal
  2020-10-06 21:36   ` Thomas Monjalon
  1 sibling, 0 replies; 23+ messages in thread
From: Hemant Agrawal @ 2020-09-25 10:53 UTC (permalink / raw)
  To: Gagandeep Singh, dev, nipun.gupta, hemant.agrawal

Acked-by: Hemant Agrawal <hemant.agrawal@nxp.com>



^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [dpdk-dev] [PATCH 0/7] raw/dpaa2_qdma: driver enhancement
  2020-09-07  9:25 [dpdk-dev] [PATCH 0/7] raw/dpaa2_qdma: driver enhancement Gagandeep Singh
                   ` (6 preceding siblings ...)
  2020-09-07  9:26 ` [dpdk-dev] [PATCH 7/7] raw/dpaa2_qdma: support enqueue without response wait Gagandeep Singh
@ 2020-09-25 10:54 ` Hemant Agrawal
  2020-10-15  9:47 ` [dpdk-dev] [PATCH v2 " Gagandeep Singh
  8 siblings, 0 replies; 23+ messages in thread
From: Hemant Agrawal @ 2020-09-25 10:54 UTC (permalink / raw)
  To: Gagandeep Singh, dev, nipun.gupta, hemant.agrawal

Series-

Acked-by: Hemant Agrawal <hemant.agrawal@nxp.com>

On 9/7/2020 2:55 PM, Gagandeep Singh wrote:
> In this patchset, we have done some changes in dpaa2_qdma driver
> related to rawdev APIs, optimizations, scatter-gather support on TX,
> enqueue without wait.
>
> Gagandeep Singh (2):
>    raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops
>    raw/dpaa2_qdma: memset to only required memory
>
> Jun Yang (5):
>    raw/dpaa2_qdma: refactor the code
>    raw/dpaa2_qdma: optimize IOVA conversion
>    raw/dpaa2_qdma: support scatter gather in enqueue
>    raw/dpaa2_qdma: support FLE pool per queue
>    raw/dpaa2_qdma: support enqueue without response wait
>
>   drivers/bus/fslmc/portal/dpaa2_hw_pvt.h     |   18 +-
>   drivers/raw/dpaa2_qdma/dpaa2_qdma.c         | 1824 ++++++++++++++++-----------
>   drivers/raw/dpaa2_qdma/dpaa2_qdma.h         |  128 +-
>   drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h |  231 +---
>   4 files changed, 1257 insertions(+), 944 deletions(-)
>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [dpdk-dev] [PATCH 1/7] raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops
  2020-09-07  9:25 ` [dpdk-dev] [PATCH 1/7] raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops Gagandeep Singh
  2020-09-25 10:53   ` Hemant Agrawal
@ 2020-10-06 21:36   ` Thomas Monjalon
  2020-10-14 10:27     ` Gagandeep Singh
  1 sibling, 1 reply; 23+ messages in thread
From: Thomas Monjalon @ 2020-10-06 21:36 UTC (permalink / raw)
  To: Gagandeep Singh; +Cc: dev, nipun.gupta, hemant.agrawal

Hi,

I see a build issue in the first patch,
so I didn't check further:

drivers/raw/dpaa2_qdma/dpaa2_qdma.c:1158:30: error:
initialization of ‘int (*)(const struct rte_rawdev *, void *, size_t)’ {aka ‘int (*)(const struct rte_rawdev *, void *, long unsigned int)’} from incompatible pointer type ‘int (*)(const struct rte_rawdev *, void *)’ [-Werror=incompatible-pointer-types]
 1158 |  .dev_configure            = dpaa2_qdma_configure,




^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [dpdk-dev] [PATCH 1/7] raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops
  2020-10-06 21:36   ` Thomas Monjalon
@ 2020-10-14 10:27     ` Gagandeep Singh
  2020-10-14 11:37       ` Bruce Richardson
  0 siblings, 1 reply; 23+ messages in thread
From: Gagandeep Singh @ 2020-10-14 10:27 UTC (permalink / raw)
  To: Thomas Monjalon, bruce.richardson; +Cc: dev, Nipun Gupta, Hemant Agrawal

Hi Bruce,

May I know the correct way to do a 32-bit compilation on a 64-bit x86 machine? I tried passing "-m32" in c_args, but it returns the error below:

b10814@dpdk-xeon:~/dpdk-up$ meson build -Dc_args='-m32'
Directory already configured.

Just run your build command (e.g. ninja) and Meson will regenerate as necessary.
If ninja fails, run "ninja reconfigure" or "meson --reconfigure"
to force Meson to regenerate.

If build failures persist, run "meson setup --wipe" to rebuild from scratch
using the same options as passed when configuring the build.
To change option values, run "meson configure" instead.
b10814@dpdk-xeon:~/dpdk-up$ ninja -C build
ninja: Entering directory `build'
[21/2145] Linking target buildtools/pmdinfogen/pmdinfogen
FAILED: buildtools/pmdinfogen/pmdinfogen
cc  -o buildtools/pmdinfogen/pmdinfogen 'buildtools/pmdinfogen/7ea0a9a@@pmdinfogen@exe/pmdinfogen.c.o' -Wl,--as-needed -Wl,--no-undefined -Wl,-O1
/usr/bin/ld: i386 architecture of input file `buildtools/pmdinfogen/7ea0a9a@@pmdinfogen@exe/pmdinfogen.c.o' is incompatible with i386:x86-64 output
collect2: error: ld returned 1 exit status
[23/2145] Linking target lib/librte_kvargs.so.21.0
FAILED: lib/librte_kvargs.so.21.0
cc  -o lib/librte_kvargs.so.21.0 'lib/76b5a35@@rte_kvargs@sta/librte_kvargs_rte_kvargs.c.o' -Wl,--as-needed -Wl,--no-undefined -Wl,-O1 -shared -fPIC -Wl,--start-group -Wl,-soname,librte_kvargs.so.21 -Wl,--no-as-needed -pthread -lm -ldl -Wl,--end-group -Wl,--version-script=/home/b10814/dpdk-up/lib/librte_kvargs/rte_kvargs_version.map
/usr/bin/ld: i386 architecture of input file `lib/76b5a35@@rte_kvargs@sta/librte_kvargs_rte_kvargs.c.o' is incompatible with i386:x86-64 output
/usr/bin/ld: lib/76b5a35@@rte_kvargs@sta/librte_kvargs_rte_kvargs.c.o: file class ELFCLASS32 incompatible with ELFCLASS64
/usr/bin/ld: final link failed: File in wrong format
collect2: error: ld returned 1 exit status
[46/2145] Compiling C object 'lib/76b5a35@@rte_cryptodev@sta/librte_cryptodev_rte_cryptodev.c.o'
ninja: build stopped: subcommand failed.
b10814@dpdk-xeon:~/dpdk-up$


> -----Original Message-----
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Wednesday, October 7, 2020 3:07 AM
> To: Gagandeep Singh <G.Singh@nxp.com>
> Cc: dev@dpdk.org; Nipun Gupta <nipun.gupta@nxp.com>; Hemant Agrawal
> <hemant.agrawal@nxp.com>
> Subject: Re: [dpdk-dev] [PATCH 1/7] raw/dpaa2_qdma: change DPAA2 QDMA
> APIs to rawdev ops
> 
> Hi,
> 
> I see a build issue in the first patch,
> so I didn't check further:
> 
> drivers/raw/dpaa2_qdma/dpaa2_qdma.c:1158:30: error:
> initialization of ‘int (*)(const struct rte_rawdev *, void *, size_t)’ {aka ‘int
> (*)(const struct rte_rawdev *, void *, long unsigned int)’} from incompatible
> pointer type ‘int (*)(const struct rte_rawdev *, void *)’ [-Werror=incompatible-
> pointer-types]
>  1158 |  .dev_configure            = dpaa2_qdma_configure,
> 
> 


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [dpdk-dev] [PATCH 1/7] raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops
  2020-10-14 10:27     ` Gagandeep Singh
@ 2020-10-14 11:37       ` Bruce Richardson
  2020-10-14 12:43         ` Ferruh Yigit
  0 siblings, 1 reply; 23+ messages in thread
From: Bruce Richardson @ 2020-10-14 11:37 UTC (permalink / raw)
  To: Gagandeep Singh; +Cc: Thomas Monjalon, dev, Nipun Gupta, Hemant Agrawal

On Wed, Oct 14, 2020 at 10:27:48AM +0000, Gagandeep Singh wrote:
> Hi Bruce,
> 
> May I know the correct way of 32bit compilation on 64bit X86 machine. I tried the "-m32" c_args, but it returns the below error:
> 
When I build, I also always set -m32 in 'c_link_args' and set
PKG_CONFIG_LIBDIR in the environment to point to the 32-bit pkg-config
directory e.g. /usr/lib/i386-linux-gnu/pkgconfig on debian/ubuntu systems.

Regards,
/Bruce

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [dpdk-dev] [PATCH 1/7] raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops
  2020-10-14 11:37       ` Bruce Richardson
@ 2020-10-14 12:43         ` Ferruh Yigit
  0 siblings, 0 replies; 23+ messages in thread
From: Ferruh Yigit @ 2020-10-14 12:43 UTC (permalink / raw)
  To: Bruce Richardson, Gagandeep Singh
  Cc: Thomas Monjalon, dev, Nipun Gupta, Hemant Agrawal

On 10/14/2020 12:37 PM, Bruce Richardson wrote:
> On Wed, Oct 14, 2020 at 10:27:48AM +0000, Gagandeep Singh wrote:
>> Hi Bruce,
>>
>> May I know the correct way of 32bit compilation on 64bit X86 machine. I tried the "-m32" c_args, but it returns the below error:
>>
> When I build, I also always set -m32 in 'c_link_args' and set
> PKG_CONFIG_LIBDIR in the environment to point to the 32-bit pkg-config
> directory e.g. /usr/lib/i386-linux-gnu/pkgconfig on debian/ubuntu systems.
> 

And "PKG_CONFIG_LIBDIR=/usr/lib/pkgconfig" is working for me with Fedora

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH v2 0/7] raw/dpaa2_qdma: driver enhancement
  2020-09-07  9:25 [dpdk-dev] [PATCH 0/7] raw/dpaa2_qdma: driver enhancement Gagandeep Singh
                   ` (7 preceding siblings ...)
  2020-09-25 10:54 ` [dpdk-dev] [PATCH 0/7] raw/dpaa2_qdma: driver enhancement Hemant Agrawal
@ 2020-10-15  9:47 ` Gagandeep Singh
  2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 1/7] raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops Gagandeep Singh
                     ` (6 more replies)
  8 siblings, 7 replies; 23+ messages in thread
From: Gagandeep Singh @ 2020-10-15  9:47 UTC (permalink / raw)
  To: dev, nipun.gupta, hemant.agrawal; +Cc: thomas, Gagandeep Singh

In this patchset, we have done some changes in dpaa2_qdma driver
related to rawdev APIs, optimizations, scatter-gather support on TX,
enqueue without wait.

v2-change-log:
* Rebase and compilation fixes

Gagandeep Singh (2):
  raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops
  raw/dpaa2_qdma: memset to only required memory

Jun Yang (5):
  raw/dpaa2_qdma: refactor the code
  raw/dpaa2_qdma: optimize IOVA conversion
  raw/dpaa2_qdma: support scatter gather in enqueue
  raw/dpaa2_qdma: support FLE pool per queue
  raw/dpaa2_qdma: support enqueue without response wait

 drivers/bus/fslmc/portal/dpaa2_hw_pvt.h     |   18 +-
 drivers/raw/dpaa2_qdma/dpaa2_qdma.c         | 1833 +++++++++++--------
 drivers/raw/dpaa2_qdma/dpaa2_qdma.h         |  128 +-
 drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h |  231 +--
 4 files changed, 1267 insertions(+), 943 deletions(-)

-- 
2.17.1


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH v2 1/7] raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops
  2020-10-15  9:47 ` [dpdk-dev] [PATCH v2 " Gagandeep Singh
@ 2020-10-15  9:47   ` Gagandeep Singh
  2020-10-19 11:45     ` Thomas Monjalon
  2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 2/7] raw/dpaa2_qdma: memset to only required memory Gagandeep Singh
                     ` (5 subsequent siblings)
  6 siblings, 1 reply; 23+ messages in thread
From: Gagandeep Singh @ 2020-10-15  9:47 UTC (permalink / raw)
  To: dev, nipun.gupta, hemant.agrawal; +Cc: thomas, Gagandeep Singh

dpaa2_qdma was partially using direct PMD APIs.
This patch changes that and adapts the driver to use
more of the rawdev APIs.

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 drivers/raw/dpaa2_qdma/dpaa2_qdma.c         | 339 ++++++++++----------
 drivers/raw/dpaa2_qdma/dpaa2_qdma.h         |   3 +-
 drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h | 207 ++----------
 3 files changed, 195 insertions(+), 354 deletions(-)

diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
index 4b8474951..530ee156d 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018-2019 NXP
+ * Copyright 2018-2020 NXP
  */
 
 #include <string.h>
@@ -30,7 +30,7 @@ uint32_t dpaa2_coherent_no_alloc_cache;
 uint32_t dpaa2_coherent_alloc_cache;
 
 /* QDMA device */
-static struct qdma_device qdma_dev;
+static struct qdma_device q_dev;
 
 /* QDMA H/W queues list */
 TAILQ_HEAD(qdma_hw_queue_list, qdma_hw_queue);
@@ -51,9 +51,11 @@ typedef int (dpdmai_dev_dequeue_multijob_t)(struct dpaa2_dpdmai_dev *dpdmai_dev,
 
 dpdmai_dev_dequeue_multijob_t *dpdmai_dev_dequeue_multijob;
 
-typedef uint16_t (dpdmai_dev_get_job_t)(const struct qbman_fd *fd,
+typedef uint16_t (dpdmai_dev_get_job_t)(struct qdma_device *qdma_dev,
+					const struct qbman_fd *fd,
 					struct rte_qdma_job **job);
-typedef int (dpdmai_dev_set_fd_t)(struct qbman_fd *fd,
+typedef int (dpdmai_dev_set_fd_t)(struct qdma_device *qdma_dev,
+				  struct qbman_fd *fd,
 				  struct rte_qdma_job *job,
 				  struct rte_qdma_rbp *rbp,
 				  uint16_t vq_id);
@@ -201,10 +203,12 @@ dpaa2_qdma_populate_fle(struct qbman_fle *fle,
 	DPAA2_SET_FLE_FIN(fle);
 }
 
-static inline int dpdmai_dev_set_fd_us(struct qbman_fd *fd,
-					struct rte_qdma_job *job,
-					struct rte_qdma_rbp *rbp,
-					uint16_t vq_id)
+static inline int dpdmai_dev_set_fd_us(
+				struct qdma_device *qdma_dev __rte_unused,
+				struct qbman_fd *fd,
+				struct rte_qdma_job *job,
+				struct rte_qdma_rbp *rbp,
+				uint16_t vq_id)
 {
 	struct rte_qdma_job **ppjob;
 	size_t iova;
@@ -230,7 +234,8 @@ static inline int dpdmai_dev_set_fd_us(struct qbman_fd *fd,
 					   job->len, fd);
 	return ret;
 }
-static inline int dpdmai_dev_set_fd_lf(struct qbman_fd *fd,
+static inline int dpdmai_dev_set_fd_lf(struct qdma_device *qdma_dev,
+					struct qbman_fd *fd,
 					struct rte_qdma_job *job,
 					struct rte_qdma_rbp *rbp,
 					uint16_t vq_id)
@@ -242,7 +247,7 @@ static inline int dpdmai_dev_set_fd_lf(struct qbman_fd *fd,
 	 * Get an FLE/SDD from FLE pool.
 	 * Note: IO metadata is before the FLE and SDD memory.
 	 */
-	ret = rte_mempool_get(qdma_dev.fle_pool, (void **)(&ppjob));
+	ret = rte_mempool_get(qdma_dev->fle_pool, (void **)(&ppjob));
 	if (ret) {
 		DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
 		return ret;
@@ -266,8 +271,10 @@ static inline int dpdmai_dev_set_fd_lf(struct qbman_fd *fd,
 	return 0;
 }
 
-static inline uint16_t dpdmai_dev_get_job_us(const struct qbman_fd *fd,
-					struct rte_qdma_job **job)
+static inline uint16_t dpdmai_dev_get_job_us(
+				struct qdma_device *qdma_dev __rte_unused,
+				const struct qbman_fd *fd,
+				struct rte_qdma_job **job)
 {
 	uint16_t vqid;
 	size_t iova;
@@ -288,8 +295,9 @@ static inline uint16_t dpdmai_dev_get_job_us(const struct qbman_fd *fd,
 	return vqid;
 }
 
-static inline uint16_t dpdmai_dev_get_job_lf(const struct qbman_fd *fd,
-					struct rte_qdma_job **job)
+static inline uint16_t dpdmai_dev_get_job_lf(struct qdma_device *qdma_dev,
+					     const struct qbman_fd *fd,
+					     struct rte_qdma_job **job)
 {
 	struct rte_qdma_job **ppjob;
 	uint16_t vqid;
@@ -307,7 +315,7 @@ static inline uint16_t dpdmai_dev_get_job_lf(const struct qbman_fd *fd,
 	vqid = (*job)->vq_id;
 
 	/* Free FLE to the pool */
-	rte_mempool_put(qdma_dev.fle_pool, (void *)ppjob);
+	rte_mempool_put(qdma_dev->fle_pool, (void *)ppjob);
 
 	return vqid;
 }
@@ -341,7 +349,7 @@ free_hw_queue(struct qdma_hw_queue *queue)
 
 
 static struct qdma_hw_queue *
-get_hw_queue(uint32_t lcore_id)
+get_hw_queue(struct qdma_device *qdma_dev, uint32_t lcore_id)
 {
 	struct qdma_per_core_info *core_info;
 	struct qdma_hw_queue *queue, *temp;
@@ -357,7 +365,7 @@ get_hw_queue(uint32_t lcore_id)
 	 * Allocate a HW queue if there are less queues
 	 * than maximum per core queues configured
 	 */
-	if (num_hw_queues < qdma_dev.max_hw_queues_per_core) {
+	if (num_hw_queues < qdma_dev->max_hw_queues_per_core) {
 		queue = alloc_hw_queue(lcore_id);
 		if (queue) {
 			core_info->hw_queues[num_hw_queues] = queue;
@@ -416,41 +424,41 @@ put_hw_queue(struct qdma_hw_queue *queue)
 	}
 }
 
-int
-rte_qdma_init(void)
+static int
+dpaa2_qdma_attr_get(struct rte_rawdev *rawdev,
+		    __rte_unused const char *attr_name,
+		    uint64_t *attr_value)
 {
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct rte_qdma_attr *qdma_attr = (struct rte_qdma_attr *)attr_value;
+
 	DPAA2_QDMA_FUNC_TRACE();
 
-	rte_spinlock_init(&qdma_dev.lock);
+	qdma_attr->num_hw_queues = qdma_dev->num_hw_queues;
 
 	return 0;
 }
 
-void
-rte_qdma_attr_get(struct rte_qdma_attr *qdma_attr)
-{
-	DPAA2_QDMA_FUNC_TRACE();
-
-	qdma_attr->num_hw_queues = qdma_dev.num_hw_queues;
-}
-
-int
-rte_qdma_reset(void)
+static int
+dpaa2_qdma_reset(struct rte_rawdev *rawdev)
 {
 	struct qdma_hw_queue *queue;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
 	int i;
 
 	DPAA2_QDMA_FUNC_TRACE();
 
 	/* In case QDMA device is not in stopped state, return -EBUSY */
-	if (qdma_dev.state == 1) {
+	if (qdma_dev->state == 1) {
 		DPAA2_QDMA_ERR(
 			"Device is in running state. Stop before reset.");
 		return -EBUSY;
 	}
 
 	/* In case there are pending jobs on any VQ, return -EBUSY */
-	for (i = 0; i < qdma_dev.max_vqs; i++) {
+	for (i = 0; i < qdma_dev->max_vqs; i++) {
 		if (qdma_vqs[i].in_use && (qdma_vqs[i].num_enqueues !=
 		    qdma_vqs[i].num_dequeues)) {
 			DPAA2_QDMA_ERR("Jobs are still pending on VQ: %d", i);
@@ -463,7 +471,7 @@ rte_qdma_reset(void)
 		queue->num_users = 0;
 
 	/* Reset and free virtual queues */
-	for (i = 0; i < qdma_dev.max_vqs; i++) {
+	for (i = 0; i < qdma_dev->max_vqs; i++) {
 		if (qdma_vqs[i].status_ring)
 			rte_ring_free(qdma_vqs[i].status_ring);
 	}
@@ -476,43 +484,43 @@ rte_qdma_reset(void)
 		sizeof(struct qdma_per_core_info) * RTE_MAX_LCORE);
 
 	/* Free the FLE pool */
-	if (qdma_dev.fle_pool)
-		rte_mempool_free(qdma_dev.fle_pool);
+	if (qdma_dev->fle_pool)
+		rte_mempool_free(qdma_dev->fle_pool);
 
 	/* Reset QDMA device structure */
-	qdma_dev.mode = RTE_QDMA_MODE_HW;
-	qdma_dev.max_hw_queues_per_core = 0;
-	qdma_dev.fle_pool = NULL;
-	qdma_dev.fle_pool_count = 0;
-	qdma_dev.max_vqs = 0;
+	qdma_dev->mode = RTE_QDMA_MODE_HW;
+	qdma_dev->max_hw_queues_per_core = 0;
+	qdma_dev->fle_pool = NULL;
+	qdma_dev->fle_pool_count = 0;
+	qdma_dev->max_vqs = 0;
 
 	return 0;
 }
 
-int
-rte_qdma_configure(struct rte_qdma_config *qdma_config)
+static int
+dpaa2_qdma_configure(const struct rte_rawdev *rawdev,
+			 rte_rawdev_obj_t config,
+			 size_t config_size)
 {
-	int ret;
 	char fle_pool_name[32]; /* RTE_MEMZONE_NAMESIZE = 32 */
+	struct rte_qdma_config *qdma_config = (struct rte_qdma_config *)config;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
 
 	DPAA2_QDMA_FUNC_TRACE();
 
+	if (config_size != sizeof(*qdma_config))
+		return -EINVAL;
+
 	/* In case QDMA device is not in stopped state, return -EBUSY */
-	if (qdma_dev.state == 1) {
+	if (qdma_dev->state == 1) {
 		DPAA2_QDMA_ERR(
 			"Device is in running state. Stop before config.");
 		return -1;
 	}
 
-	/* Reset the QDMA device */
-	ret = rte_qdma_reset();
-	if (ret) {
-		DPAA2_QDMA_ERR("Resetting QDMA failed");
-		return ret;
-	}
-
 	/* Set mode */
-	qdma_dev.mode = qdma_config->mode;
+	qdma_dev->mode = qdma_config->mode;
 
 	/* Set max HW queue per core */
 	if (qdma_config->max_hw_queues_per_core > MAX_HW_QUEUE_PER_CORE) {
@@ -520,7 +528,7 @@ rte_qdma_configure(struct rte_qdma_config *qdma_config)
 			       MAX_HW_QUEUE_PER_CORE);
 		return -EINVAL;
 	}
-	qdma_dev.max_hw_queues_per_core =
+	qdma_dev->max_hw_queues_per_core =
 		qdma_config->max_hw_queues_per_core;
 
 	/* Allocate Virtual Queues */
@@ -531,24 +539,24 @@ rte_qdma_configure(struct rte_qdma_config *qdma_config)
 		DPAA2_QDMA_ERR("qdma_virtual_queues allocation failed");
 		return -ENOMEM;
 	}
-	qdma_dev.max_vqs = qdma_config->max_vqs;
+	qdma_dev->max_vqs = qdma_config->max_vqs;
 
 	/* Allocate FLE pool; just append PID so that in case of
 	 * multiprocess, the pool's don't collide.
 	 */
 	snprintf(fle_pool_name, sizeof(fle_pool_name), "qdma_fle_pool%u",
 		 getpid());
-	qdma_dev.fle_pool = rte_mempool_create(fle_pool_name,
+	qdma_dev->fle_pool = rte_mempool_create(fle_pool_name,
 			qdma_config->fle_pool_count, QDMA_FLE_POOL_SIZE,
 			QDMA_FLE_CACHE_SIZE(qdma_config->fle_pool_count), 0,
 			NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
-	if (!qdma_dev.fle_pool) {
+	if (!qdma_dev->fle_pool) {
 		DPAA2_QDMA_ERR("qdma_fle_pool create failed");
 		rte_free(qdma_vqs);
 		qdma_vqs = NULL;
 		return -ENOMEM;
 	}
-	qdma_dev.fle_pool_count = qdma_config->fle_pool_count;
+	qdma_dev->fle_pool_count = qdma_config->fle_pool_count;
 
 	if (qdma_config->format == RTE_QDMA_ULTRASHORT_FORMAT) {
 		dpdmai_dev_get_job = dpdmai_dev_get_job_us;
@@ -560,57 +568,71 @@ rte_qdma_configure(struct rte_qdma_config *qdma_config)
 	return 0;
 }
 
-int
-rte_qdma_start(void)
+static int
+dpaa2_qdma_start(struct rte_rawdev *rawdev)
 {
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+
 	DPAA2_QDMA_FUNC_TRACE();
 
-	qdma_dev.state = 1;
+	qdma_dev->state = 1;
 
 	return 0;
 }
 
-int
-rte_qdma_vq_create(uint32_t lcore_id, uint32_t flags)
+static int
+dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
+			  __rte_unused uint16_t queue_id,
+			  rte_rawdev_obj_t queue_conf,
+			  size_t conf_size)
 {
 	char ring_name[32];
 	int i;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct rte_qdma_queue_config *q_config =
+		(struct rte_qdma_queue_config *)queue_conf;
 
 	DPAA2_QDMA_FUNC_TRACE();
 
-	rte_spinlock_lock(&qdma_dev.lock);
+	if (conf_size != sizeof(*q_config))
+		return -EINVAL;
+
+	rte_spinlock_lock(&qdma_dev->lock);
 
 	/* Get a free Virtual Queue */
-	for (i = 0; i < qdma_dev.max_vqs; i++) {
+	for (i = 0; i < qdma_dev->max_vqs; i++) {
 		if (qdma_vqs[i].in_use == 0)
 			break;
 	}
 
 	/* Return in case no VQ is free */
-	if (i == qdma_dev.max_vqs) {
-		rte_spinlock_unlock(&qdma_dev.lock);
+	if (i == qdma_dev->max_vqs) {
+		rte_spinlock_unlock(&qdma_dev->lock);
 		DPAA2_QDMA_ERR("Unable to get lock on QDMA device");
 		return -ENODEV;
 	}
 
-	if (qdma_dev.mode == RTE_QDMA_MODE_HW ||
-			(flags & RTE_QDMA_VQ_EXCLUSIVE_PQ)) {
+	if (qdma_dev->mode == RTE_QDMA_MODE_HW ||
+			(q_config->flags & RTE_QDMA_VQ_EXCLUSIVE_PQ)) {
 		/* Allocate HW queue for a VQ */
-		qdma_vqs[i].hw_queue = alloc_hw_queue(lcore_id);
+		qdma_vqs[i].hw_queue = alloc_hw_queue(q_config->lcore_id);
 		qdma_vqs[i].exclusive_hw_queue = 1;
 	} else {
 		/* Allocate a Ring for Virutal Queue in VQ mode */
 		snprintf(ring_name, sizeof(ring_name), "status ring %d", i);
 		qdma_vqs[i].status_ring = rte_ring_create(ring_name,
-			qdma_dev.fle_pool_count, rte_socket_id(), 0);
+			qdma_dev->fle_pool_count, rte_socket_id(), 0);
 		if (!qdma_vqs[i].status_ring) {
 			DPAA2_QDMA_ERR("Status ring creation failed for vq");
-			rte_spinlock_unlock(&qdma_dev.lock);
+			rte_spinlock_unlock(&qdma_dev->lock);
 			return rte_errno;
 		}
 
 		/* Get a HW queue (shared) for a VQ */
-		qdma_vqs[i].hw_queue = get_hw_queue(lcore_id);
+		qdma_vqs[i].hw_queue = get_hw_queue(qdma_dev,
+						    q_config->lcore_id);
 		qdma_vqs[i].exclusive_hw_queue = 0;
 	}
 
@@ -619,28 +641,18 @@ rte_qdma_vq_create(uint32_t lcore_id, uint32_t flags)
 		if (qdma_vqs[i].status_ring)
 			rte_ring_free(qdma_vqs[i].status_ring);
 		qdma_vqs[i].status_ring = NULL;
-		rte_spinlock_unlock(&qdma_dev.lock);
+		rte_spinlock_unlock(&qdma_dev->lock);
 		return -ENODEV;
 	}
 
 	qdma_vqs[i].in_use = 1;
-	qdma_vqs[i].lcore_id = lcore_id;
+	qdma_vqs[i].lcore_id = q_config->lcore_id;
 	memset(&qdma_vqs[i].rbp, 0, sizeof(struct rte_qdma_rbp));
-	rte_spinlock_unlock(&qdma_dev.lock);
+	rte_spinlock_unlock(&qdma_dev->lock);
 
-	return i;
-}
-
-/*create vq for route-by-port*/
-int
-rte_qdma_vq_create_rbp(uint32_t lcore_id, uint32_t flags,
-			struct rte_qdma_rbp *rbp)
-{
-	int i;
-
-	i = rte_qdma_vq_create(lcore_id, flags);
-
-	memcpy(&qdma_vqs[i].rbp, rbp, sizeof(struct rte_qdma_rbp));
+	if (q_config->rbp != NULL)
+		memcpy(&qdma_vqs[i].rbp, q_config->rbp,
+		       sizeof(struct rte_qdma_rbp));
 
 	return i;
 }
@@ -689,7 +701,7 @@ dpdmai_dev_enqueue_multi(struct dpaa2_dpdmai_dev *dpdmai_dev,
 			dpaa2_eqcr_size : nb_jobs;
 
 		for (loop = 0; loop < num_to_send; loop++) {
-			ret = dpdmai_dev_set_fd(&fd[loop],
+			ret = dpdmai_dev_set_fd(dpdmai_dev->qdma_dev, &fd[loop],
 						job[num_tx], rbp, vq_id);
 			if (ret < 0) {
 				/* Set nb_jobs to loop, so outer while loop
@@ -724,12 +736,14 @@ dpdmai_dev_enqueue_multi(struct dpaa2_dpdmai_dev *dpdmai_dev,
 	return num_tx;
 }
 
-int
-rte_qdma_vq_enqueue_multi(uint16_t vq_id,
-			  struct rte_qdma_job **job,
-			  uint16_t nb_jobs)
+static int
+dpaa2_qdma_enqueue(__rte_unused struct rte_rawdev *rawdev,
+		  __rte_unused struct rte_rawdev_buf **buffers,
+		  unsigned int nb_jobs,
+		  rte_rawdev_obj_t context)
 {
-	struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id];
+	struct rte_qdma_enqdeq *e_context = (struct rte_qdma_enqdeq *)context;
+	struct qdma_virt_queue *qdma_vq = &qdma_vqs[e_context->vq_id];
 	struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
 	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
 	int ret;
@@ -737,15 +751,15 @@ rte_qdma_vq_enqueue_multi(uint16_t vq_id,
 	/* Return error in case of wrong lcore_id */
 	if (rte_lcore_id() != qdma_vq->lcore_id) {
 		DPAA2_QDMA_ERR("QDMA enqueue for vqid %d on wrong core",
-				vq_id);
+				e_context->vq_id);
 		return -EINVAL;
 	}
 
 	ret = dpdmai_dev_enqueue_multi(dpdmai_dev,
 				 qdma_pq->queue_id,
-				 vq_id,
+				 e_context->vq_id,
 				 &qdma_vq->rbp,
-				 job,
+				 e_context->job,
 				 nb_jobs);
 	if (ret < 0) {
 		DPAA2_QDMA_ERR("DPDMAI device enqueue failed: %d", ret);
@@ -757,13 +771,6 @@ rte_qdma_vq_enqueue_multi(uint16_t vq_id,
 	return ret;
 }
 
-int
-rte_qdma_vq_enqueue(uint16_t vq_id,
-		    struct rte_qdma_job *job)
-{
-	return rte_qdma_vq_enqueue_multi(vq_id, &job, 1);
-}
-
 /* Function to receive a QDMA job for a given device and queue*/
 static int
 dpdmai_dev_dequeue_multijob_prefetch(
@@ -878,7 +885,8 @@ dpdmai_dev_dequeue_multijob_prefetch(
 		}
 		fd = qbman_result_DQ_fd(dq_storage);
 
-		vqid = dpdmai_dev_get_job(fd, &job[num_rx]);
+		vqid = dpdmai_dev_get_job(dpdmai_dev->qdma_dev, fd,
+					  &job[num_rx]);
 		if (vq_id)
 			vq_id[num_rx] = vqid;
 
@@ -994,7 +1002,8 @@ dpdmai_dev_dequeue_multijob_no_prefetch(
 			}
 			fd = qbman_result_DQ_fd(dq_storage);
 
-			vqid = dpdmai_dev_get_job(fd, &job[num_rx]);
+			vqid = dpdmai_dev_get_job(dpdmai_dev->qdma_dev, fd,
+						  &job[num_rx]);
 			if (vq_id)
 				vq_id[num_rx] = vqid;
 
@@ -1009,21 +1018,24 @@ dpdmai_dev_dequeue_multijob_no_prefetch(
 	return num_rx;
 }
 
-int
-rte_qdma_vq_dequeue_multi(uint16_t vq_id,
-			  struct rte_qdma_job **job,
-			  uint16_t nb_jobs)
+static int
+dpaa2_qdma_dequeue(__rte_unused struct rte_rawdev *rawdev,
+		   __rte_unused struct rte_rawdev_buf **buffers,
+		   unsigned int nb_jobs,
+		   rte_rawdev_obj_t cntxt)
 {
-	struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id];
+	struct rte_qdma_enqdeq *context = (struct rte_qdma_enqdeq *)cntxt;
+	struct qdma_virt_queue *qdma_vq = &qdma_vqs[context->vq_id];
 	struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
 	struct qdma_virt_queue *temp_qdma_vq;
 	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
-	int ring_count, ret = 0, i;
+	int ret = 0, i;
+	unsigned int ring_count;
 
 	/* Return error in case of wrong lcore_id */
 	if (rte_lcore_id() != (unsigned int)(qdma_vq->lcore_id)) {
 		DPAA2_QDMA_WARN("QDMA dequeue for vqid %d on wrong core",
-				vq_id);
+				context->vq_id);
 		return -1;
 	}
 
@@ -1037,7 +1049,7 @@ rte_qdma_vq_dequeue_multi(uint16_t vq_id,
 	if (qdma_vq->exclusive_hw_queue) {
 		/* In case of exclusive queue directly fetch from HW queue */
 		ret = dpdmai_dev_dequeue_multijob(dpdmai_dev, qdma_pq->queue_id,
-					 NULL, job, nb_jobs);
+					 NULL, context->job, nb_jobs);
 		if (ret < 0) {
 			DPAA2_QDMA_ERR(
 				"Dequeue from DPDMAI device failed: %d", ret);
@@ -1056,11 +1068,11 @@ rte_qdma_vq_dequeue_multi(uint16_t vq_id,
 			/* TODO - How to have right budget */
 			ret = dpdmai_dev_dequeue_multijob(dpdmai_dev,
 					qdma_pq->queue_id,
-					temp_vq_id, job, nb_jobs);
+					temp_vq_id, context->job, nb_jobs);
 			for (i = 0; i < ret; i++) {
 				temp_qdma_vq = &qdma_vqs[temp_vq_id[i]];
 				rte_ring_enqueue(temp_qdma_vq->status_ring,
-					(void *)(job[i]));
+					(void *)(context->job[i]));
 			}
 			ring_count = rte_ring_count(
 					qdma_vq->status_ring);
@@ -1071,7 +1083,8 @@ rte_qdma_vq_dequeue_multi(uint16_t vq_id,
 			 * to provide to the user
 			 */
 			ret = rte_ring_dequeue_bulk(qdma_vq->status_ring,
-					(void **)job, ring_count, NULL);
+						    (void **)context->job,
+						    ring_count, NULL);
 			if (ret)
 				qdma_vq->num_dequeues += ret;
 		}
@@ -1080,19 +1093,6 @@ rte_qdma_vq_dequeue_multi(uint16_t vq_id,
 	return ret;
 }
 
-struct rte_qdma_job *
-rte_qdma_vq_dequeue(uint16_t vq_id)
-{
-	int ret;
-	struct rte_qdma_job *job = NULL;
-
-	ret = rte_qdma_vq_dequeue_multi(vq_id, &job, 1);
-	if (ret < 0)
-		DPAA2_QDMA_DP_WARN("DPDMAI device dequeue failed: %d", ret);
-
-	return job;
-}
-
 void
 rte_qdma_vq_stats(uint16_t vq_id,
 		  struct rte_qdma_vq_stats *vq_status)
@@ -1109,9 +1109,13 @@ rte_qdma_vq_stats(uint16_t vq_id,
 	}
 }
 
-int
-rte_qdma_vq_destroy(uint16_t vq_id)
+static int
+dpaa2_qdma_queue_release(struct rte_rawdev *rawdev,
+			 uint16_t vq_id)
 {
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+
 	struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id];
 
 	DPAA2_QDMA_FUNC_TRACE();
@@ -1120,7 +1124,7 @@ rte_qdma_vq_destroy(uint16_t vq_id)
 	if (qdma_vq->num_enqueues != qdma_vq->num_dequeues)
 		return -EBUSY;
 
-	rte_spinlock_lock(&qdma_dev.lock);
+	rte_spinlock_lock(&qdma_dev->lock);
 
 	if (qdma_vq->exclusive_hw_queue)
 		free_hw_queue(qdma_vq->hw_queue);
@@ -1133,57 +1137,44 @@ rte_qdma_vq_destroy(uint16_t vq_id)
 
 	memset(qdma_vq, 0, sizeof(struct qdma_virt_queue));
 
-	rte_spinlock_unlock(&qdma_dev.lock);
+	rte_spinlock_unlock(&qdma_dev->lock);
 
 	return 0;
 }
 
-int
-rte_qdma_vq_destroy_rbp(uint16_t vq_id)
+static void
+dpaa2_qdma_stop(struct rte_rawdev *rawdev)
 {
-	struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id];
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
 
 	DPAA2_QDMA_FUNC_TRACE();
 
-	/* In case there are pending jobs on any VQ, return -EBUSY */
-	if (qdma_vq->num_enqueues != qdma_vq->num_dequeues)
-		return -EBUSY;
-
-	rte_spinlock_lock(&qdma_dev.lock);
-
-	if (qdma_vq->exclusive_hw_queue) {
-		free_hw_queue(qdma_vq->hw_queue);
-	} else {
-		if (qdma_vqs->status_ring)
-			rte_ring_free(qdma_vqs->status_ring);
-
-		put_hw_queue(qdma_vq->hw_queue);
-	}
-
-	memset(qdma_vq, 0, sizeof(struct qdma_virt_queue));
-
-	rte_spinlock_unlock(&qdma_dev.lock);
-
-	return 0;
+	qdma_dev->state = 0;
 }
 
-void
-rte_qdma_stop(void)
+static int
+dpaa2_qdma_close(struct rte_rawdev *rawdev)
 {
 	DPAA2_QDMA_FUNC_TRACE();
 
-	qdma_dev.state = 0;
-}
+	dpaa2_qdma_reset(rawdev);
 
-void
-rte_qdma_destroy(void)
-{
-	DPAA2_QDMA_FUNC_TRACE();
-
-	rte_qdma_reset();
+	return 0;
 }
 
-static const struct rte_rawdev_ops dpaa2_qdma_ops;
+static struct rte_rawdev_ops dpaa2_qdma_ops = {
+	.dev_configure            = dpaa2_qdma_configure,
+	.dev_start                = dpaa2_qdma_start,
+	.dev_stop                 = dpaa2_qdma_stop,
+	.dev_reset                = dpaa2_qdma_reset,
+	.dev_close                = dpaa2_qdma_close,
+	.queue_setup		  = dpaa2_qdma_queue_setup,
+	.queue_release		  = dpaa2_qdma_queue_release,
+	.attr_get		  = dpaa2_qdma_attr_get,
+	.enqueue_bufs		  = dpaa2_qdma_enqueue,
+	.dequeue_bufs		  = dpaa2_qdma_dequeue,
+};
 
 static int
 add_hw_queues_to_list(struct dpaa2_dpdmai_dev *dpdmai_dev)
@@ -1205,7 +1196,7 @@ add_hw_queues_to_list(struct dpaa2_dpdmai_dev *dpdmai_dev)
 		queue->queue_id = i;
 
 		TAILQ_INSERT_TAIL(&qdma_queue_list, queue, next);
-		qdma_dev.num_hw_queues++;
+		dpdmai_dev->qdma_dev->num_hw_queues++;
 	}
 
 	return 0;
@@ -1314,6 +1305,7 @@ dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id)
 	/* Open DPDMAI device */
 	dpdmai_dev->dpdmai_id = dpdmai_id;
 	dpdmai_dev->dpdmai.regs = dpaa2_get_mcp_ptr(MC_PORTAL_INDEX);
+	dpdmai_dev->qdma_dev = &q_dev;
 	ret = dpdmai_open(&dpdmai_dev->dpdmai, CMD_PRI_LOW,
 			  dpdmai_dev->dpdmai_id, &dpdmai_dev->token);
 	if (ret) {
@@ -1428,6 +1420,8 @@ dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id)
 
 	DPAA2_QDMA_DEBUG("Initialized dpdmai object successfully");
 
+	rte_spinlock_init(&dpdmai_dev->qdma_dev->lock);
+
 	return 0;
 init_err:
 	dpaa2_dpdmai_dev_uninit(rawdev);
@@ -1463,6 +1457,13 @@ rte_dpaa2_qdma_probe(struct rte_dpaa2_driver *dpaa2_drv,
 		return ret;
 	}
 
+	/* Reset the QDMA device */
+	ret = dpaa2_qdma_reset(rawdev);
+	if (ret) {
+		DPAA2_QDMA_ERR("Resetting QDMA failed");
+		return ret;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
index 017638075..3c112d28f 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018-2019 NXP
+ * Copyright 2018-2020 NXP
  */
 
 #ifndef __DPAA2_QDMA_H__
@@ -173,6 +173,7 @@ struct dpaa2_dpdmai_dev {
 	struct dpaa2_queue rx_queue[DPAA2_DPDMAI_MAX_QUEUES];
 	/** TX queues */
 	struct dpaa2_queue tx_queue[DPAA2_DPDMAI_MAX_QUEUES];
+	struct qdma_device *qdma_dev;
 };
 
 #endif /* __DPAA2_QDMA_H__ */
diff --git a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
index 4e1268cc5..71894d35e 100644
--- a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
@@ -1,10 +1,12 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018-2019 NXP
+ * Copyright 2018-2020 NXP
  */
 
 #ifndef __RTE_PMD_DPAA2_QDMA_H__
 #define __RTE_PMD_DPAA2_QDMA_H__
 
+#include <rte_rawdev.h>
+
 /**
  * @file
  *
@@ -154,150 +156,29 @@ struct rte_qdma_job {
 	uint16_t vq_id;
 };
 
-/**
- * Initialize the QDMA device.
- *
- * @returns
- *   - 0: Success.
- *   - <0: Error code.
- */
-int
-rte_qdma_init(void);
-
-/**
- * Get the QDMA attributes.
- *
- * @param qdma_attr
- *   QDMA attributes providing total number of hw queues etc.
- */
-void
-rte_qdma_attr_get(struct rte_qdma_attr *qdma_attr);
-
-/**
- * Reset the QDMA device. This API will completely reset the QDMA
- * device, bringing it to original state as if only rte_qdma_init() API
- * has been called.
- *
- * @returns
- *   - 0: Success.
- *   - <0: Error code.
- */
-int
-rte_qdma_reset(void);
-
-/**
- * Configure the QDMA device.
- *
- * @returns
- *   - 0: Success.
- *   - <0: Error code.
- */
-int
-rte_qdma_configure(struct rte_qdma_config *qdma_config);
-
-/**
- * Start the QDMA device.
- *
- * @returns
- *   - 0: Success.
- *   - <0: Error code.
- */
-int
-rte_qdma_start(void);
-
-/**
- * Create a Virtual Queue on a particular lcore id.
- * This API can be called from any thread/core. User can create/destroy
- * VQ's at runtime.
- *
- * @param lcore_id
- *   LCORE ID on which this particular queue would be associated with.
- * @param flags
- *  RTE_QDMA_VQ_ flags. See macro definitions.
- *
- * @returns
- *   - >= 0: Virtual queue ID.
- *   - <0: Error code.
- */
-int
-rte_qdma_vq_create(uint32_t lcore_id, uint32_t flags);
-
-/*create vq for route-by-port*/
-int
-rte_qdma_vq_create_rbp(uint32_t lcore_id, uint32_t flags,
-			struct rte_qdma_rbp *rbp);
-
-/**
- * Enqueue multiple jobs to a Virtual Queue.
- * If the enqueue is successful, the H/W will perform DMA operations
- * on the basis of the QDMA jobs provided.
- *
- * @param vq_id
- *   Virtual Queue ID.
- * @param job
- *   List of QDMA Jobs containing relevant information related to DMA.
- * @param nb_jobs
- *   Number of QDMA jobs provided by the user.
- *
- * @returns
- *   - >=0: Number of jobs successfully submitted
- *   - <0: Error code.
- */
-int
-rte_qdma_vq_enqueue_multi(uint16_t vq_id,
-			  struct rte_qdma_job **job,
-			  uint16_t nb_jobs);
-
-/**
- * Enqueue a single job to a Virtual Queue.
- * If the enqueue is successful, the H/W will perform DMA operations
- * on the basis of the QDMA job provided.
- *
- * @param vq_id
- *   Virtual Queue ID.
- * @param job
- *   A QDMA Job containing relevant information related to DMA.
- *
- * @returns
- *   - >=0: Number of jobs successfully submitted
- *   - <0: Error code.
- */
-int
-rte_qdma_vq_enqueue(uint16_t vq_id,
-		    struct rte_qdma_job *job);
+struct rte_qdma_enqdeq {
+	uint16_t vq_id;
+	struct rte_qdma_job **job;
+};
 
-/**
- * Dequeue multiple completed jobs from a Virtual Queue.
- * Provides the list of completed jobs capped by nb_jobs.
- *
- * @param vq_id
- *   Virtual Queue ID.
- * @param job
- *   List of QDMA Jobs returned from the API.
- * @param nb_jobs
- *   Number of QDMA jobs requested for dequeue by the user.
- *
- * @returns
- *   - >=0: Number of jobs successfully received
- *   - <0: Error code.
- */
-int
-rte_qdma_vq_dequeue_multi(uint16_t vq_id,
-			  struct rte_qdma_job **job,
-			  uint16_t nb_jobs);
+struct rte_qdma_queue_config {
+	uint32_t lcore_id;
+	uint32_t flags;
+	struct rte_qdma_rbp *rbp;
+};
 
-/**
- * Dequeue a single completed jobs from a Virtual Queue.
- *
- * @param vq_id
- *   Virtual Queue ID.
- *
- * @returns
- *   - A completed job or NULL if no job is there.
- */
-struct rte_qdma_job *
-rte_qdma_vq_dequeue(uint16_t vq_id);
+#define rte_qdma_info rte_rawdev_info
+#define rte_qdma_start(id) rte_rawdev_start(id)
+#define rte_qdma_reset(id) rte_rawdev_reset(id)
+#define rte_qdma_configure(id, cf) rte_rawdev_configure(id, cf)
+#define rte_qdma_dequeue_buffers(id, buf, num, ctxt) \
+	rte_rawdev_dequeue_buffers(id, buf, num, ctxt)
+#define rte_qdma_enqueue_buffers(id, buf, num, ctxt) \
+	rte_rawdev_enqueue_buffers(id, buf, num, ctxt)
+#define rte_qdma_queue_setup(id, qid, cfg) \
+	rte_rawdev_queue_setup(id, qid, cfg)
 
+/* TODO: introduce per queue stats API in rawdev */
 /**
  * Get a Virtual Queue statistics.
  *
@@ -310,46 +191,4 @@ void
 rte_qdma_vq_stats(uint16_t vq_id,
 		  struct rte_qdma_vq_stats *vq_stats);
 
-/**
- * Destroy the Virtual Queue specified by vq_id.
- * This API can be called from any thread/core. User can create/destroy
- * VQ's at runtime.
- *
- * @param vq_id
- *   Virtual Queue ID which needs to be uninitialized.
- *
- * @returns
- *   - 0: Success.
- *   - <0: Error code.
- */
-int
-rte_qdma_vq_destroy(uint16_t vq_id);
-
-/**
- * Destroy the RBP specific Virtual Queue specified by vq_id.
- * This API can be called from any thread/core. User can create/destroy
- * VQ's at runtime.
- *
- * @param vq_id
- *   RBP based Virtual Queue ID which needs to be uninitialized.
- *
- * @returns
- *   - 0: Success.
- *   - <0: Error code.
- */
-
-int
-rte_qdma_vq_destroy_rbp(uint16_t vq_id);
-/**
- * Stop QDMA device.
- */
-void
-rte_qdma_stop(void);
-
-/**
- * Destroy the QDMA device.
- */
-void
-rte_qdma_destroy(void);
-
 #endif /* __RTE_PMD_DPAA2_QDMA_H__*/
-- 
2.17.1


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH v2 2/7] raw/dpaa2_qdma: memset to only required memory
  2020-10-15  9:47 ` [dpdk-dev] [PATCH v2 " Gagandeep Singh
  2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 1/7] raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops Gagandeep Singh
@ 2020-10-15  9:47   ` Gagandeep Singh
  2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 3/7] raw/dpaa2_qdma: refactor the code Gagandeep Singh
                     ` (4 subsequent siblings)
  6 siblings, 0 replies; 23+ messages in thread
From: Gagandeep Singh @ 2020-10-15  9:47 UTC (permalink / raw)
  To: dev, nipun.gupta, hemant.agrawal; +Cc: thomas, Gagandeep Singh

Performance improvement: memset only the frame descriptors that are
actually required (nb_jobs entries) instead of the whole
RTE_QDMA_BURST_NB_MAX-sized array.

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
---
 drivers/raw/dpaa2_qdma/dpaa2_qdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
index 530ee156d..09e295fe8 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
@@ -692,7 +692,7 @@ dpdmai_dev_enqueue_multi(struct dpaa2_dpdmai_dev *dpdmai_dev,
 	qbman_eq_desc_set_no_orp(&eqdesc, 0);
 	qbman_eq_desc_set_response(&eqdesc, 0, 0);
 
-	memset(fd, 0, RTE_QDMA_BURST_NB_MAX * sizeof(struct qbman_fd));
+	memset(fd, 0, nb_jobs * sizeof(struct qbman_fd));
 
 	while (nb_jobs > 0) {
 		uint32_t loop;
-- 
2.17.1


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH v2 3/7] raw/dpaa2_qdma: refactor the code
  2020-10-15  9:47 ` [dpdk-dev] [PATCH v2 " Gagandeep Singh
  2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 1/7] raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops Gagandeep Singh
  2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 2/7] raw/dpaa2_qdma: memset to only required memory Gagandeep Singh
@ 2020-10-15  9:47   ` Gagandeep Singh
  2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 4/7] raw/dpaa2_qdma: optimize IOVA conversion Gagandeep Singh
                     ` (3 subsequent siblings)
  6 siblings, 0 replies; 23+ messages in thread
From: Gagandeep Singh @ 2020-10-15  9:47 UTC (permalink / raw)
  To: dev, nipun.gupta, hemant.agrawal; +Cc: thomas, Jun Yang

From: Jun Yang <jun.yang@nxp.com>

This patch moves qdma queue specific configurations from driver
global configuration to per-queue setup. This is required
as each queue can be configured differently.

Signed-off-by: Jun Yang <jun.yang@nxp.com>
---
 drivers/raw/dpaa2_qdma/dpaa2_qdma.c         | 1270 +++++++++----------
 drivers/raw/dpaa2_qdma/dpaa2_qdma.h         |   39 +-
 drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h |   13 +-
 3 files changed, 675 insertions(+), 647 deletions(-)

diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
index 09e295fe8..6eef7a57a 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
@@ -26,6 +26,9 @@
 
 #define DPAA2_QDMA_NO_PREFETCH "no_prefetch"
 
+/* Dynamic log type identifier */
+int dpaa2_qdma_logtype;
+
 uint32_t dpaa2_coherent_no_alloc_cache;
 uint32_t dpaa2_coherent_alloc_cache;
 
@@ -37,31 +40,9 @@ TAILQ_HEAD(qdma_hw_queue_list, qdma_hw_queue);
 static struct qdma_hw_queue_list qdma_queue_list
 	= TAILQ_HEAD_INITIALIZER(qdma_queue_list);
 
-/* QDMA Virtual Queues */
-static struct qdma_virt_queue *qdma_vqs;
-
 /* QDMA per core data */
 static struct qdma_per_core_info qdma_core_info[RTE_MAX_LCORE];
 
-typedef int (dpdmai_dev_dequeue_multijob_t)(struct dpaa2_dpdmai_dev *dpdmai_dev,
-					    uint16_t rxq_id,
-					    uint16_t *vq_id,
-					    struct rte_qdma_job **job,
-					    uint16_t nb_jobs);
-
-dpdmai_dev_dequeue_multijob_t *dpdmai_dev_dequeue_multijob;
-
-typedef uint16_t (dpdmai_dev_get_job_t)(struct qdma_device *qdma_dev,
-					const struct qbman_fd *fd,
-					struct rte_qdma_job **job);
-typedef int (dpdmai_dev_set_fd_t)(struct qdma_device *qdma_dev,
-				  struct qbman_fd *fd,
-				  struct rte_qdma_job *job,
-				  struct rte_qdma_rbp *rbp,
-				  uint16_t vq_id);
-dpdmai_dev_get_job_t *dpdmai_dev_get_job;
-dpdmai_dev_set_fd_t *dpdmai_dev_set_fd;
-
 static inline int
 qdma_populate_fd_pci(phys_addr_t src, phys_addr_t dest,
 			uint32_t len, struct qbman_fd *fd,
@@ -114,7 +95,7 @@ qdma_populate_fd_ddr(phys_addr_t src, phys_addr_t dest,
 	/**
 	 * src If RBP=0 {NS,RDTTYPE[3:0]}: 0_1011
 	 * Coherent copy of cacheable memory,
-	 * lookup in downstream cache, no allocate
+	* lookup in downstream cache, no allocate
 	 * on miss
 	 */
 	fd->simple_ddr.rns = 0;
@@ -204,12 +185,11 @@ dpaa2_qdma_populate_fle(struct qbman_fle *fle,
 }
 
 static inline int dpdmai_dev_set_fd_us(
-				struct qdma_device *qdma_dev __rte_unused,
-				struct qbman_fd *fd,
-				struct rte_qdma_job *job,
-				struct rte_qdma_rbp *rbp,
-				uint16_t vq_id)
+		struct qdma_virt_queue *qdma_vq,
+		struct qbman_fd *fd,
+		struct rte_qdma_job *job)
 {
+	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
 	struct rte_qdma_job **ppjob;
 	size_t iova;
 	int ret = 0;
@@ -220,7 +200,7 @@ static inline int dpdmai_dev_set_fd_us(
 		iova = (size_t)job->src;
 
 	/* Set the metadata */
-	job->vq_id = vq_id;
+	job->vq_id = qdma_vq->vq_id;
 	ppjob = (struct rte_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1;
 	*ppjob = job;
 
@@ -234,15 +214,17 @@ static inline int dpdmai_dev_set_fd_us(
 					   job->len, fd);
 	return ret;
 }
-static inline int dpdmai_dev_set_fd_lf(struct qdma_device *qdma_dev,
-					struct qbman_fd *fd,
-					struct rte_qdma_job *job,
-					struct rte_qdma_rbp *rbp,
-					uint16_t vq_id)
+static inline int dpdmai_dev_set_fd_lf(
+		struct qdma_virt_queue *qdma_vq,
+		struct qbman_fd *fd,
+		struct rte_qdma_job *job)
 {
+	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
 	struct rte_qdma_job **ppjob;
 	struct qbman_fle *fle;
 	int ret = 0;
+	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
+
 	/*
 	 * Get an FLE/SDD from FLE pool.
 	 * Note: IO metadata is before the FLE and SDD memory.
@@ -254,7 +236,7 @@ static inline int dpdmai_dev_set_fd_lf(struct qdma_device *qdma_dev,
 	}
 
 	/* Set the metadata */
-	job->vq_id = vq_id;
+	job->vq_id = qdma_vq->vq_id;
 	*ppjob = job;
 
 	fle = (struct qbman_fle *)(ppjob + 1);
@@ -272,7 +254,7 @@ static inline int dpdmai_dev_set_fd_lf(struct qdma_device *qdma_dev,
 }
 
 static inline uint16_t dpdmai_dev_get_job_us(
-				struct qdma_device *qdma_dev __rte_unused,
+				struct qdma_virt_queue *qdma_vq __rte_unused,
 				const struct qbman_fd *fd,
 				struct rte_qdma_job **job)
 {
@@ -281,7 +263,7 @@ static inline uint16_t dpdmai_dev_get_job_us(
 	struct rte_qdma_job **ppjob;
 
 	if (fd->simple_pci.saddr_hi & (QDMA_RBP_UPPER_ADDRESS_MASK >> 32))
-		iova = (size_t) (((uint64_t)fd->simple_pci.daddr_hi) << 32
+		iova = (size_t)(((uint64_t)fd->simple_pci.daddr_hi) << 32
 				| (uint64_t)fd->simple_pci.daddr_lo);
 	else
 		iova = (size_t)(((uint64_t)fd->simple_pci.saddr_hi) << 32
@@ -289,18 +271,22 @@ static inline uint16_t dpdmai_dev_get_job_us(
 
 	ppjob = (struct rte_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1;
 	*job = (struct rte_qdma_job *)*ppjob;
-	(*job)->status = (fd->simple_pci.acc_err << 8) | (fd->simple_pci.error);
+	(*job)->status = (fd->simple_pci.acc_err << 8) |
+					(fd->simple_pci.error);
 	vqid = (*job)->vq_id;
 
 	return vqid;
 }
 
-static inline uint16_t dpdmai_dev_get_job_lf(struct qdma_device *qdma_dev,
-					     const struct qbman_fd *fd,
-					     struct rte_qdma_job **job)
+static inline uint16_t dpdmai_dev_get_job_lf(
+						struct qdma_virt_queue *qdma_vq,
+						const struct qbman_fd *fd,
+						struct rte_qdma_job **job)
 {
 	struct rte_qdma_job **ppjob;
 	uint16_t vqid;
+	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
+
 	/*
 	 * Fetch metadata from FLE. job and vq_id were set
 	 * in metadata in the enqueue operation.
@@ -320,351 +306,268 @@ static inline uint16_t dpdmai_dev_get_job_lf(struct qdma_device *qdma_dev,
 	return vqid;
 }
 
-static struct qdma_hw_queue *
-alloc_hw_queue(uint32_t lcore_id)
+/* Function to receive a QDMA job for a given device and queue*/
+static int
+dpdmai_dev_dequeue_multijob_prefetch(
+			struct qdma_virt_queue *qdma_vq,
+			uint16_t *vq_id,
+			struct rte_qdma_job **job,
+			uint16_t nb_jobs)
 {
-	struct qdma_hw_queue *queue = NULL;
+	struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
+	uint16_t rxq_id = qdma_pq->queue_id;
 
-	DPAA2_QDMA_FUNC_TRACE();
+	struct dpaa2_queue *rxq;
+	struct qbman_result *dq_storage, *dq_storage1 = NULL;
+	struct qbman_pull_desc pulldesc;
+	struct qbman_swp *swp;
+	struct queue_storage_info_t *q_storage;
+	uint32_t fqid;
+	uint8_t status, pending;
+	uint8_t num_rx = 0;
+	const struct qbman_fd *fd;
+	uint16_t vqid;
+	int ret, pull_size;
 
-	/* Get a free queue from the list */
-	TAILQ_FOREACH(queue, &qdma_queue_list, next) {
-		if (queue->num_users == 0) {
-			queue->lcore_id = lcore_id;
-			queue->num_users++;
-			break;
+	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
+		ret = dpaa2_affine_qbman_swp();
+		if (ret) {
+			DPAA2_QDMA_ERR(
+				"Failed to allocate IO portal, tid: %d\n",
+				rte_gettid());
+			return 0;
 		}
 	}
+	swp = DPAA2_PER_LCORE_PORTAL;
 
-	return queue;
-}
-
-static void
-free_hw_queue(struct qdma_hw_queue *queue)
-{
-	DPAA2_QDMA_FUNC_TRACE();
-
-	queue->num_users--;
-}
-
-
-static struct qdma_hw_queue *
-get_hw_queue(struct qdma_device *qdma_dev, uint32_t lcore_id)
-{
-	struct qdma_per_core_info *core_info;
-	struct qdma_hw_queue *queue, *temp;
-	uint32_t least_num_users;
-	int num_hw_queues, i;
-
-	DPAA2_QDMA_FUNC_TRACE();
-
-	core_info = &qdma_core_info[lcore_id];
-	num_hw_queues = core_info->num_hw_queues;
+	pull_size = (nb_jobs > dpaa2_dqrr_size) ? dpaa2_dqrr_size : nb_jobs;
+	rxq = &(dpdmai_dev->rx_queue[rxq_id]);
+	fqid = rxq->fqid;
+	q_storage = rxq->q_storage;
 
-	/*
-	 * Allocate a HW queue if there are less queues
-	 * than maximum per core queues configured
-	 */
-	if (num_hw_queues < qdma_dev->max_hw_queues_per_core) {
-		queue = alloc_hw_queue(lcore_id);
-		if (queue) {
-			core_info->hw_queues[num_hw_queues] = queue;
-			core_info->num_hw_queues++;
-			return queue;
+	if (unlikely(!q_storage->active_dqs)) {
+		q_storage->toggle = 0;
+		dq_storage = q_storage->dq_storage[q_storage->toggle];
+		q_storage->last_num_pkts = pull_size;
+		qbman_pull_desc_clear(&pulldesc);
+		qbman_pull_desc_set_numframes(&pulldesc,
+					      q_storage->last_num_pkts);
+		qbman_pull_desc_set_fq(&pulldesc, fqid);
+		qbman_pull_desc_set_storage(&pulldesc, dq_storage,
+				(size_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
+		if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
+			while (!qbman_check_command_complete(
+				get_swp_active_dqs(
+				DPAA2_PER_LCORE_DPIO->index)))
+				;
+			clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
+		}
+		while (1) {
+			if (qbman_swp_pull(swp, &pulldesc)) {
+				DPAA2_QDMA_DP_WARN(
+					"VDQ command not issued.QBMAN busy\n");
+					/* Portal was busy, try again */
+				continue;
+			}
+			break;
 		}
+		q_storage->active_dqs = dq_storage;
+		q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
+		set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index,
+				   dq_storage);
 	}
 
-	queue = core_info->hw_queues[0];
-	/* In case there is no queue associated with the core return NULL */
-	if (!queue)
-		return NULL;
+	dq_storage = q_storage->active_dqs;
+	rte_prefetch0((void *)(size_t)(dq_storage));
+	rte_prefetch0((void *)(size_t)(dq_storage + 1));
 
-	/* Fetch the least loaded H/W queue */
-	least_num_users = core_info->hw_queues[0]->num_users;
-	for (i = 0; i < num_hw_queues; i++) {
-		temp = core_info->hw_queues[i];
-		if (temp->num_users < least_num_users)
-			queue = temp;
-	}
+	/* Prepare next pull descriptor. This will give space for the
+	 * prefetching done on DQRR entries
+	 */
+	q_storage->toggle ^= 1;
+	dq_storage1 = q_storage->dq_storage[q_storage->toggle];
+	qbman_pull_desc_clear(&pulldesc);
+	qbman_pull_desc_set_numframes(&pulldesc, pull_size);
+	qbman_pull_desc_set_fq(&pulldesc, fqid);
+	qbman_pull_desc_set_storage(&pulldesc, dq_storage1,
+		(size_t)(DPAA2_VADDR_TO_IOVA(dq_storage1)), 1);
 
-	if (queue)
-		queue->num_users++;
+	/* Check if the previous issued command is completed.
+	 * Also seems like the SWP is shared between the Ethernet Driver
+	 * and the SEC driver.
+	 */
+	while (!qbman_check_command_complete(dq_storage))
+		;
+	if (dq_storage == get_swp_active_dqs(q_storage->active_dpio_id))
+		clear_swp_active_dqs(q_storage->active_dpio_id);
 
-	return queue;
-}
+	pending = 1;
 
-static void
-put_hw_queue(struct qdma_hw_queue *queue)
-{
-	struct qdma_per_core_info *core_info;
-	int lcore_id, num_hw_queues, i;
+	do {
+		/* Loop until the dq_storage is updated with
+		 * new token by QBMAN
+		 */
+		while (!qbman_check_new_result(dq_storage))
+			;
+		rte_prefetch0((void *)((size_t)(dq_storage + 2)));
+		/* Check whether Last Pull command is Expired and
+		 * setting Condition for Loop termination
+		 */
+		if (qbman_result_DQ_is_pull_complete(dq_storage)) {
+			pending = 0;
+			/* Check for valid frame. */
+			status = qbman_result_DQ_flags(dq_storage);
+			if (unlikely((status & QBMAN_DQ_STAT_VALIDFRAME) == 0))
+				continue;
+		}
+		fd = qbman_result_DQ_fd(dq_storage);
 
-	DPAA2_QDMA_FUNC_TRACE();
+		vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx]);
+		if (vq_id)
+			vq_id[num_rx] = vqid;
 
-	/*
-	 * If this is the last user of the queue free it.
-	 * Also remove it from QDMA core info.
-	 */
-	if (queue->num_users == 1) {
-		free_hw_queue(queue);
+		dq_storage++;
+		num_rx++;
+	} while (pending);
 
-		/* Remove the physical queue from core info */
-		lcore_id = queue->lcore_id;
-		core_info = &qdma_core_info[lcore_id];
-		num_hw_queues = core_info->num_hw_queues;
-		for (i = 0; i < num_hw_queues; i++) {
-			if (queue == core_info->hw_queues[i])
-				break;
+	if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
+		while (!qbman_check_command_complete(
+			get_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)))
+			;
+		clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
+	}
+	/* issue a volatile dequeue command for next pull */
+	while (1) {
+		if (qbman_swp_pull(swp, &pulldesc)) {
+			DPAA2_QDMA_DP_WARN(
+				"VDQ command is not issued. QBMAN is busy (2)\n");
+			continue;
 		}
-		for (; i < num_hw_queues - 1; i++)
-			core_info->hw_queues[i] = core_info->hw_queues[i + 1];
-		core_info->hw_queues[i] = NULL;
-	} else {
-		queue->num_users--;
+		break;
 	}
-}
-
-static int
-dpaa2_qdma_attr_get(struct rte_rawdev *rawdev,
-		    __rte_unused const char *attr_name,
-		    uint64_t *attr_value)
-{
-	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	struct rte_qdma_attr *qdma_attr = (struct rte_qdma_attr *)attr_value;
-
-	DPAA2_QDMA_FUNC_TRACE();
 
-	qdma_attr->num_hw_queues = qdma_dev->num_hw_queues;
+	q_storage->active_dqs = dq_storage1;
+	q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
+	set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index, dq_storage1);
 
-	return 0;
+	return num_rx;
 }
 
 static int
-dpaa2_qdma_reset(struct rte_rawdev *rawdev)
+dpdmai_dev_dequeue_multijob_no_prefetch(
+		struct qdma_virt_queue *qdma_vq,
+		uint16_t *vq_id,
+		struct rte_qdma_job **job,
+		uint16_t nb_jobs)
 {
-	struct qdma_hw_queue *queue;
-	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	int i;
+	struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
+	uint16_t rxq_id = qdma_pq->queue_id;
 
-	DPAA2_QDMA_FUNC_TRACE();
+	struct dpaa2_queue *rxq;
+	struct qbman_result *dq_storage;
+	struct qbman_pull_desc pulldesc;
+	struct qbman_swp *swp;
+	uint32_t fqid;
+	uint8_t status, pending;
+	uint8_t num_rx = 0;
+	const struct qbman_fd *fd;
+	uint16_t vqid;
+	int ret, next_pull = nb_jobs, num_pulled = 0;
 
-	/* In case QDMA device is not in stopped state, return -EBUSY */
-	if (qdma_dev->state == 1) {
-		DPAA2_QDMA_ERR(
-			"Device is in running state. Stop before reset.");
-		return -EBUSY;
+	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
+		ret = dpaa2_affine_qbman_swp();
+		if (ret) {
+			DPAA2_QDMA_ERR(
+				"Failed to allocate IO portal, tid: %d\n",
+				rte_gettid());
+			return 0;
+		}
 	}
+	swp = DPAA2_PER_LCORE_PORTAL;
 
-	/* In case there are pending jobs on any VQ, return -EBUSY */
-	for (i = 0; i < qdma_dev->max_vqs; i++) {
-		if (qdma_vqs[i].in_use && (qdma_vqs[i].num_enqueues !=
-		    qdma_vqs[i].num_dequeues)) {
-			DPAA2_QDMA_ERR("Jobs are still pending on VQ: %d", i);
-			return -EBUSY;
-		}
-	}
-
-	/* Reset HW queues */
-	TAILQ_FOREACH(queue, &qdma_queue_list, next)
-		queue->num_users = 0;
-
-	/* Reset and free virtual queues */
-	for (i = 0; i < qdma_dev->max_vqs; i++) {
-		if (qdma_vqs[i].status_ring)
-			rte_ring_free(qdma_vqs[i].status_ring);
-	}
-	if (qdma_vqs)
-		rte_free(qdma_vqs);
-	qdma_vqs = NULL;
-
-	/* Reset per core info */
-	memset(&qdma_core_info, 0,
-		sizeof(struct qdma_per_core_info) * RTE_MAX_LCORE);
-
-	/* Free the FLE pool */
-	if (qdma_dev->fle_pool)
-		rte_mempool_free(qdma_dev->fle_pool);
-
-	/* Reset QDMA device structure */
-	qdma_dev->mode = RTE_QDMA_MODE_HW;
-	qdma_dev->max_hw_queues_per_core = 0;
-	qdma_dev->fle_pool = NULL;
-	qdma_dev->fle_pool_count = 0;
-	qdma_dev->max_vqs = 0;
-
-	return 0;
-}
-
-static int
-dpaa2_qdma_configure(const struct rte_rawdev *rawdev,
-			 rte_rawdev_obj_t config,
-			 size_t config_size)
-{
-	char fle_pool_name[32]; /* RTE_MEMZONE_NAMESIZE = 32 */
-	struct rte_qdma_config *qdma_config = (struct rte_qdma_config *)config;
-	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-
-	DPAA2_QDMA_FUNC_TRACE();
-
-	if (config_size != sizeof(*qdma_config))
-		return -EINVAL;
-
-	/* In case QDMA device is not in stopped state, return -EBUSY */
-	if (qdma_dev->state == 1) {
-		DPAA2_QDMA_ERR(
-			"Device is in running state. Stop before config.");
-		return -1;
-	}
-
-	/* Set mode */
-	qdma_dev->mode = qdma_config->mode;
-
-	/* Set max HW queue per core */
-	if (qdma_config->max_hw_queues_per_core > MAX_HW_QUEUE_PER_CORE) {
-		DPAA2_QDMA_ERR("H/W queues per core is more than: %d",
-			       MAX_HW_QUEUE_PER_CORE);
-		return -EINVAL;
-	}
-	qdma_dev->max_hw_queues_per_core =
-		qdma_config->max_hw_queues_per_core;
-
-	/* Allocate Virtual Queues */
-	qdma_vqs = rte_malloc("qdma_virtual_queues",
-			(sizeof(struct qdma_virt_queue) * qdma_config->max_vqs),
-			RTE_CACHE_LINE_SIZE);
-	if (!qdma_vqs) {
-		DPAA2_QDMA_ERR("qdma_virtual_queues allocation failed");
-		return -ENOMEM;
-	}
-	qdma_dev->max_vqs = qdma_config->max_vqs;
-
-	/* Allocate FLE pool; just append PID so that in case of
-	 * multiprocess, the pool's don't collide.
-	 */
-	snprintf(fle_pool_name, sizeof(fle_pool_name), "qdma_fle_pool%u",
-		 getpid());
-	qdma_dev->fle_pool = rte_mempool_create(fle_pool_name,
-			qdma_config->fle_pool_count, QDMA_FLE_POOL_SIZE,
-			QDMA_FLE_CACHE_SIZE(qdma_config->fle_pool_count), 0,
-			NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
-	if (!qdma_dev->fle_pool) {
-		DPAA2_QDMA_ERR("qdma_fle_pool create failed");
-		rte_free(qdma_vqs);
-		qdma_vqs = NULL;
-		return -ENOMEM;
-	}
-	qdma_dev->fle_pool_count = qdma_config->fle_pool_count;
-
-	if (qdma_config->format == RTE_QDMA_ULTRASHORT_FORMAT) {
-		dpdmai_dev_get_job = dpdmai_dev_get_job_us;
-		dpdmai_dev_set_fd = dpdmai_dev_set_fd_us;
-	} else {
-		dpdmai_dev_get_job = dpdmai_dev_get_job_lf;
-		dpdmai_dev_set_fd = dpdmai_dev_set_fd_lf;
-	}
-	return 0;
-}
-
-static int
-dpaa2_qdma_start(struct rte_rawdev *rawdev)
-{
-	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-
-	DPAA2_QDMA_FUNC_TRACE();
-
-	qdma_dev->state = 1;
-
-	return 0;
-}
-
-static int
-dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
-			  __rte_unused uint16_t queue_id,
-			  rte_rawdev_obj_t queue_conf,
-			  size_t conf_size)
-{
-	char ring_name[32];
-	int i;
-	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
-	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
-	struct rte_qdma_queue_config *q_config =
-		(struct rte_qdma_queue_config *)queue_conf;
-
-	DPAA2_QDMA_FUNC_TRACE();
+	rxq = &(dpdmai_dev->rx_queue[rxq_id]);
+	fqid = rxq->fqid;
 
-	if (conf_size != sizeof(*q_config))
-		return -EINVAL;
+	do {
+		dq_storage = rxq->q_storage->dq_storage[0];
+		/* Prepare dequeue descriptor */
+		qbman_pull_desc_clear(&pulldesc);
+		qbman_pull_desc_set_fq(&pulldesc, fqid);
+		qbman_pull_desc_set_storage(&pulldesc, dq_storage,
+			(uint64_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
 
-	rte_spinlock_lock(&qdma_dev->lock);
+		if (next_pull > dpaa2_dqrr_size) {
+			qbman_pull_desc_set_numframes(&pulldesc,
+					dpaa2_dqrr_size);
+			next_pull -= dpaa2_dqrr_size;
+		} else {
+			qbman_pull_desc_set_numframes(&pulldesc, next_pull);
+			next_pull = 0;
+		}
 
-	/* Get a free Virtual Queue */
-	for (i = 0; i < qdma_dev->max_vqs; i++) {
-		if (qdma_vqs[i].in_use == 0)
+		while (1) {
+			if (qbman_swp_pull(swp, &pulldesc)) {
+				DPAA2_QDMA_DP_WARN(
+					"VDQ command not issued. QBMAN busy");
+				/* Portal was busy, try again */
+				continue;
+			}
 			break;
-	}
+		}
 
-	/* Return in case no VQ is free */
-	if (i == qdma_dev->max_vqs) {
-		rte_spinlock_unlock(&qdma_dev->lock);
-		DPAA2_QDMA_ERR("Unable to get lock on QDMA device");
-		return -ENODEV;
-	}
+		rte_prefetch0((void *)((size_t)(dq_storage + 1)));
+		/* Check if the previous issued command is completed. */
+		while (!qbman_check_command_complete(dq_storage))
+			;
 
-	if (qdma_dev->mode == RTE_QDMA_MODE_HW ||
-			(q_config->flags & RTE_QDMA_VQ_EXCLUSIVE_PQ)) {
-		/* Allocate HW queue for a VQ */
-		qdma_vqs[i].hw_queue = alloc_hw_queue(q_config->lcore_id);
-		qdma_vqs[i].exclusive_hw_queue = 1;
-	} else {
-		/* Allocate a Ring for Virutal Queue in VQ mode */
-		snprintf(ring_name, sizeof(ring_name), "status ring %d", i);
-		qdma_vqs[i].status_ring = rte_ring_create(ring_name,
-			qdma_dev->fle_pool_count, rte_socket_id(), 0);
-		if (!qdma_vqs[i].status_ring) {
-			DPAA2_QDMA_ERR("Status ring creation failed for vq");
-			rte_spinlock_unlock(&qdma_dev->lock);
-			return rte_errno;
-		}
+		num_pulled = 0;
+		pending = 1;
 
-		/* Get a HW queue (shared) for a VQ */
-		qdma_vqs[i].hw_queue = get_hw_queue(qdma_dev,
-						    q_config->lcore_id);
-		qdma_vqs[i].exclusive_hw_queue = 0;
-	}
+		do {
+			/* Loop until dq_storage is updated
+			 * with new token by QBMAN
+			 */
+			while (!qbman_check_new_result(dq_storage))
+				;
+			rte_prefetch0((void *)((size_t)(dq_storage + 2)));
 
-	if (qdma_vqs[i].hw_queue == NULL) {
-		DPAA2_QDMA_ERR("No H/W queue available for VQ");
-		if (qdma_vqs[i].status_ring)
-			rte_ring_free(qdma_vqs[i].status_ring);
-		qdma_vqs[i].status_ring = NULL;
-		rte_spinlock_unlock(&qdma_dev->lock);
-		return -ENODEV;
-	}
+			if (qbman_result_DQ_is_pull_complete(dq_storage)) {
+				pending = 0;
+				/* Check for valid frame. */
+				status = qbman_result_DQ_flags(dq_storage);
+				if (unlikely((status &
+					QBMAN_DQ_STAT_VALIDFRAME) == 0))
+					continue;
+			}
+			fd = qbman_result_DQ_fd(dq_storage);
 
-	qdma_vqs[i].in_use = 1;
-	qdma_vqs[i].lcore_id = q_config->lcore_id;
-	memset(&qdma_vqs[i].rbp, 0, sizeof(struct rte_qdma_rbp));
-	rte_spinlock_unlock(&qdma_dev->lock);
+			vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx]);
+			if (vq_id)
+				vq_id[num_rx] = vqid;
 
-	if (q_config->rbp != NULL)
-		memcpy(&qdma_vqs[i].rbp, q_config->rbp,
-		       sizeof(struct rte_qdma_rbp));
+			dq_storage++;
+			num_rx++;
+			num_pulled++;
 
-	return i;
+		} while (pending);
+	/* Last VDQ provided all packets and more packets are requested */
+	} while (next_pull && num_pulled == dpaa2_dqrr_size);
+
+	return num_rx;
 }
 
 static int
-dpdmai_dev_enqueue_multi(struct dpaa2_dpdmai_dev *dpdmai_dev,
-			uint16_t txq_id,
-			uint16_t vq_id,
-			struct rte_qdma_rbp *rbp,
+dpdmai_dev_enqueue_multi(
+			struct qdma_virt_queue *qdma_vq,
 			struct rte_qdma_job **job,
 			uint16_t nb_jobs)
 {
+	struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
+	uint16_t txq_id = qdma_pq->queue_id;
+
 	struct qbman_fd fd[RTE_QDMA_BURST_NB_MAX];
 	struct dpaa2_queue *txq;
 	struct qbman_eq_desc eqdesc;
@@ -701,8 +604,7 @@ dpdmai_dev_enqueue_multi(struct dpaa2_dpdmai_dev *dpdmai_dev,
 			dpaa2_eqcr_size : nb_jobs;
 
 		for (loop = 0; loop < num_to_send; loop++) {
-			ret = dpdmai_dev_set_fd(dpdmai_dev->qdma_dev, &fd[loop],
-						job[num_tx], rbp, vq_id);
+			ret = qdma_vq->set_fd(qdma_vq, &fd[loop], job[num_tx]);
 			if (ret < 0) {
 				/* Set nb_jobs to loop, so outer while loop
 				 * breaks out.
@@ -716,6 +618,7 @@ dpdmai_dev_enqueue_multi(struct dpaa2_dpdmai_dev *dpdmai_dev,
 
 		/* Enqueue the packet to the QBMAN */
 		uint32_t enqueue_loop = 0, retry_count = 0;
+
 		while (enqueue_loop < loop) {
 			ret = qbman_swp_enqueue_multiple(swp,
 						&eqdesc,
@@ -736,299 +639,434 @@ dpdmai_dev_enqueue_multi(struct dpaa2_dpdmai_dev *dpdmai_dev,
 	return num_tx;
 }
 
-static int
-dpaa2_qdma_enqueue(__rte_unused struct rte_rawdev *rawdev,
-		  __rte_unused struct rte_rawdev_buf **buffers,
-		  unsigned int nb_jobs,
-		  rte_rawdev_obj_t context)
+static struct qdma_hw_queue *
+alloc_hw_queue(uint32_t lcore_id)
 {
-	struct rte_qdma_enqdeq *e_context = (struct rte_qdma_enqdeq *)context;
-	struct qdma_virt_queue *qdma_vq = &qdma_vqs[e_context->vq_id];
-	struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
-	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
-	int ret;
+	struct qdma_hw_queue *queue = NULL;
 
-	/* Return error in case of wrong lcore_id */
-	if (rte_lcore_id() != qdma_vq->lcore_id) {
-		DPAA2_QDMA_ERR("QDMA enqueue for vqid %d on wrong core",
-				e_context->vq_id);
-		return -EINVAL;
-	}
+	DPAA2_QDMA_FUNC_TRACE();
 
-	ret = dpdmai_dev_enqueue_multi(dpdmai_dev,
-				 qdma_pq->queue_id,
-				 e_context->vq_id,
-				 &qdma_vq->rbp,
-				 e_context->job,
-				 nb_jobs);
-	if (ret < 0) {
-		DPAA2_QDMA_ERR("DPDMAI device enqueue failed: %d", ret);
-		return ret;
+	/* Get a free queue from the list */
+	TAILQ_FOREACH(queue, &qdma_queue_list, next) {
+		if (queue->num_users == 0) {
+			queue->lcore_id = lcore_id;
+			queue->num_users++;
+			break;
+		}
 	}
 
-	qdma_vq->num_enqueues += ret;
+	return queue;
+}
 
-	return ret;
+static void
+free_hw_queue(struct qdma_hw_queue *queue)
+{
+	DPAA2_QDMA_FUNC_TRACE();
+
+	queue->num_users--;
 }
 
-/* Function to receive a QDMA job for a given device and queue*/
-static int
-dpdmai_dev_dequeue_multijob_prefetch(
-			struct dpaa2_dpdmai_dev *dpdmai_dev,
-			uint16_t rxq_id,
-			uint16_t *vq_id,
-			struct rte_qdma_job **job,
-			uint16_t nb_jobs)
+
+static struct qdma_hw_queue *
+get_hw_queue(struct qdma_device *qdma_dev, uint32_t lcore_id)
 {
-	struct dpaa2_queue *rxq;
-	struct qbman_result *dq_storage, *dq_storage1 = NULL;
-	struct qbman_pull_desc pulldesc;
-	struct qbman_swp *swp;
-	struct queue_storage_info_t *q_storage;
-	uint32_t fqid;
-	uint8_t status, pending;
-	uint8_t num_rx = 0;
-	const struct qbman_fd *fd;
-	uint16_t vqid;
-	int ret, pull_size;
+	struct qdma_per_core_info *core_info;
+	struct qdma_hw_queue *queue, *temp;
+	uint32_t least_num_users;
+	int num_hw_queues, i;
 
-	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
-		ret = dpaa2_affine_qbman_swp();
-		if (ret) {
-			DPAA2_QDMA_ERR(
-				"Failed to allocate IO portal, tid: %d\n",
-				rte_gettid());
-			return 0;
+	DPAA2_QDMA_FUNC_TRACE();
+
+	core_info = &qdma_core_info[lcore_id];
+	num_hw_queues = core_info->num_hw_queues;
+
+	/*
+	 * Allocate a HW queue if there are less queues
+	 * than maximum per core queues configured
+	 */
+	if (num_hw_queues < qdma_dev->max_hw_queues_per_core) {
+		queue = alloc_hw_queue(lcore_id);
+		if (queue) {
+			core_info->hw_queues[num_hw_queues] = queue;
+			core_info->num_hw_queues++;
+			return queue;
 		}
 	}
-	swp = DPAA2_PER_LCORE_PORTAL;
 
-	pull_size = (nb_jobs > dpaa2_dqrr_size) ? dpaa2_dqrr_size : nb_jobs;
-	rxq = &(dpdmai_dev->rx_queue[rxq_id]);
-	fqid = rxq->fqid;
-	q_storage = rxq->q_storage;
+	queue = core_info->hw_queues[0];
+	/* In case there is no queue associated with the core return NULL */
+	if (!queue)
+		return NULL;
 
-	if (unlikely(!q_storage->active_dqs)) {
-		q_storage->toggle = 0;
-		dq_storage = q_storage->dq_storage[q_storage->toggle];
-		q_storage->last_num_pkts = pull_size;
-		qbman_pull_desc_clear(&pulldesc);
-		qbman_pull_desc_set_numframes(&pulldesc,
-					      q_storage->last_num_pkts);
-		qbman_pull_desc_set_fq(&pulldesc, fqid);
-		qbman_pull_desc_set_storage(&pulldesc, dq_storage,
-				(size_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
-		if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
-			while (!qbman_check_command_complete(
-			       get_swp_active_dqs(
-			       DPAA2_PER_LCORE_DPIO->index)))
-				;
-			clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
+	/* Fetch the least loaded H/W queue */
+	least_num_users = core_info->hw_queues[0]->num_users;
+	for (i = 0; i < num_hw_queues; i++) {
+		temp = core_info->hw_queues[i];
+		if (temp->num_users < least_num_users)
+			queue = temp;
+	}
+
+	if (queue)
+		queue->num_users++;
+
+	return queue;
+}
+
+static void
+put_hw_queue(struct qdma_hw_queue *queue)
+{
+	struct qdma_per_core_info *core_info;
+	int lcore_id, num_hw_queues, i;
+
+	DPAA2_QDMA_FUNC_TRACE();
+
+	/*
+	 * If this is the last user of the queue free it.
+	 * Also remove it from QDMA core info.
+	 */
+	if (queue->num_users == 1) {
+		free_hw_queue(queue);
+
+		/* Remove the physical queue from core info */
+		lcore_id = queue->lcore_id;
+		core_info = &qdma_core_info[lcore_id];
+		num_hw_queues = core_info->num_hw_queues;
+		for (i = 0; i < num_hw_queues; i++) {
+			if (queue == core_info->hw_queues[i])
+				break;
 		}
-		while (1) {
-			if (qbman_swp_pull(swp, &pulldesc)) {
-				DPAA2_QDMA_DP_WARN(
-					"VDQ command not issued.QBMAN busy\n");
-					/* Portal was busy, try again */
-				continue;
-			}
-			break;
+		for (; i < num_hw_queues - 1; i++)
+			core_info->hw_queues[i] = core_info->hw_queues[i + 1];
+		core_info->hw_queues[i] = NULL;
+	} else {
+		queue->num_users--;
+	}
+}
+
+static int
+dpaa2_qdma_attr_get(struct rte_rawdev *rawdev,
+		    __rte_unused const char *attr_name,
+		    uint64_t *attr_value)
+{
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct rte_qdma_attr *qdma_attr = (struct rte_qdma_attr *)attr_value;
+
+	DPAA2_QDMA_FUNC_TRACE();
+
+	qdma_attr->num_hw_queues = qdma_dev->num_hw_queues;
+
+	return 0;
+}
+
+static int
+dpaa2_qdma_reset(struct rte_rawdev *rawdev)
+{
+	struct qdma_hw_queue *queue;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	int i;
+
+	DPAA2_QDMA_FUNC_TRACE();
+
+	/* In case QDMA device is not in stopped state, return -EBUSY */
+	if (qdma_dev->state == 1) {
+		DPAA2_QDMA_ERR(
+			"Device is in running state. Stop before reset.");
+		return -EBUSY;
+	}
+
+	/* In case there are pending jobs on any VQ, return -EBUSY */
+	for (i = 0; i < qdma_dev->max_vqs; i++) {
+		if (qdma_dev->vqs[i].in_use && (qdma_dev->vqs[i].num_enqueues !=
+		    qdma_dev->vqs[i].num_dequeues)) {
+			DPAA2_QDMA_ERR("Jobs are still pending on VQ: %d", i);
+			return -EBUSY;
 		}
-		q_storage->active_dqs = dq_storage;
-		q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
-		set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index,
-				   dq_storage);
 	}
 
-	dq_storage = q_storage->active_dqs;
-	rte_prefetch0((void *)(size_t)(dq_storage));
-	rte_prefetch0((void *)(size_t)(dq_storage + 1));
+	/* Reset HW queues */
+	TAILQ_FOREACH(queue, &qdma_queue_list, next)
+		queue->num_users = 0;
 
-	/* Prepare next pull descriptor. This will give space for the
-	 * prefething done on DQRR entries
-	 */
-	q_storage->toggle ^= 1;
-	dq_storage1 = q_storage->dq_storage[q_storage->toggle];
-	qbman_pull_desc_clear(&pulldesc);
-	qbman_pull_desc_set_numframes(&pulldesc, pull_size);
-	qbman_pull_desc_set_fq(&pulldesc, fqid);
-	qbman_pull_desc_set_storage(&pulldesc, dq_storage1,
-		(size_t)(DPAA2_VADDR_TO_IOVA(dq_storage1)), 1);
+	/* Reset and free virtual queues */
+	for (i = 0; i < qdma_dev->max_vqs; i++) {
+		if (qdma_dev->vqs[i].status_ring)
+			rte_ring_free(qdma_dev->vqs[i].status_ring);
+	}
+	if (qdma_dev->vqs)
+		rte_free(qdma_dev->vqs);
+	qdma_dev->vqs = NULL;
 
-	/* Check if the previous issued command is completed.
-	 * Also seems like the SWP is shared between the Ethernet Driver
-	 * and the SEC driver.
+	/* Reset per core info */
+	memset(&qdma_core_info, 0,
+		sizeof(struct qdma_per_core_info) * RTE_MAX_LCORE);
+
+	/* Free the FLE pool */
+	if (qdma_dev->fle_pool)
+		rte_mempool_free(qdma_dev->fle_pool);
+
+	/* Reset QDMA device structure */
+	qdma_dev->max_hw_queues_per_core = 0;
+	qdma_dev->fle_pool = NULL;
+	qdma_dev->fle_pool_count = 0;
+	qdma_dev->max_vqs = 0;
+
+	return 0;
+}
+
+static int
+dpaa2_qdma_configure(const struct rte_rawdev *rawdev,
+			 rte_rawdev_obj_t config,
+			 size_t config_size)
+{
+	char name[32]; /* RTE_MEMZONE_NAMESIZE = 32 */
+	struct rte_qdma_config *qdma_config = (struct rte_qdma_config *)config;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+
+	DPAA2_QDMA_FUNC_TRACE();
+
+	if (config_size != sizeof(*qdma_config))
+		return -EINVAL;
+
+	/* In case QDMA device is not in stopped state, return -EBUSY */
+	if (qdma_dev->state == 1) {
+		DPAA2_QDMA_ERR(
+			"Device is in running state. Stop before config.");
+		return -1;
+	}
+
+	/* Set max HW queue per core */
+	if (qdma_config->max_hw_queues_per_core > MAX_HW_QUEUE_PER_CORE) {
+		DPAA2_QDMA_ERR("H/W queues per core is more than: %d",
+			       MAX_HW_QUEUE_PER_CORE);
+		return -EINVAL;
+	}
+	qdma_dev->max_hw_queues_per_core =
+		qdma_config->max_hw_queues_per_core;
+
+	/* Allocate Virtual Queues */
+	sprintf(name, "qdma_%d_vq", rawdev->dev_id);
+	qdma_dev->vqs = rte_malloc(name,
+			(sizeof(struct qdma_virt_queue) * qdma_config->max_vqs),
+			RTE_CACHE_LINE_SIZE);
+	if (!qdma_dev->vqs) {
+		DPAA2_QDMA_ERR("qdma_virtual_queues allocation failed");
+		return -ENOMEM;
+	}
+	qdma_dev->max_vqs = qdma_config->max_vqs;
+
+	/* Allocate FLE pool; just append PID so that in case of
+	 * multiprocess, the pool's don't collide.
 	 */
-	while (!qbman_check_command_complete(dq_storage))
-		;
-	if (dq_storage == get_swp_active_dqs(q_storage->active_dpio_id))
-		clear_swp_active_dqs(q_storage->active_dpio_id);
+	snprintf(name, sizeof(name), "qdma_fle_pool%u",
+		 getpid());
+	qdma_dev->fle_pool = rte_mempool_create(name,
+			qdma_config->fle_pool_count, QDMA_FLE_POOL_SIZE,
+			QDMA_FLE_CACHE_SIZE(qdma_config->fle_pool_count), 0,
+			NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
+	if (!qdma_dev->fle_pool) {
+		DPAA2_QDMA_ERR("qdma_fle_pool create failed");
+		rte_free(qdma_dev->vqs);
+		qdma_dev->vqs = NULL;
+		return -ENOMEM;
+	}
+	qdma_dev->fle_pool_count = qdma_config->fle_pool_count;
 
-	pending = 1;
+	return 0;
+}
 
-	do {
-		/* Loop until the dq_storage is updated with
-		 * new token by QBMAN
-		 */
-		while (!qbman_check_new_result(dq_storage))
-			;
-		rte_prefetch0((void *)((size_t)(dq_storage + 2)));
-		/* Check whether Last Pull command is Expired and
-		 * setting Condition for Loop termination
-		 */
-		if (qbman_result_DQ_is_pull_complete(dq_storage)) {
-			pending = 0;
-			/* Check for valid frame. */
-			status = qbman_result_DQ_flags(dq_storage);
-			if (unlikely((status & QBMAN_DQ_STAT_VALIDFRAME) == 0))
-				continue;
-		}
-		fd = qbman_result_DQ_fd(dq_storage);
+static int
+dpaa2_qdma_start(struct rte_rawdev *rawdev)
+{
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+
+	DPAA2_QDMA_FUNC_TRACE();
+
+	qdma_dev->state = 1;
 
-		vqid = dpdmai_dev_get_job(dpdmai_dev->qdma_dev, fd,
-					  &job[num_rx]);
-		if (vq_id)
-			vq_id[num_rx] = vqid;
+	return 0;
+}
 
-		dq_storage++;
-		num_rx++;
-	} while (pending);
+static int
+check_devargs_handler(__rte_unused const char *key, const char *value,
+		      __rte_unused void *opaque)
+{
+	if (strcmp(value, "1"))
+		return -1;
 
-	if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
-		while (!qbman_check_command_complete(
-		       get_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)))
-			;
-		clear_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index);
-	}
-	/* issue a volatile dequeue command for next pull */
-	while (1) {
-		if (qbman_swp_pull(swp, &pulldesc)) {
-			DPAA2_QDMA_DP_WARN("VDQ command is not issued."
-					  "QBMAN is busy (2)\n");
-			continue;
-		}
-		break;
+	return 0;
+}
+
+static int
+dpaa2_get_devargs(struct rte_devargs *devargs, const char *key)
+{
+	struct rte_kvargs *kvlist;
+
+	if (!devargs)
+		return 0;
+
+	kvlist = rte_kvargs_parse(devargs->args, NULL);
+	if (!kvlist)
+		return 0;
+
+	if (!rte_kvargs_count(kvlist, key)) {
+		rte_kvargs_free(kvlist);
+		return 0;
 	}
 
-	q_storage->active_dqs = dq_storage1;
-	q_storage->active_dpio_id = DPAA2_PER_LCORE_DPIO->index;
-	set_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index, dq_storage1);
+	if (rte_kvargs_process(kvlist, key,
+			       check_devargs_handler, NULL) < 0) {
+		rte_kvargs_free(kvlist);
+		return 0;
+	}
+	rte_kvargs_free(kvlist);
 
-	return num_rx;
+	return 1;
 }
 
 static int
-dpdmai_dev_dequeue_multijob_no_prefetch(
-		struct dpaa2_dpdmai_dev *dpdmai_dev,
-		uint16_t rxq_id,
-		uint16_t *vq_id,
-		struct rte_qdma_job **job,
-		uint16_t nb_jobs)
+dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
+			  __rte_unused uint16_t queue_id,
+			  rte_rawdev_obj_t queue_conf,
+			  size_t conf_size)
 {
-	struct dpaa2_queue *rxq;
-	struct qbman_result *dq_storage;
-	struct qbman_pull_desc pulldesc;
-	struct qbman_swp *swp;
-	uint32_t fqid;
-	uint8_t status, pending;
-	uint8_t num_rx = 0;
-	const struct qbman_fd *fd;
-	uint16_t vqid;
-	int ret, next_pull = nb_jobs, num_pulled = 0;
+	char ring_name[32];
+	int i;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct rte_qdma_queue_config *q_config =
+		(struct rte_qdma_queue_config *)queue_conf;
 
-	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
-		ret = dpaa2_affine_qbman_swp();
-		if (ret) {
-			DPAA2_QDMA_ERR(
-				"Failed to allocate IO portal, tid: %d\n",
-				rte_gettid());
-			return 0;
+	DPAA2_QDMA_FUNC_TRACE();
+
+	if (conf_size != sizeof(*q_config))
+		return -EINVAL;
+
+	rte_spinlock_lock(&qdma_dev->lock);
+
+	/* Get a free Virtual Queue */
+	for (i = 0; i < qdma_dev->max_vqs; i++) {
+		if (qdma_dev->vqs[i].in_use == 0)
+			break;
+	}
+
+	/* Return in case no VQ is free */
+	if (i == qdma_dev->max_vqs) {
+		rte_spinlock_unlock(&qdma_dev->lock);
+		DPAA2_QDMA_ERR("Unable to get lock on QDMA device");
+		return -ENODEV;
+	}
+
+	if (q_config->flags & RTE_QDMA_VQ_EXCLUSIVE_PQ) {
+		/* Allocate HW queue for a VQ */
+		qdma_dev->vqs[i].hw_queue = alloc_hw_queue(q_config->lcore_id);
+		qdma_dev->vqs[i].exclusive_hw_queue = 1;
+	} else {
+		/* Allocate a Ring for Virtual Queue in VQ mode */
+		snprintf(ring_name, sizeof(ring_name), "status ring %d", i);
+		qdma_dev->vqs[i].status_ring = rte_ring_create(ring_name,
+			qdma_dev->fle_pool_count, rte_socket_id(), 0);
+		if (!qdma_dev->vqs[i].status_ring) {
+			DPAA2_QDMA_ERR("Status ring creation failed for vq");
+			rte_spinlock_unlock(&qdma_dev->lock);
+			return rte_errno;
 		}
+
+		/* Get a HW queue (shared) for a VQ */
+		qdma_dev->vqs[i].hw_queue = get_hw_queue(qdma_dev,
+						    q_config->lcore_id);
+		qdma_dev->vqs[i].exclusive_hw_queue = 0;
 	}
-	swp = DPAA2_PER_LCORE_PORTAL;
 
-	rxq = &(dpdmai_dev->rx_queue[rxq_id]);
-	fqid = rxq->fqid;
+	if (qdma_dev->vqs[i].hw_queue == NULL) {
+		DPAA2_QDMA_ERR("No H/W queue available for VQ");
+		if (qdma_dev->vqs[i].status_ring)
+			rte_ring_free(qdma_dev->vqs[i].status_ring);
+		qdma_dev->vqs[i].status_ring = NULL;
+		rte_spinlock_unlock(&qdma_dev->lock);
+		return -ENODEV;
+	}
 
-	do {
-		dq_storage = rxq->q_storage->dq_storage[0];
-		/* Prepare dequeue descriptor */
-		qbman_pull_desc_clear(&pulldesc);
-		qbman_pull_desc_set_fq(&pulldesc, fqid);
-		qbman_pull_desc_set_storage(&pulldesc, dq_storage,
-			(uint64_t)(DPAA2_VADDR_TO_IOVA(dq_storage)), 1);
+	qdma_dev->vqs[i].in_use = 1;
+	qdma_dev->vqs[i].lcore_id = q_config->lcore_id;
+	memset(&qdma_dev->vqs[i].rbp, 0, sizeof(struct rte_qdma_rbp));
 
-		if (next_pull > dpaa2_dqrr_size) {
-			qbman_pull_desc_set_numframes(&pulldesc,
-					dpaa2_dqrr_size);
-			next_pull -= dpaa2_dqrr_size;
-		} else {
-			qbman_pull_desc_set_numframes(&pulldesc, next_pull);
-			next_pull = 0;
-		}
+	if (q_config->flags & RTE_QDMA_VQ_FD_LONG_FORMAT) {
+		qdma_dev->vqs[i].set_fd = dpdmai_dev_set_fd_lf;
+		qdma_dev->vqs[i].get_job = dpdmai_dev_get_job_lf;
+	} else {
+		qdma_dev->vqs[i].set_fd = dpdmai_dev_set_fd_us;
+		qdma_dev->vqs[i].get_job = dpdmai_dev_get_job_us;
+	}
+	if (dpaa2_get_devargs(rawdev->device->devargs,
+			DPAA2_QDMA_NO_PREFETCH) ||
+			(getenv("DPAA2_NO_QDMA_PREFETCH_RX"))) {
+		/* If no prefetch is configured. */
+		qdma_dev->vqs[i].dequeue_job =
+				dpdmai_dev_dequeue_multijob_no_prefetch;
+		DPAA2_QDMA_INFO("No Prefetch RX Mode enabled");
+	} else {
+		qdma_dev->vqs[i].dequeue_job =
+			dpdmai_dev_dequeue_multijob_prefetch;
+	}
 
-		while (1) {
-			if (qbman_swp_pull(swp, &pulldesc)) {
-				DPAA2_QDMA_DP_WARN("VDQ command not issued. QBMAN busy");
-				/* Portal was busy, try again */
-				continue;
-			}
-			break;
-		}
+	qdma_dev->vqs[i].enqueue_job = dpdmai_dev_enqueue_multi;
 
-		rte_prefetch0((void *)((size_t)(dq_storage + 1)));
-		/* Check if the previous issued command is completed. */
-		while (!qbman_check_command_complete(dq_storage))
-			;
+	if (q_config->rbp != NULL)
+		memcpy(&qdma_dev->vqs[i].rbp, q_config->rbp,
+				sizeof(struct rte_qdma_rbp));
 
-		num_pulled = 0;
-		pending = 1;
+	rte_spinlock_unlock(&qdma_dev->lock);
 
-		do {
-			/* Loop until dq_storage is updated
-			 * with new token by QBMAN
-			 */
-			while (!qbman_check_new_result(dq_storage))
-				;
-			rte_prefetch0((void *)((size_t)(dq_storage + 2)));
+	return i;
+}
 
-			if (qbman_result_DQ_is_pull_complete(dq_storage)) {
-				pending = 0;
-				/* Check for valid frame. */
-				status = qbman_result_DQ_flags(dq_storage);
-				if (unlikely((status &
-					QBMAN_DQ_STAT_VALIDFRAME) == 0))
-					continue;
-			}
-			fd = qbman_result_DQ_fd(dq_storage);
+static int
+dpaa2_qdma_enqueue(struct rte_rawdev *rawdev,
+		  __rte_unused struct rte_rawdev_buf **buffers,
+		  unsigned int nb_jobs,
+		  rte_rawdev_obj_t context)
+{
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct rte_qdma_enqdeq *e_context =
+		(struct rte_qdma_enqdeq *)context;
+	struct qdma_virt_queue *qdma_vq =
+		&dpdmai_dev->qdma_dev->vqs[e_context->vq_id];
+	int ret;
 
-			vqid = dpdmai_dev_get_job(dpdmai_dev->qdma_dev, fd,
-						  &job[num_rx]);
-			if (vq_id)
-				vq_id[num_rx] = vqid;
+	/* Return error in case of wrong lcore_id */
+	if (rte_lcore_id() != qdma_vq->lcore_id) {
+		DPAA2_QDMA_ERR("QDMA enqueue for vqid %d on wrong core",
+				e_context->vq_id);
+		return -EINVAL;
+	}
 
-			dq_storage++;
-			num_rx++;
-			num_pulled++;
+	ret = qdma_vq->enqueue_job(qdma_vq, e_context->job, nb_jobs);
+	if (ret < 0) {
+		DPAA2_QDMA_ERR("DPDMAI device enqueue failed: %d", ret);
+		return ret;
+	}
 
-		} while (pending);
-	/* Last VDQ provided all packets and more packets are requested */
-	} while (next_pull && num_pulled == dpaa2_dqrr_size);
+	qdma_vq->num_enqueues += ret;
 
-	return num_rx;
+	return ret;
 }
 
 static int
-dpaa2_qdma_dequeue(__rte_unused struct rte_rawdev *rawdev,
+dpaa2_qdma_dequeue(struct rte_rawdev *rawdev,
 		   __rte_unused struct rte_rawdev_buf **buffers,
 		   unsigned int nb_jobs,
 		   rte_rawdev_obj_t cntxt)
 {
-	struct rte_qdma_enqdeq *context = (struct rte_qdma_enqdeq *)cntxt;
-	struct qdma_virt_queue *qdma_vq = &qdma_vqs[context->vq_id];
-	struct qdma_hw_queue *qdma_pq = qdma_vq->hw_queue;
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct rte_qdma_enqdeq *context =
+		(struct rte_qdma_enqdeq *)cntxt;
+	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[context->vq_id];
 	struct qdma_virt_queue *temp_qdma_vq;
-	struct dpaa2_dpdmai_dev *dpdmai_dev = qdma_pq->dpdmai_dev;
 	int ret = 0, i;
 	unsigned int ring_count;
 
@@ -1044,12 +1082,12 @@ dpaa2_qdma_dequeue(__rte_unused struct rte_rawdev *rawdev,
 		return 0;
 
 	if (qdma_vq->num_enqueues < (qdma_vq->num_dequeues + nb_jobs))
-		nb_jobs = (qdma_vq->num_enqueues -  qdma_vq->num_dequeues);
+		nb_jobs = (qdma_vq->num_enqueues - qdma_vq->num_dequeues);
 
 	if (qdma_vq->exclusive_hw_queue) {
 		/* In case of exclusive queue directly fetch from HW queue */
-		ret = dpdmai_dev_dequeue_multijob(dpdmai_dev, qdma_pq->queue_id,
-					 NULL, context->job, nb_jobs);
+		ret = qdma_vq->dequeue_job(qdma_vq, NULL,
+					context->job, nb_jobs);
 		if (ret < 0) {
 			DPAA2_QDMA_ERR(
 				"Dequeue from DPDMAI device failed: %d", ret);
@@ -1066,11 +1104,10 @@ dpaa2_qdma_dequeue(__rte_unused struct rte_rawdev *rawdev,
 		ring_count = rte_ring_count(qdma_vq->status_ring);
 		if (ring_count < nb_jobs) {
 			/* TODO - How to have right budget */
-			ret = dpdmai_dev_dequeue_multijob(dpdmai_dev,
-					qdma_pq->queue_id,
+			ret = qdma_vq->dequeue_job(qdma_vq,
 					temp_vq_id, context->job, nb_jobs);
 			for (i = 0; i < ret; i++) {
-				temp_qdma_vq = &qdma_vqs[temp_vq_id[i]];
+				temp_qdma_vq = &qdma_dev->vqs[temp_vq_id[i]];
 				rte_ring_enqueue(temp_qdma_vq->status_ring,
 					(void *)(context->job[i]));
 			}
@@ -1094,10 +1131,13 @@ dpaa2_qdma_dequeue(__rte_unused struct rte_rawdev *rawdev,
 }
 
 void
-rte_qdma_vq_stats(uint16_t vq_id,
-		  struct rte_qdma_vq_stats *vq_status)
+rte_qdma_vq_stats(struct rte_rawdev *rawdev,
+		uint16_t vq_id,
+		struct rte_qdma_vq_stats *vq_status)
 {
-	struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id];
+	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
+	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
+	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vq_id];
 
 	if (qdma_vq->in_use) {
 		vq_status->exclusive_hw_queue = qdma_vq->exclusive_hw_queue;
@@ -1116,7 +1156,7 @@ dpaa2_qdma_queue_release(struct rte_rawdev *rawdev,
 	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
 	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
 
-	struct qdma_virt_queue *qdma_vq = &qdma_vqs[vq_id];
+	struct qdma_virt_queue *qdma_vq = &qdma_dev->vqs[vq_id];
 
 	DPAA2_QDMA_FUNC_TRACE();
 
@@ -1129,8 +1169,8 @@ dpaa2_qdma_queue_release(struct rte_rawdev *rawdev,
 	if (qdma_vq->exclusive_hw_queue)
 		free_hw_queue(qdma_vq->hw_queue);
 	else {
-		if (qdma_vqs->status_ring)
-			rte_ring_free(qdma_vqs->status_ring);
+		if (qdma_vq->status_ring)
+			rte_ring_free(qdma_vq->status_ring);
 
 		put_hw_queue(qdma_vq->hw_queue);
 	}
@@ -1253,43 +1293,6 @@ dpaa2_dpdmai_dev_uninit(struct rte_rawdev *rawdev)
 	return 0;
 }
 
-static int
-check_devargs_handler(__rte_unused const char *key, const char *value,
-		      __rte_unused void *opaque)
-{
-	if (strcmp(value, "1"))
-		return -1;
-
-	return 0;
-}
-
-static int
-dpaa2_get_devargs(struct rte_devargs *devargs, const char *key)
-{
-	struct rte_kvargs *kvlist;
-
-	if (!devargs)
-		return 0;
-
-	kvlist = rte_kvargs_parse(devargs->args, NULL);
-	if (!kvlist)
-		return 0;
-
-	if (!rte_kvargs_count(kvlist, key)) {
-		rte_kvargs_free(kvlist);
-		return 0;
-	}
-
-	if (rte_kvargs_process(kvlist, key,
-			       check_devargs_handler, NULL) < 0) {
-		rte_kvargs_free(kvlist);
-		return 0;
-	}
-	rte_kvargs_free(kvlist);
-
-	return 1;
-}
-
 static int
 dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id)
 {
@@ -1393,17 +1396,6 @@ dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id)
 		goto init_err;
 	}
 
-	if (dpaa2_get_devargs(rawdev->device->devargs,
-		DPAA2_QDMA_NO_PREFETCH)) {
-		/* If no prefetch is configured. */
-		dpdmai_dev_dequeue_multijob =
-				dpdmai_dev_dequeue_multijob_no_prefetch;
-		DPAA2_QDMA_INFO("No Prefetch RX Mode enabled");
-	} else {
-		dpdmai_dev_dequeue_multijob =
-			dpdmai_dev_dequeue_multijob_prefetch;
-	}
-
 	if (!dpaa2_coherent_no_alloc_cache) {
 		if (dpaa2_svr_family == SVR_LX2160A) {
 			dpaa2_coherent_no_alloc_cache =
diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
index 3c112d28f..4265ee828 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
@@ -52,10 +52,11 @@ struct qdma_device {
 	 * This is limited by MAX_HW_QUEUE_PER_CORE
 	 */
 	uint16_t max_hw_queues_per_core;
+
+	/** VQ's of this device */
+	struct qdma_virt_queue *vqs;
 	/** Maximum number of VQ's */
 	uint16_t max_vqs;
-	/** mode of operation - physical(h/w) or virtual */
-	uint8_t mode;
 	/** Device state - started or stopped */
 	uint8_t state;
 	/** FLE pool for the device */
@@ -80,6 +81,26 @@ struct qdma_hw_queue {
 	uint32_t num_users;
 };
 
+struct qdma_virt_queue;
+
+typedef uint16_t (qdma_get_job_t)(struct qdma_virt_queue *qdma_vq,
+					const struct qbman_fd *fd,
+					struct rte_qdma_job **job);
+typedef int (qdma_set_fd_t)(struct qdma_virt_queue *qdma_vq,
+					struct qbman_fd *fd,
+					struct rte_qdma_job *job);
+
+typedef int (qdma_dequeue_multijob_t)(
+				struct qdma_virt_queue *qdma_vq,
+				uint16_t *vq_id,
+				struct rte_qdma_job **job,
+				uint16_t nb_jobs);
+
+typedef int (qdma_enqueue_multijob_t)(
+			struct qdma_virt_queue *qdma_vq,
+			struct rte_qdma_job **job,
+			uint16_t nb_jobs);
+
 /** Represents a QDMA virtual queue */
 struct qdma_virt_queue {
 	/** Status ring of the virtual queue */
@@ -98,6 +119,14 @@ struct qdma_virt_queue {
 	uint64_t num_enqueues;
 	/* Total number of dequeues from this VQ */
 	uint64_t num_dequeues;
+
+	uint16_t vq_id;
+
+	qdma_set_fd_t *set_fd;
+	qdma_get_job_t *get_job;
+
+	qdma_dequeue_multijob_t *dequeue_job;
+	qdma_enqueue_multijob_t *enqueue_job;
 };
 
 /** Represents a QDMA per core hw queues allocation in virtual mode */
@@ -176,4 +205,10 @@ struct dpaa2_dpdmai_dev {
 	struct qdma_device *qdma_dev;
 };
 
+static inline struct qdma_device *
+QDMA_DEV_OF_VQ(struct qdma_virt_queue *vq)
+{
+	return vq->hw_queue->dpdmai_dev->qdma_dev;
+}
+
 #endif /* __DPAA2_QDMA_H__ */
diff --git a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
index 71894d35e..ff4fc1d6c 100644
--- a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
@@ -50,6 +50,8 @@ enum {
  */
 #define RTE_QDMA_VQ_EXCLUSIVE_PQ	(1ULL)
 
+#define RTE_QDMA_VQ_FD_LONG_FORMAT		(1ULL << 1)
+
 /** States if the source addresses is physical. */
 #define RTE_QDMA_JOB_SRC_PHY		(1ULL)
 
@@ -68,10 +70,6 @@ struct rte_qdma_config {
 	uint16_t max_hw_queues_per_core;
 	/** Maximum number of VQ's to be used. */
 	uint16_t max_vqs;
-	/** mode of operation - physical(h/w) or virtual */
-	uint8_t mode;
-	/** FD format */
-	uint8_t format;
 	/**
 	 * User provides this as input to the driver as a size of the FLE pool.
 	 * FLE's (and corresponding source/destination descriptors) are
@@ -182,13 +180,16 @@ struct rte_qdma_queue_config {
 /**
  * Get a Virtual Queue statistics.
  *
+ * @param rawdev
+ *   Raw Device.
  * @param vq_id
  *   Virtual Queue ID.
  * @param vq_stats
  *   VQ statistics structure which will be filled in by the driver.
  */
 void
-rte_qdma_vq_stats(uint16_t vq_id,
-		  struct rte_qdma_vq_stats *vq_stats);
+rte_qdma_vq_stats(struct rte_rawdev *rawdev,
+		uint16_t vq_id,
+		struct rte_qdma_vq_stats *vq_stats);
 
 #endif /* __RTE_PMD_DPAA2_QDMA_H__*/
-- 
2.17.1


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH v2 4/7] raw/dpaa2_qdma: optimize IOVA conversion
  2020-10-15  9:47 ` [dpdk-dev] [PATCH v2 " Gagandeep Singh
                     ` (2 preceding siblings ...)
  2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 3/7] raw/dpaa2_qdma: refactor the code Gagandeep Singh
@ 2020-10-15  9:47   ` Gagandeep Singh
  2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 5/7] raw/dpaa2_qdma: support scatter gather in enqueue Gagandeep Singh
                     ` (2 subsequent siblings)
  6 siblings, 0 replies; 23+ messages in thread
From: Gagandeep Singh @ 2020-10-15  9:47 UTC (permalink / raw)
  To: dev, nipun.gupta, hemant.agrawal; +Cc: thomas, Jun Yang

From: Jun Yang <jun.yang@nxp.com>

rte_mempool_virt2iova is now used for converting with IOVA off.

Signed-off-by: Jun Yang <jun.yang@nxp.com>
---
 drivers/raw/dpaa2_qdma/dpaa2_qdma.c | 29 +++++++++++++++++++++++------
 drivers/raw/dpaa2_qdma/dpaa2_qdma.h |  5 +++++
 2 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
index 6eef7a57a..94dc7886a 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
@@ -221,7 +221,9 @@ static inline int dpdmai_dev_set_fd_lf(
 {
 	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
 	struct rte_qdma_job **ppjob;
+	void *elem;
 	struct qbman_fle *fle;
+	uint64_t elem_iova, fle_iova;
 	int ret = 0;
 	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
 
@@ -229,19 +231,29 @@ static inline int dpdmai_dev_set_fd_lf(
 	 * Get an FLE/SDD from FLE pool.
 	 * Note: IO metadata is before the FLE and SDD memory.
 	 */
-	ret = rte_mempool_get(qdma_dev->fle_pool, (void **)(&ppjob));
+	ret = rte_mempool_get(qdma_dev->fle_pool, (void **)(&elem));
 	if (ret) {
 		DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
 		return ret;
 	}
 
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+	elem_iova = rte_mempool_virt2iova(elem);
+#else
+	elem_iova = DPAA2_VADDR_TO_IOVA(elem);
+#endif
+
 	/* Set the metadata */
 	job->vq_id = qdma_vq->vq_id;
+	ppjob = (struct rte_qdma_job **)
+		((uintptr_t)(uint64_t)elem + QDMA_FLE_JOB_OFFSET);
 	*ppjob = job;
 
-	fle = (struct qbman_fle *)(ppjob + 1);
+	fle = (struct qbman_fle *)
+		((uintptr_t)(uint64_t)elem + QDMA_FLE_FLE_OFFSET);
+	fle_iova = elem_iova + QDMA_FLE_FLE_OFFSET;
 
-	DPAA2_SET_FD_ADDR(fd, DPAA2_VADDR_TO_IOVA(fle));
+	DPAA2_SET_FD_ADDR(fd, fle_iova);
 	DPAA2_SET_FD_COMPOUND_FMT(fd);
 	DPAA2_SET_FD_FRC(fd, QDMA_SER_CTX);
 
@@ -283,6 +295,8 @@ static inline uint16_t dpdmai_dev_get_job_lf(
 						const struct qbman_fd *fd,
 						struct rte_qdma_job **job)
 {
+	void *elem;
+	struct qbman_fle *fle;
 	struct rte_qdma_job **ppjob;
 	uint16_t vqid;
 	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
@@ -291,9 +305,12 @@ static inline uint16_t dpdmai_dev_get_job_lf(
 	 * Fetch metadata from FLE. job and vq_id were set
 	 * in metadata in the enqueue operation.
 	 */
-	ppjob = (struct rte_qdma_job **)
+	fle = (struct qbman_fle *)
 			DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd));
-	ppjob -= 1;
+	elem = (void *)((uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET);
+
+	ppjob = (struct rte_qdma_job **)
+		((uintptr_t)(uint64_t)elem + QDMA_FLE_JOB_OFFSET);
 
 	*job = (struct rte_qdma_job *)*ppjob;
 	(*job)->status = (DPAA2_GET_FD_ERR(fd) << 8) |
@@ -301,7 +318,7 @@ static inline uint16_t dpdmai_dev_get_job_lf(
 	vqid = (*job)->vq_id;
 
 	/* Free FLE to the pool */
-	rte_mempool_put(qdma_dev->fle_pool, (void *)ppjob);
+	rte_mempool_put(qdma_dev->fle_pool, elem);
 
 	return vqid;
 }
diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
index 4265ee828..ff7743fb5 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
@@ -17,6 +17,11 @@ struct rte_qdma_job;
 #define QDMA_FLE_POOL_SIZE (sizeof(struct rte_qdma_job *) + \
 		sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE + \
 		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD)
+
+#define QDMA_FLE_JOB_OFFSET 0
+#define QDMA_FLE_FLE_OFFSET \
+		(QDMA_FLE_JOB_OFFSET + sizeof(struct rte_qdma_job *))
+
 /** FLE pool cache size */
 #define QDMA_FLE_CACHE_SIZE(_num) (_num/(RTE_MAX_LCORE * 2))
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH v2 5/7] raw/dpaa2_qdma: support scatter gather in enqueue
  2020-10-15  9:47 ` [dpdk-dev] [PATCH v2 " Gagandeep Singh
                     ` (3 preceding siblings ...)
  2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 4/7] raw/dpaa2_qdma: optimize IOVA conversion Gagandeep Singh
@ 2020-10-15  9:47   ` Gagandeep Singh
  2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 6/7] raw/dpaa2_qdma: support FLE pool per queue Gagandeep Singh
  2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 7/7] raw/dpaa2_qdma: support enqueue without response wait Gagandeep Singh
  6 siblings, 0 replies; 23+ messages in thread
From: Gagandeep Singh @ 2020-10-15  9:47 UTC (permalink / raw)
  To: dev, nipun.gupta, hemant.agrawal; +Cc: thomas, Jun Yang

From: Jun Yang <jun.yang@nxp.com>

This patch adds scatter-gather (SG) support for jobs
on qdma queues.
It also supports gathering multiple enqueue jobs into SG enqueue job(s).

Signed-off-by: Jun Yang <jun.yang@nxp.com>
---
 drivers/bus/fslmc/portal/dpaa2_hw_pvt.h     |  18 +-
 drivers/raw/dpaa2_qdma/dpaa2_qdma.c         | 398 ++++++++++++++++----
 drivers/raw/dpaa2_qdma/dpaa2_qdma.h         |  75 +++-
 drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h |   2 +
 4 files changed, 412 insertions(+), 81 deletions(-)

diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
index 2dd53c63b..ac24f0145 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
@@ -210,12 +210,28 @@ struct dpaa2_dpcon_dev {
 };
 
 /* Refer to Table 7-3 in SEC BG */
+#define QBMAN_FLE_WORD4_FMT_SBF 0x0    /* Single buffer frame */
+#define QBMAN_FLE_WORD4_FMT_SGE 0x2 /* Scatter gather frame */
+
+struct qbman_fle_word4 {
+	uint32_t bpid:14; /* Frame buffer pool ID */
+	uint32_t ivp:1; /* Invalid Pool ID. */
+	uint32_t bmt:1; /* Bypass Memory Translation */
+	uint32_t offset:12; /* Frame offset */
+	uint32_t fmt:2; /* Frame Format */
+	uint32_t sl:1; /* Short Length */
+	uint32_t f:1; /* Final bit */
+};
+
 struct qbman_fle {
 	uint32_t addr_lo;
 	uint32_t addr_hi;
 	uint32_t length;
 	/* FMT must be 00, MSB is final bit  */
-	uint32_t fin_bpid_offset;
+	union {
+		uint32_t fin_bpid_offset;
+		struct qbman_fle_word4 word4;
+	};
 	uint32_t frc;
 	uint32_t reserved[3]; /* Not used currently */
 };
diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
index 94dc7886a..7b755cea7 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
@@ -116,17 +116,21 @@ qdma_populate_fd_ddr(phys_addr_t src, phys_addr_t dest,
 
 static void
 dpaa2_qdma_populate_fle(struct qbman_fle *fle,
+			uint64_t fle_iova,
 			struct rte_qdma_rbp *rbp,
 			uint64_t src, uint64_t dest,
-			size_t len, uint32_t flags)
+			size_t len, uint32_t flags, uint32_t fmt)
 {
 	struct qdma_sdd *sdd;
+	uint64_t sdd_iova;
 
-	sdd = (struct qdma_sdd *)((uint8_t *)(fle) +
-		(DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle)));
+	sdd = (struct qdma_sdd *)
+			((uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET +
+			QDMA_FLE_SDD_OFFSET);
+	sdd_iova = fle_iova - QDMA_FLE_FLE_OFFSET + QDMA_FLE_SDD_OFFSET;
 
 	/* first frame list to source descriptor */
-	DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(sdd));
+	DPAA2_SET_FLE_ADDR(fle, sdd_iova);
 	DPAA2_SET_FLE_LEN(fle, (2 * (sizeof(struct qdma_sdd))));
 
 	/* source and destination descriptor */
@@ -164,20 +168,26 @@ dpaa2_qdma_populate_fle(struct qbman_fle *fle,
 	/* source frame list to source buffer */
 	if (flags & RTE_QDMA_JOB_SRC_PHY) {
 		DPAA2_SET_FLE_ADDR(fle, src);
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
 		DPAA2_SET_FLE_BMT(fle);
+#endif
 	} else {
 		DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(src));
 	}
+	fle->word4.fmt = fmt;
 	DPAA2_SET_FLE_LEN(fle, len);
 
 	fle++;
 	/* destination frame list to destination buffer */
 	if (flags & RTE_QDMA_JOB_DEST_PHY) {
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
 		DPAA2_SET_FLE_BMT(fle);
+#endif
 		DPAA2_SET_FLE_ADDR(fle, dest);
 	} else {
 		DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(dest));
 	}
+	fle->word4.fmt = fmt;
 	DPAA2_SET_FLE_LEN(fle, len);
 
 	/* Final bit: 1, for last frame list */
@@ -187,44 +197,169 @@ dpaa2_qdma_populate_fle(struct qbman_fle *fle,
 static inline int dpdmai_dev_set_fd_us(
 		struct qdma_virt_queue *qdma_vq,
 		struct qbman_fd *fd,
-		struct rte_qdma_job *job)
+		struct rte_qdma_job **job,
+		uint16_t nb_jobs)
 {
 	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
 	struct rte_qdma_job **ppjob;
 	size_t iova;
-	int ret = 0;
+	int ret = 0, loop;
+
+	for (loop = 0; loop < nb_jobs; loop++) {
+		if (job[loop]->src & QDMA_RBP_UPPER_ADDRESS_MASK)
+			iova = (size_t)job[loop]->dest;
+		else
+			iova = (size_t)job[loop]->src;
+
+		/* Set the metadata */
+		job[loop]->vq_id = qdma_vq->vq_id;
+		ppjob = (struct rte_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1;
+		*ppjob = job[loop];
+
+		if ((rbp->drbp == 1) || (rbp->srbp == 1))
+			ret = qdma_populate_fd_pci((phys_addr_t)job[loop]->src,
+						(phys_addr_t)job[loop]->dest,
+						job[loop]->len, &fd[loop], rbp);
+		else
+			ret = qdma_populate_fd_ddr((phys_addr_t)job[loop]->src,
+						(phys_addr_t)job[loop]->dest,
+						job[loop]->len, &fd[loop]);
+	}
 
-	if (job->src & QDMA_RBP_UPPER_ADDRESS_MASK)
-		iova = (size_t)job->dest;
-	else
-		iova = (size_t)job->src;
+	return ret;
+}
 
-	/* Set the metadata */
-	job->vq_id = qdma_vq->vq_id;
-	ppjob = (struct rte_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1;
-	*ppjob = job;
+static uint32_t qdma_populate_sg_entry(
+		struct rte_qdma_job **jobs,
+		struct qdma_sg_entry *src_sge,
+		struct qdma_sg_entry *dst_sge,
+		uint16_t nb_jobs)
+{
+	uint16_t i;
+	uint32_t total_len = 0;
+	uint64_t iova;
+
+	for (i = 0; i < nb_jobs; i++) {
+		/* source SG */
+		if (likely(jobs[i]->flags & RTE_QDMA_JOB_SRC_PHY)) {
+			src_sge->addr_lo = (uint32_t)jobs[i]->src;
+			src_sge->addr_hi = (jobs[i]->src >> 32);
+		} else {
+			iova = DPAA2_VADDR_TO_IOVA(jobs[i]->src);
+			src_sge->addr_lo = (uint32_t)iova;
+			src_sge->addr_hi = iova >> 32;
+		}
+		src_sge->data_len.data_len_sl0 = jobs[i]->len;
+		src_sge->ctrl.sl = QDMA_SG_SL_LONG;
+		src_sge->ctrl.fmt = QDMA_SG_FMT_SDB;
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+		src_sge->ctrl.bmt = QDMA_SG_BMT_ENABLE;
+#else
+		src_sge->ctrl.bmt = QDMA_SG_BMT_DISABLE;
+#endif
+		/* destination SG */
+		if (likely(jobs[i]->flags & RTE_QDMA_JOB_DEST_PHY)) {
+			dst_sge->addr_lo = (uint32_t)jobs[i]->dest;
+			dst_sge->addr_hi = (jobs[i]->dest >> 32);
+		} else {
+			iova = DPAA2_VADDR_TO_IOVA(jobs[i]->dest);
+			dst_sge->addr_lo = (uint32_t)iova;
+			dst_sge->addr_hi = iova >> 32;
+		}
+		dst_sge->data_len.data_len_sl0 = jobs[i]->len;
+		dst_sge->ctrl.sl = QDMA_SG_SL_LONG;
+		dst_sge->ctrl.fmt = QDMA_SG_FMT_SDB;
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+		dst_sge->ctrl.bmt = QDMA_SG_BMT_ENABLE;
+#else
+		dst_sge->ctrl.bmt = QDMA_SG_BMT_DISABLE;
+#endif
+		total_len += jobs[i]->len;
 
-	if ((rbp->drbp == 1) || (rbp->srbp == 1))
-		ret = qdma_populate_fd_pci((phys_addr_t) job->src,
-					   (phys_addr_t) job->dest,
-					   job->len, fd, rbp);
-	else
-		ret = qdma_populate_fd_ddr((phys_addr_t) job->src,
-					   (phys_addr_t) job->dest,
-					   job->len, fd);
-	return ret;
+		if (i == (nb_jobs - 1)) {
+			src_sge->ctrl.f = QDMA_SG_F;
+			dst_sge->ctrl.f = QDMA_SG_F;
+		} else {
+			src_sge->ctrl.f = 0;
+			dst_sge->ctrl.f = 0;
+		}
+		src_sge++;
+		dst_sge++;
+	}
+
+	return total_len;
 }
-static inline int dpdmai_dev_set_fd_lf(
+
+static inline int dpdmai_dev_set_multi_fd_lf(
 		struct qdma_virt_queue *qdma_vq,
 		struct qbman_fd *fd,
-		struct rte_qdma_job *job)
+		struct rte_qdma_job **job,
+		uint16_t nb_jobs)
 {
 	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
 	struct rte_qdma_job **ppjob;
-	void *elem;
+	uint16_t i;
+	int ret;
+	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
+	void *elem[RTE_QDMA_BURST_NB_MAX];
 	struct qbman_fle *fle;
 	uint64_t elem_iova, fle_iova;
-	int ret = 0;
+
+	ret = rte_mempool_get_bulk(qdma_dev->fle_pool, elem, nb_jobs);
+	if (ret) {
+		DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
+		return ret;
+	}
+
+	for (i = 0; i < nb_jobs; i++) {
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+		elem_iova = rte_mempool_virt2iova(elem[i]);
+#else
+		elem_iova = DPAA2_VADDR_TO_IOVA(elem[i]);
+#endif
+
+		*((uint16_t *)
+		((uintptr_t)(uint64_t)elem[i] + QDMA_FLE_JOB_NB_OFFSET)) = 1;
+
+		ppjob = (struct rte_qdma_job **)
+			((uintptr_t)(uint64_t)elem[i] + QDMA_FLE_JOBS_OFFSET);
+		*ppjob = job[i];
+
+		job[i]->vq_id = qdma_vq->vq_id;
+
+		fle = (struct qbman_fle *)
+			((uintptr_t)(uint64_t)elem[i] + QDMA_FLE_FLE_OFFSET);
+		fle_iova = elem_iova + QDMA_FLE_FLE_OFFSET;
+
+		DPAA2_SET_FD_ADDR(&fd[i], fle_iova);
+		DPAA2_SET_FD_COMPOUND_FMT(&fd[i]);
+		DPAA2_SET_FD_FRC(&fd[i], QDMA_SER_CTX);
+
+		memset(fle, 0, DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle) +
+			DPAA2_QDMA_MAX_SDD * sizeof(struct qdma_sdd));
+
+		dpaa2_qdma_populate_fle(fle, fle_iova, rbp,
+				job[i]->src, job[i]->dest, job[i]->len,
+				job[i]->flags, QBMAN_FLE_WORD4_FMT_SBF);
+	}
+
+	return 0;
+}
+
+static inline int dpdmai_dev_set_sg_fd_lf(
+		struct qdma_virt_queue *qdma_vq,
+		struct qbman_fd *fd,
+		struct rte_qdma_job **job,
+		uint16_t nb_jobs)
+{
+	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
+	struct rte_qdma_job **ppjob;
+	void *elem;
+	struct qbman_fle *fle;
+	uint64_t elem_iova, fle_iova, src, dst;
+	int ret = 0, i;
+	struct qdma_sg_entry *src_sge, *dst_sge;
+	uint32_t len, fmt, flags;
 	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
 
 	/*
@@ -244,10 +379,15 @@ static inline int dpdmai_dev_set_fd_lf(
 #endif
 
 	/* Set the metadata */
-	job->vq_id = qdma_vq->vq_id;
+	/* Save job context. */
+	*((uint16_t *)
+	((uintptr_t)(uint64_t)elem + QDMA_FLE_JOB_NB_OFFSET)) = nb_jobs;
 	ppjob = (struct rte_qdma_job **)
-		((uintptr_t)(uint64_t)elem + QDMA_FLE_JOB_OFFSET);
-	*ppjob = job;
+		((uintptr_t)(uint64_t)elem + QDMA_FLE_JOBS_OFFSET);
+	for (i = 0; i < nb_jobs; i++)
+		ppjob[i] = job[i];
+
+	ppjob[0]->vq_id = qdma_vq->vq_id;
 
 	fle = (struct qbman_fle *)
 		((uintptr_t)(uint64_t)elem + QDMA_FLE_FLE_OFFSET);
@@ -258,9 +398,29 @@ static inline int dpdmai_dev_set_fd_lf(
 	DPAA2_SET_FD_FRC(fd, QDMA_SER_CTX);
 
 	/* Populate FLE */
-	memset(fle, 0, QDMA_FLE_POOL_SIZE);
-	dpaa2_qdma_populate_fle(fle, rbp, job->src, job->dest,
-				job->len, job->flags);
+	if (likely(nb_jobs > 1)) {
+		src_sge = (struct qdma_sg_entry *)
+			((uintptr_t)(uint64_t)elem + QDMA_FLE_SG_ENTRY_OFFSET);
+		dst_sge = src_sge + DPAA2_QDMA_MAX_SG_NB;
+		src = elem_iova + QDMA_FLE_SG_ENTRY_OFFSET;
+		dst = src +
+			DPAA2_QDMA_MAX_SG_NB * sizeof(struct qdma_sg_entry);
+		len = qdma_populate_sg_entry(job, src_sge, dst_sge, nb_jobs);
+		fmt = QBMAN_FLE_WORD4_FMT_SGE;
+		flags = RTE_QDMA_JOB_SRC_PHY | RTE_QDMA_JOB_DEST_PHY;
+	} else {
+		src = job[0]->src;
+		dst = job[0]->dest;
+		len = job[0]->len;
+		fmt = QBMAN_FLE_WORD4_FMT_SBF;
+		flags = job[0]->flags;
+	}
+
+	memset(fle, 0, DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle) +
+			DPAA2_QDMA_MAX_SDD * sizeof(struct qdma_sdd));
+
+	dpaa2_qdma_populate_fle(fle, fle_iova, rbp,
+					src, dst, len, flags, fmt);
 
 	return 0;
 }
@@ -268,7 +428,7 @@ static inline int dpdmai_dev_set_fd_lf(
 static inline uint16_t dpdmai_dev_get_job_us(
 				struct qdma_virt_queue *qdma_vq __rte_unused,
 				const struct qbman_fd *fd,
-				struct rte_qdma_job **job)
+				struct rte_qdma_job **job, uint16_t *nb_jobs)
 {
 	uint16_t vqid;
 	size_t iova;
@@ -286,6 +446,7 @@ static inline uint16_t dpdmai_dev_get_job_us(
 	(*job)->status = (fd->simple_pci.acc_err << 8) |
 					(fd->simple_pci.error);
 	vqid = (*job)->vq_id;
+	*nb_jobs = 1;
 
 	return vqid;
 }
@@ -293,12 +454,12 @@ static inline uint16_t dpdmai_dev_get_job_us(
 static inline uint16_t dpdmai_dev_get_job_lf(
 						struct qdma_virt_queue *qdma_vq,
 						const struct qbman_fd *fd,
-						struct rte_qdma_job **job)
+						struct rte_qdma_job **job,
+						uint16_t *nb_jobs)
 {
-	void *elem;
 	struct qbman_fle *fle;
-	struct rte_qdma_job **ppjob;
-	uint16_t vqid;
+	struct rte_qdma_job **ppjob = NULL;
+	uint16_t i, status;
 	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
 
 	/*
@@ -307,20 +468,24 @@ static inline uint16_t dpdmai_dev_get_job_lf(
 	 */
 	fle = (struct qbman_fle *)
 			DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd));
-	elem = (void *)((uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET);
+	*nb_jobs = *((uint16_t *)((uintptr_t)(uint64_t)fle -
+				QDMA_FLE_FLE_OFFSET + QDMA_FLE_JOB_NB_OFFSET));
+	status = (DPAA2_GET_FD_ERR(fd) << 8) | (DPAA2_GET_FD_FRC(fd) & 0xFF);
 
-	ppjob = (struct rte_qdma_job **)
-		((uintptr_t)(uint64_t)elem + QDMA_FLE_JOB_OFFSET);
+	ppjob = (struct rte_qdma_job **)((uintptr_t)(uint64_t)fle -
+				QDMA_FLE_FLE_OFFSET + QDMA_FLE_JOBS_OFFSET);
 
-	*job = (struct rte_qdma_job *)*ppjob;
-	(*job)->status = (DPAA2_GET_FD_ERR(fd) << 8) |
-			 (DPAA2_GET_FD_FRC(fd) & 0xFF);
-	vqid = (*job)->vq_id;
+	for (i = 0; i < (*nb_jobs); i++) {
+		job[i] = ppjob[i];
+		job[i]->status = status;
+	}
 
 	/* Free FLE to the pool */
-	rte_mempool_put(qdma_dev->fle_pool, elem);
+	rte_mempool_put(qdma_dev->fle_pool,
+			(void *)
+			((uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET));
 
-	return vqid;
+	return job[0]->vq_id;
 }
 
 /* Function to receive a QDMA job for a given device and queue*/
@@ -344,9 +509,16 @@ dpdmai_dev_dequeue_multijob_prefetch(
 	uint8_t status, pending;
 	uint8_t num_rx = 0;
 	const struct qbman_fd *fd;
-	uint16_t vqid;
+	uint16_t vqid, num_rx_ret;
 	int ret, pull_size;
 
+	if (qdma_vq->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
+		/** Make sure there are enough space to get jobs.*/
+		if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
+			return -EINVAL;
+		nb_jobs = 1;
+	}
+
 	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
 		ret = dpaa2_affine_qbman_swp();
 		if (ret) {
@@ -440,12 +612,13 @@ dpdmai_dev_dequeue_multijob_prefetch(
 		}
 		fd = qbman_result_DQ_fd(dq_storage);
 
-		vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx]);
+		vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx],
+								&num_rx_ret);
 		if (vq_id)
 			vq_id[num_rx] = vqid;
 
 		dq_storage++;
-		num_rx++;
+		num_rx += num_rx_ret;
 	} while (pending);
 
 	if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
@@ -490,8 +663,17 @@ dpdmai_dev_dequeue_multijob_no_prefetch(
 	uint8_t status, pending;
 	uint8_t num_rx = 0;
 	const struct qbman_fd *fd;
-	uint16_t vqid;
-	int ret, next_pull = nb_jobs, num_pulled = 0;
+	uint16_t vqid, num_rx_ret;
+	int ret, next_pull, num_pulled = 0;
+
+	if (qdma_vq->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
+		/** Make sure there are enough space to get jobs.*/
+		if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
+			return -EINVAL;
+		nb_jobs = 1;
+	}
+
+	next_pull = nb_jobs;
 
 	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
 		ret = dpaa2_affine_qbman_swp();
@@ -560,12 +742,13 @@ dpdmai_dev_dequeue_multijob_no_prefetch(
 			}
 			fd = qbman_result_DQ_fd(dq_storage);
 
-			vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx]);
+			vqid = qdma_vq->get_job(qdma_vq, fd,
+						&job[num_rx], &num_rx_ret);
 			if (vq_id)
 				vq_id[num_rx] = vqid;
 
 			dq_storage++;
-			num_rx++;
+			num_rx += num_rx_ret;
 			num_pulled++;
 
 		} while (pending);
@@ -592,6 +775,7 @@ dpdmai_dev_enqueue_multi(
 	int ret;
 	uint32_t num_to_send = 0;
 	uint16_t num_tx = 0;
+	uint32_t enqueue_loop, retry_count, loop;
 
 	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
 		ret = dpaa2_affine_qbman_swp();
@@ -612,45 +796,87 @@ dpdmai_dev_enqueue_multi(
 	qbman_eq_desc_set_no_orp(&eqdesc, 0);
 	qbman_eq_desc_set_response(&eqdesc, 0, 0);
 
+	if (qdma_vq->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
+		uint16_t fd_nb;
+		uint16_t sg_entry_nb = nb_jobs > DPAA2_QDMA_MAX_SG_NB ?
+						DPAA2_QDMA_MAX_SG_NB : nb_jobs;
+		uint16_t job_idx = 0;
+		uint16_t fd_sg_nb[8];
+		uint16_t nb_jobs_ret = 0;
+
+		if (nb_jobs % DPAA2_QDMA_MAX_SG_NB)
+			fd_nb = nb_jobs / DPAA2_QDMA_MAX_SG_NB + 1;
+		else
+			fd_nb = nb_jobs / DPAA2_QDMA_MAX_SG_NB;
+
+		memset(&fd[0], 0, sizeof(struct qbman_fd) * fd_nb);
+
+		for (loop = 0; loop < fd_nb; loop++) {
+			ret = qdma_vq->set_fd(qdma_vq, &fd[loop], &job[job_idx],
+					      sg_entry_nb);
+			if (unlikely(ret < 0))
+				return 0;
+			fd_sg_nb[loop] = sg_entry_nb;
+			nb_jobs -= sg_entry_nb;
+			job_idx += sg_entry_nb;
+			sg_entry_nb = nb_jobs > DPAA2_QDMA_MAX_SG_NB ?
+						DPAA2_QDMA_MAX_SG_NB : nb_jobs;
+		}
+
+		/* Enqueue the packet to the QBMAN */
+		enqueue_loop = 0; retry_count = 0;
+
+		while (enqueue_loop < fd_nb) {
+			ret = qbman_swp_enqueue_multiple(swp,
+					&eqdesc, &fd[enqueue_loop],
+					NULL, fd_nb - enqueue_loop);
+			if (unlikely(ret < 0)) {
+				retry_count++;
+				if (retry_count > DPAA2_MAX_TX_RETRY_COUNT)
+					return nb_jobs_ret;
+			} else {
+				for (loop = 0; loop < (uint32_t)ret; loop++)
+					nb_jobs_ret +=
+						fd_sg_nb[enqueue_loop + loop];
+				enqueue_loop += ret;
+				retry_count = 0;
+			}
+		}
+
+		return nb_jobs_ret;
+	}
+
 	memset(fd, 0, nb_jobs * sizeof(struct qbman_fd));
 
 	while (nb_jobs > 0) {
-		uint32_t loop;
-
 		num_to_send = (nb_jobs > dpaa2_eqcr_size) ?
 			dpaa2_eqcr_size : nb_jobs;
 
-		for (loop = 0; loop < num_to_send; loop++) {
-			ret = qdma_vq->set_fd(qdma_vq, &fd[loop], job[num_tx]);
-			if (ret < 0) {
-				/* Set nb_jobs to loop, so outer while loop
-				 * breaks out.
-				 */
-				nb_jobs = loop;
-				break;
-			}
-
-			num_tx++;
-		}
+		ret = qdma_vq->set_fd(qdma_vq, &fd[num_tx],
+						&job[num_tx], num_to_send);
+		if (unlikely(ret < 0))
+			break;
 
 		/* Enqueue the packet to the QBMAN */
-		uint32_t enqueue_loop = 0, retry_count = 0;
+		enqueue_loop = 0; retry_count = 0;
+		loop = num_to_send;
 
 		while (enqueue_loop < loop) {
 			ret = qbman_swp_enqueue_multiple(swp,
 						&eqdesc,
-						&fd[enqueue_loop],
+						&fd[num_tx + enqueue_loop],
 						NULL,
 						loop - enqueue_loop);
 			if (unlikely(ret < 0)) {
 				retry_count++;
 				if (retry_count > DPAA2_MAX_TX_RETRY_COUNT)
-					return num_tx - (loop - enqueue_loop);
+					return num_tx;
 			} else {
 				enqueue_loop += ret;
 				retry_count = 0;
 			}
 		}
+		num_tx += num_to_send;
 		nb_jobs -= loop;
 	}
 	return num_tx;
@@ -977,6 +1203,21 @@ dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
 		return -ENODEV;
 	}
 
+	if (q_config->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
+		if (!(q_config->flags & RTE_QDMA_VQ_EXCLUSIVE_PQ)) {
+			DPAA2_QDMA_ERR(
+				"qDMA SG format only supports physical queue!");
+			rte_spinlock_unlock(&qdma_dev->lock);
+			return -ENODEV;
+		}
+		if (!(q_config->flags & RTE_QDMA_VQ_FD_LONG_FORMAT)) {
+			DPAA2_QDMA_ERR(
+				"qDMA SG format only supports long FD format!");
+			rte_spinlock_unlock(&qdma_dev->lock);
+			return -ENODEV;
+		}
+	}
+
 	if (q_config->flags & RTE_QDMA_VQ_EXCLUSIVE_PQ) {
 		/* Allocate HW queue for a VQ */
 		qdma_dev->vqs[i].hw_queue = alloc_hw_queue(q_config->lcore_id);
@@ -1007,12 +1248,16 @@ dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
 		return -ENODEV;
 	}
 
+	qdma_dev->vqs[i].flags = q_config->flags;
 	qdma_dev->vqs[i].in_use = 1;
 	qdma_dev->vqs[i].lcore_id = q_config->lcore_id;
 	memset(&qdma_dev->vqs[i].rbp, 0, sizeof(struct rte_qdma_rbp));
 
 	if (q_config->flags & RTE_QDMA_VQ_FD_LONG_FORMAT) {
-		qdma_dev->vqs[i].set_fd = dpdmai_dev_set_fd_lf;
+		if (q_config->flags & RTE_QDMA_VQ_FD_SG_FORMAT)
+			qdma_dev->vqs[i].set_fd = dpdmai_dev_set_sg_fd_lf;
+		else
+			qdma_dev->vqs[i].set_fd = dpdmai_dev_set_multi_fd_lf;
 		qdma_dev->vqs[i].get_job = dpdmai_dev_get_job_lf;
 	} else {
 		qdma_dev->vqs[i].set_fd = dpdmai_dev_set_fd_us;
@@ -1087,6 +1332,12 @@ dpaa2_qdma_dequeue(struct rte_rawdev *rawdev,
 	int ret = 0, i;
 	unsigned int ring_count;
 
+	if (qdma_vq->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
+		/** Make sure there are enough space to get jobs.*/
+		if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
+			return -EINVAL;
+	}
+
 	/* Return error in case of wrong lcore_id */
 	if (rte_lcore_id() != (unsigned int)(qdma_vq->lcore_id)) {
 		DPAA2_QDMA_WARN("QDMA dequeue for vqid %d on wrong core",
@@ -1098,7 +1349,8 @@ dpaa2_qdma_dequeue(struct rte_rawdev *rawdev,
 	if (qdma_vq->num_enqueues == qdma_vq->num_dequeues)
 		return 0;
 
-	if (qdma_vq->num_enqueues < (qdma_vq->num_dequeues + nb_jobs))
+	if (!(qdma_vq->flags & RTE_QDMA_VQ_FD_SG_FORMAT) &&
+		qdma_vq->num_enqueues < (qdma_vq->num_dequeues + nb_jobs))
 		nb_jobs = (qdma_vq->num_enqueues - qdma_vq->num_dequeues);
 
 	if (qdma_vq->exclusive_hw_queue) {
diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
index ff7743fb5..43a01d56f 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
@@ -11,16 +11,37 @@ struct rte_qdma_job;
 #define DPAA2_QDMA_MAX_FLE 3
 #define DPAA2_QDMA_MAX_SDD 2
 
+#define DPAA2_QDMA_MAX_SG_NB 64
+
 #define DPAA2_DPDMAI_MAX_QUEUES	8
 
-/** FLE pool size: 3 Frame list + 2 source/destination descriptor */
-#define QDMA_FLE_POOL_SIZE (sizeof(struct rte_qdma_job *) + \
+/** FLE pool size: job number(uint64_t) +
+ * 3 Frame list + 2 source/destination descriptor  +
+ * 32 (src + dst) sg entries + 32 jobs pointers.
+ */
+
+#define QDMA_FLE_POOL_SIZE (sizeof(uint64_t) + \
 		sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE + \
-		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD)
+		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD + \
+		sizeof(struct qdma_sg_entry) * DPAA2_QDMA_MAX_SG_NB * 2 + \
+		sizeof(struct rte_qdma_job *) * DPAA2_QDMA_MAX_SG_NB)
+
+#define QDMA_FLE_JOB_NB_OFFSET 0
 
-#define QDMA_FLE_JOB_OFFSET 0
 #define QDMA_FLE_FLE_OFFSET \
-		(QDMA_FLE_JOB_OFFSET + sizeof(struct rte_qdma_job *))
+		(QDMA_FLE_JOB_NB_OFFSET + sizeof(uint64_t))
+
+#define QDMA_FLE_SDD_OFFSET \
+		(QDMA_FLE_FLE_OFFSET + \
+		sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE)
+
+#define QDMA_FLE_SG_ENTRY_OFFSET \
+		(QDMA_FLE_SDD_OFFSET + \
+		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD)
+
+#define QDMA_FLE_JOBS_OFFSET \
+		(QDMA_FLE_SG_ENTRY_OFFSET + \
+		sizeof(struct qdma_sg_entry) * DPAA2_QDMA_MAX_SG_NB * 2)
 
 /** FLE pool cache size */
 #define QDMA_FLE_CACHE_SIZE(_num) (_num/(RTE_MAX_LCORE * 2))
@@ -90,10 +111,12 @@ struct qdma_virt_queue;
 
 typedef uint16_t (qdma_get_job_t)(struct qdma_virt_queue *qdma_vq,
 					const struct qbman_fd *fd,
-					struct rte_qdma_job **job);
+					struct rte_qdma_job **job,
+					uint16_t *nb_jobs);
 typedef int (qdma_set_fd_t)(struct qdma_virt_queue *qdma_vq,
 					struct qbman_fd *fd,
-					struct rte_qdma_job *job);
+					struct rte_qdma_job **job,
+					uint16_t nb_jobs);
 
 typedef int (qdma_dequeue_multijob_t)(
 				struct qdma_virt_queue *qdma_vq,
@@ -126,6 +149,7 @@ struct qdma_virt_queue {
 	uint64_t num_dequeues;
 
 	uint16_t vq_id;
+	uint32_t flags;
 
 	qdma_set_fd_t *set_fd;
 	qdma_get_job_t *get_job;
@@ -191,6 +215,43 @@ struct qdma_sdd {
 	};
 } __rte_packed;
 
+#define QDMA_SG_FMT_SDB	0x0 /* single data buffer */
+#define QDMA_SG_FMT_FDS	0x1 /* frame data section */
+#define QDMA_SG_FMT_SGTE	0x2 /* SGT extension */
+#define QDMA_SG_SL_SHORT	0x1 /* short length */
+#define QDMA_SG_SL_LONG	0x0 /* long length */
+#define QDMA_SG_F	0x1 /* last sg entry */
+#define QDMA_SG_BMT_ENABLE 0x1
+#define QDMA_SG_BMT_DISABLE 0x0
+
+struct qdma_sg_entry {
+	uint32_t addr_lo;		/* address 0:31 */
+	uint32_t addr_hi:17;	/* address 32:48 */
+	uint32_t rsv:15;
+	union {
+		uint32_t data_len_sl0;	/* SL=0, the long format */
+		struct {
+			uint32_t len:17;	/* SL=1, the short format */
+			uint32_t reserve:3;
+			uint32_t sf:1;
+			uint32_t sr:1;
+			uint32_t size:10;	/* buff size */
+		} data_len_sl1;
+	} data_len;					/* AVAIL_LENGTH */
+	union {
+		uint32_t ctrl_fields;
+		struct {
+			uint32_t bpid:14;
+			uint32_t ivp:1;
+			uint32_t bmt:1;
+			uint32_t offset:12;
+			uint32_t fmt:2;
+			uint32_t sl:1;
+			uint32_t f:1;
+		} ctrl;
+	};
+} __attribute__((__packed__));
+
 /** Represents a DPDMAI raw device */
 struct dpaa2_dpdmai_dev {
 	/** Pointer to Next device instance */
diff --git a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
index ff4fc1d6c..cfec303c8 100644
--- a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
@@ -52,6 +52,8 @@ enum {
 
 #define RTE_QDMA_VQ_FD_LONG_FORMAT		(1ULL << 1)
 
+#define RTE_QDMA_VQ_FD_SG_FORMAT		(1ULL << 2)
+
 /** States if the source addresses is physical. */
 #define RTE_QDMA_JOB_SRC_PHY		(1ULL)
 
-- 
2.17.1


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH v2 6/7] raw/dpaa2_qdma: support FLE pool per queue
  2020-10-15  9:47 ` [dpdk-dev] [PATCH v2 " Gagandeep Singh
                     ` (4 preceding siblings ...)
  2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 5/7] raw/dpaa2_qdma: support scatter gather in enqueue Gagandeep Singh
@ 2020-10-15  9:47   ` Gagandeep Singh
  2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 7/7] raw/dpaa2_qdma: support enqueue without response wait Gagandeep Singh
  6 siblings, 0 replies; 23+ messages in thread
From: Gagandeep Singh @ 2020-10-15  9:47 UTC (permalink / raw)
  To: dev, nipun.gupta, hemant.agrawal; +Cc: thomas, Jun Yang

From: Jun Yang <jun.yang@nxp.com>

Don't mix SG and non-SG queues on the same FLE pool format;
otherwise, non-SG performance is impacted.

In order to support SG queues and non-SG queues
with different FLE pool element formats, associate
the FLE pool with the queue instead of the device.

Signed-off-by: Jun Yang <jun.yang@nxp.com>
---
 drivers/raw/dpaa2_qdma/dpaa2_qdma.c         | 113 ++++++++++++--------
 drivers/raw/dpaa2_qdma/dpaa2_qdma.h         |  28 +++--
 drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h |   2 +-
 3 files changed, 90 insertions(+), 53 deletions(-)

diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
index 7b755cea7..b363139de 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
@@ -300,12 +300,11 @@ static inline int dpdmai_dev_set_multi_fd_lf(
 	struct rte_qdma_job **ppjob;
 	uint16_t i;
 	int ret;
-	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
 	void *elem[RTE_QDMA_BURST_NB_MAX];
 	struct qbman_fle *fle;
 	uint64_t elem_iova, fle_iova;
 
-	ret = rte_mempool_get_bulk(qdma_dev->fle_pool, elem, nb_jobs);
+	ret = rte_mempool_get_bulk(qdma_vq->fle_pool, elem, nb_jobs);
 	if (ret) {
 		DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
 		return ret;
@@ -318,11 +317,9 @@ static inline int dpdmai_dev_set_multi_fd_lf(
 		elem_iova = DPAA2_VADDR_TO_IOVA(elem[i]);
 #endif
 
-		*((uint16_t *)
-		((uintptr_t)(uint64_t)elem[i] + QDMA_FLE_JOB_NB_OFFSET)) = 1;
-
 		ppjob = (struct rte_qdma_job **)
-			((uintptr_t)(uint64_t)elem[i] + QDMA_FLE_JOBS_OFFSET);
+			((uintptr_t)(uint64_t)elem[i] +
+			 QDMA_FLE_SINGLE_JOB_OFFSET);
 		*ppjob = job[i];
 
 		job[i]->vq_id = qdma_vq->vq_id;
@@ -360,13 +357,12 @@ static inline int dpdmai_dev_set_sg_fd_lf(
 	int ret = 0, i;
 	struct qdma_sg_entry *src_sge, *dst_sge;
 	uint32_t len, fmt, flags;
-	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
 
 	/*
 	 * Get an FLE/SDD from FLE pool.
 	 * Note: IO metadata is before the FLE and SDD memory.
 	 */
-	ret = rte_mempool_get(qdma_dev->fle_pool, (void **)(&elem));
+	ret = rte_mempool_get(qdma_vq->fle_pool, (void **)(&elem));
 	if (ret) {
 		DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
 		return ret;
@@ -383,7 +379,7 @@ static inline int dpdmai_dev_set_sg_fd_lf(
 	*((uint16_t *)
 	((uintptr_t)(uint64_t)elem + QDMA_FLE_JOB_NB_OFFSET)) = nb_jobs;
 	ppjob = (struct rte_qdma_job **)
-		((uintptr_t)(uint64_t)elem + QDMA_FLE_JOBS_OFFSET);
+		((uintptr_t)(uint64_t)elem + QDMA_FLE_SG_JOBS_OFFSET);
 	for (i = 0; i < nb_jobs; i++)
 		ppjob[i] = job[i];
 
@@ -451,7 +447,41 @@ static inline uint16_t dpdmai_dev_get_job_us(
 	return vqid;
 }
 
-static inline uint16_t dpdmai_dev_get_job_lf(
+static inline uint16_t dpdmai_dev_get_single_job_lf(
+						struct qdma_virt_queue *qdma_vq,
+						const struct qbman_fd *fd,
+						struct rte_qdma_job **job,
+						uint16_t *nb_jobs)
+{
+	struct qbman_fle *fle;
+	struct rte_qdma_job **ppjob = NULL;
+	uint16_t status;
+
+	/*
+	 * Fetch metadata from FLE. job and vq_id were set
+	 * in metadata in the enqueue operation.
+	 */
+	fle = (struct qbman_fle *)
+			DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd));
+
+	*nb_jobs = 1;
+	ppjob = (struct rte_qdma_job **)((uintptr_t)(uint64_t)fle -
+			QDMA_FLE_FLE_OFFSET + QDMA_FLE_SINGLE_JOB_OFFSET);
+
+	status = (DPAA2_GET_FD_ERR(fd) << 8) | (DPAA2_GET_FD_FRC(fd) & 0xFF);
+
+	*job = *ppjob;
+	(*job)->status = status;
+
+	/* Free FLE to the pool */
+	rte_mempool_put(qdma_vq->fle_pool,
+			(void *)
+			((uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET));
+
+	return (*job)->vq_id;
+}
+
+static inline uint16_t dpdmai_dev_get_sg_job_lf(
 						struct qdma_virt_queue *qdma_vq,
 						const struct qbman_fd *fd,
 						struct rte_qdma_job **job,
@@ -460,7 +490,6 @@ static inline uint16_t dpdmai_dev_get_job_lf(
 	struct qbman_fle *fle;
 	struct rte_qdma_job **ppjob = NULL;
 	uint16_t i, status;
-	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
 
 	/*
 	 * Fetch metadata from FLE. job and vq_id were set
@@ -470,10 +499,9 @@ static inline uint16_t dpdmai_dev_get_job_lf(
 			DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd));
 	*nb_jobs = *((uint16_t *)((uintptr_t)(uint64_t)fle -
 				QDMA_FLE_FLE_OFFSET + QDMA_FLE_JOB_NB_OFFSET));
-	status = (DPAA2_GET_FD_ERR(fd) << 8) | (DPAA2_GET_FD_FRC(fd) & 0xFF);
-
 	ppjob = (struct rte_qdma_job **)((uintptr_t)(uint64_t)fle -
-				QDMA_FLE_FLE_OFFSET + QDMA_FLE_JOBS_OFFSET);
+				QDMA_FLE_FLE_OFFSET + QDMA_FLE_SG_JOBS_OFFSET);
+	status = (DPAA2_GET_FD_ERR(fd) << 8) | (DPAA2_GET_FD_FRC(fd) & 0xFF);
 
 	for (i = 0; i < (*nb_jobs); i++) {
 		job[i] = ppjob[i];
@@ -481,7 +509,7 @@ static inline uint16_t dpdmai_dev_get_job_lf(
 	}
 
 	/* Free FLE to the pool */
-	rte_mempool_put(qdma_dev->fle_pool,
+	rte_mempool_put(qdma_vq->fle_pool,
 			(void *)
 			((uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET));
 
@@ -1045,14 +1073,9 @@ dpaa2_qdma_reset(struct rte_rawdev *rawdev)
 	memset(&qdma_core_info, 0,
 		sizeof(struct qdma_per_core_info) * RTE_MAX_LCORE);
 
-	/* Free the FLE pool */
-	if (qdma_dev->fle_pool)
-		rte_mempool_free(qdma_dev->fle_pool);
-
 	/* Reset QDMA device structure */
 	qdma_dev->max_hw_queues_per_core = 0;
-	qdma_dev->fle_pool = NULL;
-	qdma_dev->fle_pool_count = 0;
+	qdma_dev->fle_queue_pool_cnt = 0;
 	qdma_dev->max_vqs = 0;
 
 	return 0;
@@ -1099,23 +1122,7 @@ dpaa2_qdma_configure(const struct rte_rawdev *rawdev,
 		return -ENOMEM;
 	}
 	qdma_dev->max_vqs = qdma_config->max_vqs;
-
-	/* Allocate FLE pool; just append PID so that in case of
-	 * multiprocess, the pool's don't collide.
-	 */
-	snprintf(name, sizeof(name), "qdma_fle_pool%u",
-		 getpid());
-	qdma_dev->fle_pool = rte_mempool_create(name,
-			qdma_config->fle_pool_count, QDMA_FLE_POOL_SIZE,
-			QDMA_FLE_CACHE_SIZE(qdma_config->fle_pool_count), 0,
-			NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
-	if (!qdma_dev->fle_pool) {
-		DPAA2_QDMA_ERR("qdma_fle_pool create failed");
-		rte_free(qdma_dev->vqs);
-		qdma_dev->vqs = NULL;
-		return -ENOMEM;
-	}
-	qdma_dev->fle_pool_count = qdma_config->fle_pool_count;
+	qdma_dev->fle_queue_pool_cnt = qdma_config->fle_queue_pool_cnt;
 
 	return 0;
 }
@@ -1177,11 +1184,13 @@ dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
 			  size_t conf_size)
 {
 	char ring_name[32];
+	char pool_name[64];
 	int i;
 	struct dpaa2_dpdmai_dev *dpdmai_dev = rawdev->dev_private;
 	struct qdma_device *qdma_dev = dpdmai_dev->qdma_dev;
 	struct rte_qdma_queue_config *q_config =
 		(struct rte_qdma_queue_config *)queue_conf;
+	uint32_t pool_size;
 
 	DPAA2_QDMA_FUNC_TRACE();
 
@@ -1216,6 +1225,9 @@ dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
 			rte_spinlock_unlock(&qdma_dev->lock);
 			return -ENODEV;
 		}
+		pool_size = QDMA_FLE_SG_POOL_SIZE;
+	} else {
+		pool_size = QDMA_FLE_SINGLE_POOL_SIZE;
 	}
 
 	if (q_config->flags & RTE_QDMA_VQ_EXCLUSIVE_PQ) {
@@ -1226,7 +1238,7 @@ dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
 		/* Allocate a Ring for Virtual Queue in VQ mode */
 		snprintf(ring_name, sizeof(ring_name), "status ring %d", i);
 		qdma_dev->vqs[i].status_ring = rte_ring_create(ring_name,
-			qdma_dev->fle_pool_count, rte_socket_id(), 0);
+			qdma_dev->fle_queue_pool_cnt, rte_socket_id(), 0);
 		if (!qdma_dev->vqs[i].status_ring) {
 			DPAA2_QDMA_ERR("Status ring creation failed for vq");
 			rte_spinlock_unlock(&qdma_dev->lock);
@@ -1248,17 +1260,31 @@ dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
 		return -ENODEV;
 	}
 
+	snprintf(pool_name, sizeof(pool_name),
+		"qdma_fle_pool%u_queue%d", getpid(), i);
+	qdma_dev->vqs[i].fle_pool = rte_mempool_create(pool_name,
+			qdma_dev->fle_queue_pool_cnt, pool_size,
+			QDMA_FLE_CACHE_SIZE(qdma_dev->fle_queue_pool_cnt), 0,
+			NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
+	if (!qdma_dev->vqs[i].fle_pool) {
+		DPAA2_QDMA_ERR("qdma_fle_pool create failed");
+		rte_spinlock_unlock(&qdma_dev->lock);
+		return -ENOMEM;
+	}
+
 	qdma_dev->vqs[i].flags = q_config->flags;
 	qdma_dev->vqs[i].in_use = 1;
 	qdma_dev->vqs[i].lcore_id = q_config->lcore_id;
 	memset(&qdma_dev->vqs[i].rbp, 0, sizeof(struct rte_qdma_rbp));
 
 	if (q_config->flags & RTE_QDMA_VQ_FD_LONG_FORMAT) {
-		if (q_config->flags & RTE_QDMA_VQ_FD_SG_FORMAT)
+		if (q_config->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
 			qdma_dev->vqs[i].set_fd = dpdmai_dev_set_sg_fd_lf;
-		else
+			qdma_dev->vqs[i].get_job = dpdmai_dev_get_sg_job_lf;
+		} else {
 			qdma_dev->vqs[i].set_fd = dpdmai_dev_set_multi_fd_lf;
-		qdma_dev->vqs[i].get_job = dpdmai_dev_get_job_lf;
+			qdma_dev->vqs[i].get_job = dpdmai_dev_get_single_job_lf;
+		}
 	} else {
 		qdma_dev->vqs[i].set_fd = dpdmai_dev_set_fd_us;
 		qdma_dev->vqs[i].get_job = dpdmai_dev_get_job_us;
@@ -1444,6 +1470,9 @@ dpaa2_qdma_queue_release(struct rte_rawdev *rawdev,
 		put_hw_queue(qdma_vq->hw_queue);
 	}
 
+	if (qdma_vq->fle_pool)
+		rte_mempool_free(qdma_vq->fle_pool);
+
 	memset(qdma_vq, 0, sizeof(struct qdma_virt_queue));
 
 	rte_spinlock_unlock(&qdma_dev->lock);
diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
index 43a01d56f..0892a190e 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
@@ -15,19 +15,27 @@ struct rte_qdma_job;
 
 #define DPAA2_DPDMAI_MAX_QUEUES	8
 
-/** FLE pool size: job number(uint64_t) +
- * 3 Frame list + 2 source/destination descriptor  +
- * 32 (src + dst) sg entries + 32 jobs pointers.
+/** FLE single job pool size: job pointer(uint64_t) +
+ * 3 Frame list + 2 source/destination descriptor.
  */
+#define QDMA_FLE_SINGLE_POOL_SIZE (sizeof(uint64_t) + \
+			sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE + \
+			sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD)
 
-#define QDMA_FLE_POOL_SIZE (sizeof(uint64_t) + \
+/** FLE sg jobs pool size: job number(uint64_t) +
+ * 3 Frame list + 2 source/destination descriptor  +
+ * 64 (src + dst) sg entries + 64 jobs pointers.
+ */
+#define QDMA_FLE_SG_POOL_SIZE (sizeof(uint64_t) + \
 		sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE + \
 		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD + \
-		sizeof(struct qdma_sg_entry) * DPAA2_QDMA_MAX_SG_NB * 2 + \
+		sizeof(struct qdma_sg_entry) * (DPAA2_QDMA_MAX_SG_NB * 2) + \
 		sizeof(struct rte_qdma_job *) * DPAA2_QDMA_MAX_SG_NB)
 
 #define QDMA_FLE_JOB_NB_OFFSET 0
 
+#define QDMA_FLE_SINGLE_JOB_OFFSET 0
+
 #define QDMA_FLE_FLE_OFFSET \
 		(QDMA_FLE_JOB_NB_OFFSET + sizeof(uint64_t))
 
@@ -39,7 +47,7 @@ struct rte_qdma_job;
 		(QDMA_FLE_SDD_OFFSET + \
 		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD)
 
-#define QDMA_FLE_JOBS_OFFSET \
+#define QDMA_FLE_SG_JOBS_OFFSET \
 		(QDMA_FLE_SG_ENTRY_OFFSET + \
 		sizeof(struct qdma_sg_entry) * DPAA2_QDMA_MAX_SG_NB * 2)
 
@@ -85,10 +93,8 @@ struct qdma_device {
 	uint16_t max_vqs;
 	/** Device state - started or stopped */
 	uint8_t state;
-	/** FLE pool for the device */
-	struct rte_mempool *fle_pool;
-	/** FLE pool size */
-	int fle_pool_count;
+	/** FLE queue pool size */
+	int fle_queue_pool_cnt;
 	/** A lock to QDMA device whenever required */
 	rte_spinlock_t lock;
 };
@@ -135,6 +141,8 @@ struct qdma_virt_queue {
 	struct rte_ring *status_ring;
 	/** Associated hw queue */
 	struct qdma_hw_queue *hw_queue;
+	/** FLE pool for the queue */
+	struct rte_mempool *fle_pool;
 	/** Route by port */
 	struct rte_qdma_rbp rbp;
 	/** Associated lcore id */
diff --git a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
index cfec303c8..3cd41676c 100644
--- a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
@@ -80,7 +80,7 @@ struct rte_qdma_config {
 	 * maximum number of inflight jobs on the QDMA device. This should
 	 * be power of 2.
 	 */
-	int fle_pool_count;
+	int fle_queue_pool_cnt;
 };
 
 struct rte_qdma_rbp {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [dpdk-dev] [PATCH v2 7/7] raw/dpaa2_qdma: support enqueue without response wait
  2020-10-15  9:47 ` [dpdk-dev] [PATCH v2 " Gagandeep Singh
                     ` (5 preceding siblings ...)
  2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 6/7] raw/dpaa2_qdma: support FLE pool per queue Gagandeep Singh
@ 2020-10-15  9:47   ` Gagandeep Singh
  6 siblings, 0 replies; 23+ messages in thread
From: Gagandeep Singh @ 2020-10-15  9:47 UTC (permalink / raw)
  To: dev, nipun.gupta, hemant.agrawal; +Cc: thomas, Jun Yang

From: Jun Yang <jun.yang@nxp.com>

When enqueuing without waiting for a response, the user must check whether
the DMA transfer has completed using their own logic.

The qDMA FLE pool is not used in this mode, since there is no opportunity to
return an FLE to the pool without a dequeue response.

The user application is responsible for passing FLE memory to the qDMA driver
via the qdma job descriptor, and for maintaining that memory as well.

Signed-off-by: Jun Yang <jun.yang@nxp.com>
---
 drivers/raw/dpaa2_qdma/dpaa2_qdma.c         | 86 +++++++++++++++++----
 drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h |  7 ++
 2 files changed, 79 insertions(+), 14 deletions(-)

diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
index b363139de..c961e18d6 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
@@ -46,7 +46,7 @@ static struct qdma_per_core_info qdma_core_info[RTE_MAX_LCORE];
 static inline int
 qdma_populate_fd_pci(phys_addr_t src, phys_addr_t dest,
 			uint32_t len, struct qbman_fd *fd,
-			struct rte_qdma_rbp *rbp)
+			struct rte_qdma_rbp *rbp, int ser)
 {
 	fd->simple_pci.saddr_lo = lower_32_bits((uint64_t) (src));
 	fd->simple_pci.saddr_hi = upper_32_bits((uint64_t) (src));
@@ -56,7 +56,7 @@ qdma_populate_fd_pci(phys_addr_t src, phys_addr_t dest,
 	fd->simple_pci.bmt = 1;
 	fd->simple_pci.fmt = 3;
 	fd->simple_pci.sl = 1;
-	fd->simple_pci.ser = 1;
+	fd->simple_pci.ser = ser;
 
 	fd->simple_pci.sportid = rbp->sportid;	/*pcie 3 */
 	fd->simple_pci.srbp = rbp->srbp;
@@ -81,7 +81,7 @@ qdma_populate_fd_pci(phys_addr_t src, phys_addr_t dest,
 
 static inline int
 qdma_populate_fd_ddr(phys_addr_t src, phys_addr_t dest,
-			uint32_t len, struct qbman_fd *fd)
+			uint32_t len, struct qbman_fd *fd, int ser)
 {
 	fd->simple_ddr.saddr_lo = lower_32_bits((uint64_t) (src));
 	fd->simple_ddr.saddr_hi = upper_32_bits((uint64_t) (src));
@@ -91,7 +91,7 @@ qdma_populate_fd_ddr(phys_addr_t src, phys_addr_t dest,
 	fd->simple_ddr.bmt = 1;
 	fd->simple_ddr.fmt = 3;
 	fd->simple_ddr.sl = 1;
-	fd->simple_ddr.ser = 1;
+	fd->simple_ddr.ser = ser;
 	/**
 	 * src If RBP=0 {NS,RDTTYPE[3:0]}: 0_1011
 	 * Coherent copy of cacheable memory,
@@ -204,6 +204,8 @@ static inline int dpdmai_dev_set_fd_us(
 	struct rte_qdma_job **ppjob;
 	size_t iova;
 	int ret = 0, loop;
+	int ser = (qdma_vq->flags & RTE_QDMA_VQ_NO_RESPONSE) ?
+				0 : 1;
 
 	for (loop = 0; loop < nb_jobs; loop++) {
 		if (job[loop]->src & QDMA_RBP_UPPER_ADDRESS_MASK)
@@ -218,12 +220,12 @@ static inline int dpdmai_dev_set_fd_us(
 
 		if ((rbp->drbp == 1) || (rbp->srbp == 1))
 			ret = qdma_populate_fd_pci((phys_addr_t)job[loop]->src,
-						(phys_addr_t)job[loop]->dest,
-						job[loop]->len, &fd[loop], rbp);
+					(phys_addr_t)job[loop]->dest,
+					job[loop]->len, &fd[loop], rbp, ser);
 		else
 			ret = qdma_populate_fd_ddr((phys_addr_t)job[loop]->src,
-						(phys_addr_t)job[loop]->dest,
-						job[loop]->len, &fd[loop]);
+					(phys_addr_t)job[loop]->dest,
+					job[loop]->len, &fd[loop], ser);
 	}
 
 	return ret;
@@ -290,6 +292,52 @@ static uint32_t qdma_populate_sg_entry(
 	return total_len;
 }
 
+static inline int dpdmai_dev_set_multi_fd_lf_no_rsp(
+		struct qdma_virt_queue *qdma_vq,
+		struct qbman_fd *fd,
+		struct rte_qdma_job **job,
+		uint16_t nb_jobs)
+{
+	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
+	struct rte_qdma_job **ppjob;
+	uint16_t i;
+	void *elem;
+	struct qbman_fle *fle;
+	uint64_t elem_iova, fle_iova;
+
+	for (i = 0; i < nb_jobs; i++) {
+		elem = job[i]->usr_elem;
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+		elem_iova = rte_mempool_virt2iova(elem);
+#else
+		elem_iova = DPAA2_VADDR_TO_IOVA(elem);
+#endif
+
+		ppjob = (struct rte_qdma_job **)
+			((uintptr_t)(uint64_t)elem +
+			 QDMA_FLE_SINGLE_JOB_OFFSET);
+		*ppjob = job[i];
+
+		job[i]->vq_id = qdma_vq->vq_id;
+
+		fle = (struct qbman_fle *)
+			((uintptr_t)(uint64_t)elem + QDMA_FLE_FLE_OFFSET);
+		fle_iova = elem_iova + QDMA_FLE_FLE_OFFSET;
+
+		DPAA2_SET_FD_ADDR(&fd[i], fle_iova);
+		DPAA2_SET_FD_COMPOUND_FMT(&fd[i]);
+
+		memset(fle, 0, DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle) +
+				DPAA2_QDMA_MAX_SDD * sizeof(struct qdma_sdd));
+
+		dpaa2_qdma_populate_fle(fle, fle_iova, rbp,
+			job[i]->src, job[i]->dest, job[i]->len,
+			job[i]->flags, QBMAN_FLE_WORD4_FMT_SBF);
+	}
+
+	return 0;
+}
+
 static inline int dpdmai_dev_set_multi_fd_lf(
 		struct qdma_virt_queue *qdma_vq,
 		struct qbman_fd *fd,
@@ -362,10 +410,14 @@ static inline int dpdmai_dev_set_sg_fd_lf(
 	 * Get an FLE/SDD from FLE pool.
 	 * Note: IO metadata is before the FLE and SDD memory.
 	 */
-	ret = rte_mempool_get(qdma_vq->fle_pool, (void **)(&elem));
-	if (ret) {
-		DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
-		return ret;
+	if (qdma_vq->flags & RTE_QDMA_VQ_NO_RESPONSE) {
+		elem = job[0]->usr_elem;
+	} else {
+		ret = rte_mempool_get(qdma_vq->fle_pool, &elem);
+		if (ret) {
+			DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
+			return ret;
+		}
 	}
 
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
@@ -391,7 +443,8 @@ static inline int dpdmai_dev_set_sg_fd_lf(
 
 	DPAA2_SET_FD_ADDR(fd, fle_iova);
 	DPAA2_SET_FD_COMPOUND_FMT(fd);
-	DPAA2_SET_FD_FRC(fd, QDMA_SER_CTX);
+	if (!(qdma_vq->flags & RTE_QDMA_VQ_NO_RESPONSE))
+		DPAA2_SET_FD_FRC(fd, QDMA_SER_CTX);
 
 	/* Populate FLE */
 	if (likely(nb_jobs > 1)) {
@@ -1282,7 +1335,12 @@ dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
 			qdma_dev->vqs[i].set_fd = dpdmai_dev_set_sg_fd_lf;
 			qdma_dev->vqs[i].get_job = dpdmai_dev_get_sg_job_lf;
 		} else {
-			qdma_dev->vqs[i].set_fd = dpdmai_dev_set_multi_fd_lf;
+			if (q_config->flags & RTE_QDMA_VQ_NO_RESPONSE)
+				qdma_dev->vqs[i].set_fd =
+					dpdmai_dev_set_multi_fd_lf_no_rsp;
+			else
+				qdma_dev->vqs[i].set_fd =
+					dpdmai_dev_set_multi_fd_lf;
 			qdma_dev->vqs[i].get_job = dpdmai_dev_get_single_job_lf;
 		}
 	} else {
diff --git a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
index 3cd41676c..cc1ac2545 100644
--- a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
@@ -54,6 +54,8 @@ enum {
 
 #define RTE_QDMA_VQ_FD_SG_FORMAT		(1ULL << 2)
 
+#define RTE_QDMA_VQ_NO_RESPONSE			(1ULL << 3)
+
 /** States if the source addresses is physical. */
 #define RTE_QDMA_JOB_SRC_PHY		(1ULL)
 
@@ -154,6 +156,11 @@ struct rte_qdma_job {
 	 */
 	uint16_t status;
 	uint16_t vq_id;
+	/**
+	 * FLE pool element maintained by user, in case no qDMA response.
+	 * Note: the address must be allocated from DPDK memory pool.
+	 */
+	void *usr_elem;
 };
 
 struct rte_qdma_enqdeq {
-- 
2.17.1


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [dpdk-dev] [PATCH v2 1/7] raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops
  2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 1/7] raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops Gagandeep Singh
@ 2020-10-19 11:45     ` Thomas Monjalon
  0 siblings, 0 replies; 23+ messages in thread
From: Thomas Monjalon @ 2020-10-19 11:45 UTC (permalink / raw)
  To: Gagandeep Singh
  Cc: dev, nipun.gupta, hemant.agrawal, akhil.goyal, bruce.richardson, mdr

15/10/2020 11:47, Gagandeep Singh:
> dpaa2_qdma was partially using direct pmd APIs.
> This patch changes that and adapt the driver to use
> more of the rawdev APIs
> 
> Signed-off-by: Gagandeep Singh <g.singh@nxp.com>
> ---
>  drivers/raw/dpaa2_qdma/dpaa2_qdma.c         | 339 ++++++++++----------
>  drivers/raw/dpaa2_qdma/dpaa2_qdma.h         |   3 +-
>  drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h | 207 ++----------
>  3 files changed, 195 insertions(+), 354 deletions(-)

You missed removing all the symbols from the .map.

Note also that this is an unannounced API breakage.
I have decided to merge it anyway.
We will have to be stricter next time.

Applied



^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2020-10-19 11:45 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-07  9:25 [dpdk-dev] [PATCH 0/7] raw/dpaa2_qdma: driver enhancement Gagandeep Singh
2020-09-07  9:25 ` [dpdk-dev] [PATCH 1/7] raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops Gagandeep Singh
2020-09-25 10:53   ` Hemant Agrawal
2020-10-06 21:36   ` Thomas Monjalon
2020-10-14 10:27     ` Gagandeep Singh
2020-10-14 11:37       ` Bruce Richardson
2020-10-14 12:43         ` Ferruh Yigit
2020-09-07  9:25 ` [dpdk-dev] [PATCH 2/7] raw/dpaa2_qdma: memset to only required memory Gagandeep Singh
2020-09-07  9:26 ` [dpdk-dev] [PATCH 3/7] raw/dpaa2_qdma: refactor the code Gagandeep Singh
2020-09-07  9:26 ` [dpdk-dev] [PATCH 4/7] raw/dpaa2_qdma: optimize IOVA conversion Gagandeep Singh
2020-09-07  9:26 ` [dpdk-dev] [PATCH 5/7] raw/dpaa2_qdma: support scatter gather in enqueue Gagandeep Singh
2020-09-07  9:26 ` [dpdk-dev] [PATCH 6/7] raw/dpaa2_qdma: support FLE pool per queue Gagandeep Singh
2020-09-07  9:26 ` [dpdk-dev] [PATCH 7/7] raw/dpaa2_qdma: support enqueue without response wait Gagandeep Singh
2020-09-25 10:54 ` [dpdk-dev] [PATCH 0/7] raw/dpaa2_qdma: driver enhancement Hemant Agrawal
2020-10-15  9:47 ` [dpdk-dev] [PATCH v2 " Gagandeep Singh
2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 1/7] raw/dpaa2_qdma: change DPAA2 QDMA APIs to rawdev ops Gagandeep Singh
2020-10-19 11:45     ` Thomas Monjalon
2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 2/7] raw/dpaa2_qdma: memset to only required memory Gagandeep Singh
2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 3/7] raw/dpaa2_qdma: refactor the code Gagandeep Singh
2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 4/7] raw/dpaa2_qdma: optimize IOVA conversion Gagandeep Singh
2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 5/7] raw/dpaa2_qdma: support scatter gather in enqueue Gagandeep Singh
2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 6/7] raw/dpaa2_qdma: support FLE pool per queue Gagandeep Singh
2020-10-15  9:47   ` [dpdk-dev] [PATCH v2 7/7] raw/dpaa2_qdma: support enqueue without response wait Gagandeep Singh

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ https://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git