From mboxrd@z Thu Jan  1 00:00:00 1970
From: Gagandeep Singh
To: dev@dpdk.org, nipun.gupta@nxp.com, hemant.agrawal@nxp.com
Cc: thomas@monjalon.net, Jun Yang
Date: Thu, 15 Oct 2020 17:47:06 +0800
Message-Id: <1602755228-25535-6-git-send-email-g.singh@nxp.com>
X-Mailer: git-send-email 2.7.4
In-Reply-To: <1602755228-25535-1-git-send-email-g.singh@nxp.com>
References: <1599470764-30569-1-git-send-email-g.singh@nxp.com>
 <1602755228-25535-1-git-send-email-g.singh@nxp.com>
Subject: [dpdk-dev] [PATCH v2 5/7] raw/dpaa2_qdma: support scatter gather in enqueue

From: Jun Yang

This patch adds Scatter Gather support for different jobs on qdma
queues. It also supports gathering multiple enqueue jobs into SG
enqueue job(s).

Signed-off-by: Jun Yang
---
 drivers/bus/fslmc/portal/dpaa2_hw_pvt.h     |  18 +-
 drivers/raw/dpaa2_qdma/dpaa2_qdma.c         | 398 ++++++++++++++++----
 drivers/raw/dpaa2_qdma/dpaa2_qdma.h         |  75 +++-
 drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h |   2 +
 4 files changed, 412 insertions(+), 81 deletions(-)

diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
index 2dd53c63b..ac24f0145 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
@@ -210,12 +210,28 @@ struct dpaa2_dpcon_dev {
 };
 
 /* Refer to Table 7-3 in SEC BG */
+#define QBMAN_FLE_WORD4_FMT_SBF 0x0 /* Single buffer frame */
+#define QBMAN_FLE_WORD4_FMT_SGE 0x2 /* Scatter gather frame */
+
+struct qbman_fle_word4 {
+	uint32_t bpid:14; /* Frame buffer pool ID */
+	uint32_t ivp:1; /* Invalid Pool ID. */
+	uint32_t bmt:1; /* Bypass Memory Translation */
+	uint32_t offset:12; /* Frame offset */
+	uint32_t fmt:2; /* Frame Format */
+	uint32_t sl:1; /* Short Length */
+	uint32_t f:1; /* Final bit */
+};
+
 struct qbman_fle {
 	uint32_t addr_lo;
 	uint32_t addr_hi;
 	uint32_t length;
 	/* FMT must be 00, MSB is final bit */
-	uint32_t fin_bpid_offset;
+	union {
+		uint32_t fin_bpid_offset;
+		struct qbman_fle_word4 word4;
+	};
 	uint32_t frc;
 	uint32_t reserved[3]; /* Not used currently */
 };
diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
index 94dc7886a..7b755cea7 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
@@ -116,17 +116,21 @@ qdma_populate_fd_ddr(phys_addr_t src, phys_addr_t dest,
 
 static void
 dpaa2_qdma_populate_fle(struct qbman_fle *fle,
+			uint64_t fle_iova,
 			struct rte_qdma_rbp *rbp,
 			uint64_t src, uint64_t dest,
-			size_t len, uint32_t flags)
+			size_t len, uint32_t flags, uint32_t fmt)
 {
 	struct qdma_sdd *sdd;
+	uint64_t sdd_iova;
 
-	sdd = (struct qdma_sdd *)((uint8_t *)(fle) +
-		(DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle)));
+	sdd = (struct qdma_sdd *)
+		((uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET +
+		QDMA_FLE_SDD_OFFSET);
+	sdd_iova = fle_iova - QDMA_FLE_FLE_OFFSET + QDMA_FLE_SDD_OFFSET;
 
 	/* first frame list to source descriptor */
-	DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(sdd));
+	DPAA2_SET_FLE_ADDR(fle, sdd_iova);
 	DPAA2_SET_FLE_LEN(fle, (2 * (sizeof(struct qdma_sdd))));
 
 	/* source and destination descriptor */
@@ -164,20 +168,26 @@ dpaa2_qdma_populate_fle(struct qbman_fle *fle,
 	/* source frame list to source buffer */
 	if (flags & RTE_QDMA_JOB_SRC_PHY) {
 		DPAA2_SET_FLE_ADDR(fle, src);
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
 		DPAA2_SET_FLE_BMT(fle);
+#endif
 	} else {
 		DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(src));
 	}
+	fle->word4.fmt = fmt;
 	DPAA2_SET_FLE_LEN(fle, len);
 
 	fle++;
 	/* destination frame list to destination buffer */
 	if (flags & RTE_QDMA_JOB_DEST_PHY) {
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
 		DPAA2_SET_FLE_BMT(fle);
+#endif
 		DPAA2_SET_FLE_ADDR(fle, dest);
 	} else {
 		DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(dest));
 	}
+	fle->word4.fmt = fmt;
 	DPAA2_SET_FLE_LEN(fle, len);
 
 	/* Final bit: 1, for last frame list */
@@ -187,44 +197,169 @@ dpaa2_qdma_populate_fle(struct qbman_fle *fle,
 static inline int dpdmai_dev_set_fd_us(
 		struct qdma_virt_queue *qdma_vq,
 		struct qbman_fd *fd,
-		struct rte_qdma_job *job)
+		struct rte_qdma_job **job,
+		uint16_t nb_jobs)
 {
 	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
 	struct rte_qdma_job **ppjob;
 	size_t iova;
-	int ret = 0;
+	int ret = 0, loop;
+
+	for (loop = 0; loop < nb_jobs; loop++) {
+		if (job[loop]->src & QDMA_RBP_UPPER_ADDRESS_MASK)
+			iova = (size_t)job[loop]->dest;
+		else
+			iova = (size_t)job[loop]->src;
+
+		/* Set the metadata */
+		job[loop]->vq_id = qdma_vq->vq_id;
+		ppjob = (struct rte_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1;
+		*ppjob = job[loop];
+
+		if ((rbp->drbp == 1) || (rbp->srbp == 1))
+			ret = qdma_populate_fd_pci((phys_addr_t)job[loop]->src,
+					(phys_addr_t)job[loop]->dest,
+					job[loop]->len, &fd[loop], rbp);
+		else
+			ret = qdma_populate_fd_ddr((phys_addr_t)job[loop]->src,
+					(phys_addr_t)job[loop]->dest,
+					job[loop]->len, &fd[loop]);
+	}
 
-	if (job->src & QDMA_RBP_UPPER_ADDRESS_MASK)
-		iova = (size_t)job->dest;
-	else
-		iova = (size_t)job->src;
+	return ret;
+}
 
-	/* Set the metadata */
-	job->vq_id = qdma_vq->vq_id;
-	ppjob = (struct rte_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1;
-	*ppjob = job;
+static uint32_t qdma_populate_sg_entry(
+		struct rte_qdma_job **jobs,
+		struct qdma_sg_entry *src_sge,
+		struct qdma_sg_entry *dst_sge,
+		uint16_t nb_jobs)
+{
+	uint16_t i;
+	uint32_t total_len = 0;
+	uint64_t iova;
+
+	for (i = 0; i < nb_jobs; i++) {
+		/* source SG */
+		if (likely(jobs[i]->flags & RTE_QDMA_JOB_SRC_PHY)) {
+			src_sge->addr_lo = (uint32_t)jobs[i]->src;
+			src_sge->addr_hi = (jobs[i]->src >> 32);
+		} else {
+			iova = DPAA2_VADDR_TO_IOVA(jobs[i]->src);
+			src_sge->addr_lo = (uint32_t)iova;
+			src_sge->addr_hi = iova >> 32;
+		}
+		src_sge->data_len.data_len_sl0 = jobs[i]->len;
+		src_sge->ctrl.sl = QDMA_SG_SL_LONG;
+		src_sge->ctrl.fmt = QDMA_SG_FMT_SDB;
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+		src_sge->ctrl.bmt = QDMA_SG_BMT_ENABLE;
+#else
+		src_sge->ctrl.bmt = QDMA_SG_BMT_DISABLE;
+#endif
+		/* destination SG */
+		if (likely(jobs[i]->flags & RTE_QDMA_JOB_DEST_PHY)) {
+			dst_sge->addr_lo = (uint32_t)jobs[i]->dest;
+			dst_sge->addr_hi = (jobs[i]->dest >> 32);
+		} else {
+			iova = DPAA2_VADDR_TO_IOVA(jobs[i]->dest);
+			dst_sge->addr_lo = (uint32_t)iova;
+			dst_sge->addr_hi = iova >> 32;
+		}
+		dst_sge->data_len.data_len_sl0 = jobs[i]->len;
+		dst_sge->ctrl.sl = QDMA_SG_SL_LONG;
+		dst_sge->ctrl.fmt = QDMA_SG_FMT_SDB;
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+		dst_sge->ctrl.bmt = QDMA_SG_BMT_ENABLE;
+#else
+		dst_sge->ctrl.bmt = QDMA_SG_BMT_DISABLE;
+#endif
+		total_len += jobs[i]->len;
 
-	if ((rbp->drbp == 1) || (rbp->srbp == 1))
-		ret = qdma_populate_fd_pci((phys_addr_t) job->src,
-					   (phys_addr_t) job->dest,
-					   job->len, fd, rbp);
-	else
-		ret = qdma_populate_fd_ddr((phys_addr_t) job->src,
-					   (phys_addr_t) job->dest,
-					   job->len, fd);
-	return ret;
+		if (i == (nb_jobs - 1)) {
+			src_sge->ctrl.f = QDMA_SG_F;
+			dst_sge->ctrl.f = QDMA_SG_F;
+		} else {
+			src_sge->ctrl.f = 0;
+			dst_sge->ctrl.f = 0;
+		}
+		src_sge++;
+		dst_sge++;
+	}
+
+	return total_len;
 }
 
-static inline int dpdmai_dev_set_fd_lf(
+
+static inline int dpdmai_dev_set_multi_fd_lf(
 		struct qdma_virt_queue *qdma_vq,
 		struct qbman_fd *fd,
-		struct rte_qdma_job *job)
+		struct rte_qdma_job **job,
+		uint16_t nb_jobs)
 {
 	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
 	struct rte_qdma_job **ppjob;
-	void *elem;
+	uint16_t i;
+	int ret;
+	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
+	void *elem[RTE_QDMA_BURST_NB_MAX];
 	struct qbman_fle *fle;
 	uint64_t elem_iova, fle_iova;
-	int ret = 0;
+
+	ret = rte_mempool_get_bulk(qdma_dev->fle_pool, elem, nb_jobs);
+	if (ret) {
+		DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
+		return ret;
+	}
+
+	for (i = 0; i < nb_jobs; i++) {
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+		elem_iova = rte_mempool_virt2iova(elem[i]);
+#else
+		elem_iova = DPAA2_VADDR_TO_IOVA(elem[i]);
+#endif
+
+		*((uint16_t *)
+		((uintptr_t)(uint64_t)elem[i] + QDMA_FLE_JOB_NB_OFFSET)) = 1;
+
+		ppjob = (struct rte_qdma_job **)
+			((uintptr_t)(uint64_t)elem[i] + QDMA_FLE_JOBS_OFFSET);
+		*ppjob = job[i];
+
+		job[i]->vq_id = qdma_vq->vq_id;
+
+		fle = (struct qbman_fle *)
+			((uintptr_t)(uint64_t)elem[i] + QDMA_FLE_FLE_OFFSET);
+		fle_iova = elem_iova + QDMA_FLE_FLE_OFFSET;
+
+		DPAA2_SET_FD_ADDR(&fd[i], fle_iova);
+		DPAA2_SET_FD_COMPOUND_FMT(&fd[i]);
+		DPAA2_SET_FD_FRC(&fd[i], QDMA_SER_CTX);
+
+		memset(fle, 0, DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle) +
+				DPAA2_QDMA_MAX_SDD * sizeof(struct qdma_sdd));
+
+		dpaa2_qdma_populate_fle(fle, fle_iova, rbp,
+			job[i]->src, job[i]->dest, job[i]->len,
+			job[i]->flags, QBMAN_FLE_WORD4_FMT_SBF);
+	}
+
+	return 0;
+}
+
+static inline int dpdmai_dev_set_sg_fd_lf(
+		struct qdma_virt_queue *qdma_vq,
+		struct qbman_fd *fd,
+		struct rte_qdma_job **job,
+		uint16_t nb_jobs)
+{
+	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
+	struct rte_qdma_job **ppjob;
+	void *elem;
+	struct qbman_fle *fle;
+	uint64_t elem_iova, fle_iova, src, dst;
+	int ret = 0, i;
+	struct qdma_sg_entry *src_sge, *dst_sge;
+	uint32_t len, fmt, flags;
 	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
 
 	/*
@@ -244,10 +379,15 @@ static inline int dpdmai_dev_set_fd_lf(
 #endif
 
 	/* Set the metadata */
-	job->vq_id = qdma_vq->vq_id;
+	/* Save job context. */
+	*((uint16_t *)
+	((uintptr_t)(uint64_t)elem + QDMA_FLE_JOB_NB_OFFSET)) = nb_jobs;
 	ppjob = (struct rte_qdma_job **)
-		((uintptr_t)(uint64_t)elem + QDMA_FLE_JOB_OFFSET);
-	*ppjob = job;
+		((uintptr_t)(uint64_t)elem + QDMA_FLE_JOBS_OFFSET);
+	for (i = 0; i < nb_jobs; i++)
+		ppjob[i] = job[i];
+
+	ppjob[0]->vq_id = qdma_vq->vq_id;
 
 	fle = (struct qbman_fle *)
 		((uintptr_t)(uint64_t)elem + QDMA_FLE_FLE_OFFSET);
@@ -258,9 +398,29 @@ static inline int dpdmai_dev_set_fd_lf(
 	DPAA2_SET_FD_FRC(fd, QDMA_SER_CTX);
 
 	/* Populate FLE */
-	memset(fle, 0, QDMA_FLE_POOL_SIZE);
-	dpaa2_qdma_populate_fle(fle, rbp, job->src, job->dest,
-				job->len, job->flags);
+	if (likely(nb_jobs > 1)) {
+		src_sge = (struct qdma_sg_entry *)
+			((uintptr_t)(uint64_t)elem + QDMA_FLE_SG_ENTRY_OFFSET);
+		dst_sge = src_sge + DPAA2_QDMA_MAX_SG_NB;
+		src = elem_iova + QDMA_FLE_SG_ENTRY_OFFSET;
+		dst = src +
+			DPAA2_QDMA_MAX_SG_NB * sizeof(struct qdma_sg_entry);
+		len = qdma_populate_sg_entry(job, src_sge, dst_sge, nb_jobs);
+		fmt = QBMAN_FLE_WORD4_FMT_SGE;
+		flags = RTE_QDMA_JOB_SRC_PHY | RTE_QDMA_JOB_DEST_PHY;
+	} else {
+		src = job[0]->src;
+		dst = job[0]->dest;
+		len = job[0]->len;
+		fmt = QBMAN_FLE_WORD4_FMT_SBF;
+		flags = job[0]->flags;
+	}
+
+	memset(fle, 0, DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle) +
+			DPAA2_QDMA_MAX_SDD * sizeof(struct qdma_sdd));
+
+	dpaa2_qdma_populate_fle(fle, fle_iova, rbp,
+		src, dst, len, flags, fmt);
 
 	return 0;
 }
@@ -268,7 +428,7 @@ static inline int dpdmai_dev_set_fd_lf(
 static inline uint16_t dpdmai_dev_get_job_us(
 				struct qdma_virt_queue *qdma_vq __rte_unused,
 				const struct qbman_fd *fd,
-				struct rte_qdma_job **job)
+				struct rte_qdma_job **job, uint16_t *nb_jobs)
 {
 	uint16_t vqid;
 	size_t iova;
@@ -286,6 +446,7 @@ static inline uint16_t dpdmai_dev_get_job_us(
 	(*job)->status = (fd->simple_pci.acc_err << 8) |
 					(fd->simple_pci.error);
 	vqid = (*job)->vq_id;
+	*nb_jobs = 1;
 
 	return vqid;
 }
@@ -293,12 +454,12 @@ static inline uint16_t dpdmai_dev_get_job_us(
 static inline uint16_t dpdmai_dev_get_job_lf(
 				struct qdma_virt_queue *qdma_vq,
 				const struct qbman_fd *fd,
-				struct rte_qdma_job **job)
+				struct rte_qdma_job **job,
+				uint16_t *nb_jobs)
 {
-	void *elem;
 	struct qbman_fle *fle;
-	struct rte_qdma_job **ppjob;
-	uint16_t vqid;
+	struct rte_qdma_job **ppjob = NULL;
+	uint16_t i, status;
 	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
 
 	/*
@@ -307,20 +468,24 @@ static inline uint16_t dpdmai_dev_get_job_lf(
 	 */
 	fle = (struct qbman_fle *)
 			DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd));
-	elem = (void *)((uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET);
+	*nb_jobs = *((uint16_t *)((uintptr_t)(uint64_t)fle -
+			QDMA_FLE_FLE_OFFSET + QDMA_FLE_JOB_NB_OFFSET));
+	status = (DPAA2_GET_FD_ERR(fd) << 8) | (DPAA2_GET_FD_FRC(fd) & 0xFF);
 
-	ppjob = (struct rte_qdma_job **)
-		((uintptr_t)(uint64_t)elem + QDMA_FLE_JOB_OFFSET);
+	ppjob = (struct rte_qdma_job **)((uintptr_t)(uint64_t)fle -
+			QDMA_FLE_FLE_OFFSET + QDMA_FLE_JOBS_OFFSET);
 
-	*job = (struct rte_qdma_job *)*ppjob;
-	(*job)->status = (DPAA2_GET_FD_ERR(fd) << 8) |
-			 (DPAA2_GET_FD_FRC(fd) & 0xFF);
-	vqid = (*job)->vq_id;
+	for (i = 0; i < (*nb_jobs); i++) {
+		job[i] = ppjob[i];
+		job[i]->status = status;
+	}
 
 	/* Free FLE to the pool */
-	rte_mempool_put(qdma_dev->fle_pool, elem);
+	rte_mempool_put(qdma_dev->fle_pool,
+		(void *)
+		((uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET));
 
-	return vqid;
+	return job[0]->vq_id;
 }
 
 /* Function to receive a QDMA job for a given device and queue*/
@@ -344,9 +509,16 @@ dpdmai_dev_dequeue_multijob_prefetch(
 	uint8_t status, pending;
 	uint8_t num_rx = 0;
 	const struct qbman_fd *fd;
-	uint16_t vqid;
+	uint16_t vqid, num_rx_ret;
 	int ret, pull_size;
 
+	if (qdma_vq->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
+		/** Make sure there is enough space to get jobs. */
+		if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
+			return -EINVAL;
+		nb_jobs = 1;
+	}
+
 	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
 		ret = dpaa2_affine_qbman_swp();
 		if (ret) {
@@ -440,12 +612,13 @@ dpdmai_dev_dequeue_multijob_prefetch(
 		}
 		fd = qbman_result_DQ_fd(dq_storage);
 
-		vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx]);
+		vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx],
+							&num_rx_ret);
 		if (vq_id)
 			vq_id[num_rx] = vqid;
 
 		dq_storage++;
-		num_rx++;
+		num_rx += num_rx_ret;
 	} while (pending);
 
 	if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
@@ -490,8 +663,17 @@ dpdmai_dev_dequeue_multijob_no_prefetch(
 	uint8_t status, pending;
 	uint8_t num_rx = 0;
 	const struct qbman_fd *fd;
-	uint16_t vqid;
-	int ret, next_pull = nb_jobs, num_pulled = 0;
+	uint16_t vqid, num_rx_ret;
+	int ret, next_pull, num_pulled = 0;
+
+	if (qdma_vq->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
+		/** Make sure there is enough space to get jobs. */
+		if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
+			return -EINVAL;
+		nb_jobs = 1;
+	}
+
+	next_pull = nb_jobs;
 
 	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
 		ret = dpaa2_affine_qbman_swp();
@@ -560,12 +742,13 @@ dpdmai_dev_dequeue_multijob_no_prefetch(
 		}
 		fd = qbman_result_DQ_fd(dq_storage);
 
-		vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx]);
+		vqid = qdma_vq->get_job(qdma_vq, fd,
+					&job[num_rx], &num_rx_ret);
 		if (vq_id)
 			vq_id[num_rx] = vqid;
 
 		dq_storage++;
-		num_rx++;
+		num_rx += num_rx_ret;
 		num_pulled++;
 	} while (pending);
 
@@ -592,6 +775,7 @@ dpdmai_dev_enqueue_multi(
 	int ret;
 	uint32_t num_to_send = 0;
 	uint16_t num_tx = 0;
+	uint32_t enqueue_loop, retry_count, loop;
 
 	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
 		ret = dpaa2_affine_qbman_swp();
@@ -612,45 +796,87 @@ dpdmai_dev_enqueue_multi(
 	qbman_eq_desc_set_no_orp(&eqdesc, 0);
 	qbman_eq_desc_set_response(&eqdesc, 0, 0);
 
+	if (qdma_vq->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
+		uint16_t fd_nb;
+		uint16_t sg_entry_nb = nb_jobs > DPAA2_QDMA_MAX_SG_NB ?
+					DPAA2_QDMA_MAX_SG_NB : nb_jobs;
+		uint16_t job_idx = 0;
+		uint16_t fd_sg_nb[8];
+		uint16_t nb_jobs_ret = 0;
+
+		if (nb_jobs % DPAA2_QDMA_MAX_SG_NB)
+			fd_nb = nb_jobs / DPAA2_QDMA_MAX_SG_NB + 1;
+		else
+			fd_nb = nb_jobs / DPAA2_QDMA_MAX_SG_NB;
+
+		memset(&fd[0], 0, sizeof(struct qbman_fd) * fd_nb);
+
+		for (loop = 0; loop < fd_nb; loop++) {
+			ret = qdma_vq->set_fd(qdma_vq, &fd[loop], &job[job_idx],
+						sg_entry_nb);
+			if (unlikely(ret < 0))
+				return 0;
+			fd_sg_nb[loop] = sg_entry_nb;
+			nb_jobs -= sg_entry_nb;
+			job_idx += sg_entry_nb;
+			sg_entry_nb = nb_jobs > DPAA2_QDMA_MAX_SG_NB ?
+					DPAA2_QDMA_MAX_SG_NB : nb_jobs;
+		}
+
+		/* Enqueue the packet to the QBMAN */
+		enqueue_loop = 0; retry_count = 0;
+
+		while (enqueue_loop < fd_nb) {
+			ret = qbman_swp_enqueue_multiple(swp,
+					&eqdesc, &fd[enqueue_loop],
+					NULL, fd_nb - enqueue_loop);
+			if (unlikely(ret < 0)) {
+				retry_count++;
+				if (retry_count > DPAA2_MAX_TX_RETRY_COUNT)
+					return nb_jobs_ret;
+			} else {
+				for (loop = 0; loop < (uint32_t)ret; loop++)
+					nb_jobs_ret +=
+						fd_sg_nb[enqueue_loop + loop];
+				enqueue_loop += ret;
+				retry_count = 0;
+			}
+		}
+
+		return nb_jobs_ret;
+	}
+
 	memset(fd, 0, nb_jobs * sizeof(struct qbman_fd));
 
 	while (nb_jobs > 0) {
-		uint32_t loop;
-
 		num_to_send = (nb_jobs > dpaa2_eqcr_size) ?
 			dpaa2_eqcr_size : nb_jobs;
 
-		for (loop = 0; loop < num_to_send; loop++) {
-			ret = qdma_vq->set_fd(qdma_vq, &fd[loop], job[num_tx]);
-			if (ret < 0) {
-				/* Set nb_jobs to loop, so outer while loop
-				 * breaks out.
-				 */
-				nb_jobs = loop;
-				break;
-			}
-
-			num_tx++;
-		}
+		ret = qdma_vq->set_fd(qdma_vq, &fd[num_tx],
+					&job[num_tx], num_to_send);
+		if (unlikely(ret < 0))
+			break;
 
 		/* Enqueue the packet to the QBMAN */
-		uint32_t enqueue_loop = 0, retry_count = 0;
+		enqueue_loop = 0; retry_count = 0;
+		loop = num_to_send;
 
 		while (enqueue_loop < loop) {
 			ret = qbman_swp_enqueue_multiple(swp,
 						&eqdesc,
-						&fd[enqueue_loop],
+						&fd[num_tx + enqueue_loop],
 						NULL,
 						loop - enqueue_loop);
 			if (unlikely(ret < 0)) {
 				retry_count++;
 				if (retry_count > DPAA2_MAX_TX_RETRY_COUNT)
-					return num_tx - (loop - enqueue_loop);
+					return num_tx;
 			} else {
 				enqueue_loop += ret;
 				retry_count = 0;
 			}
 		}
+		num_tx += num_to_send;
 		nb_jobs -= loop;
 	}
 	return num_tx;
@@ -977,6 +1203,21 @@ dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
 		return -ENODEV;
 	}
 
+	if (q_config->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
+		if (!(q_config->flags & RTE_QDMA_VQ_EXCLUSIVE_PQ)) {
+			DPAA2_QDMA_ERR(
+				"qDMA SG format only supports physical queue!");
+			rte_spinlock_unlock(&qdma_dev->lock);
+			return -ENODEV;
+		}
+		if (!(q_config->flags & RTE_QDMA_VQ_FD_LONG_FORMAT)) {
+			DPAA2_QDMA_ERR(
+				"qDMA SG format only supports long FD format!");
+			rte_spinlock_unlock(&qdma_dev->lock);
+			return -ENODEV;
+		}
+	}
+
 	if (q_config->flags & RTE_QDMA_VQ_EXCLUSIVE_PQ) {
 		/* Allocate HW queue for a VQ */
 		qdma_dev->vqs[i].hw_queue = alloc_hw_queue(q_config->lcore_id);
@@ -1007,12 +1248,16 @@ dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
 			return -ENODEV;
 		}
 
+	qdma_dev->vqs[i].flags = q_config->flags;
 	qdma_dev->vqs[i].in_use = 1;
 	qdma_dev->vqs[i].lcore_id = q_config->lcore_id;
 	memset(&qdma_dev->vqs[i].rbp, 0, sizeof(struct rte_qdma_rbp));
 
 	if (q_config->flags & RTE_QDMA_VQ_FD_LONG_FORMAT) {
-		qdma_dev->vqs[i].set_fd = dpdmai_dev_set_fd_lf;
+		if (q_config->flags & RTE_QDMA_VQ_FD_SG_FORMAT)
+			qdma_dev->vqs[i].set_fd = dpdmai_dev_set_sg_fd_lf;
+		else
+			qdma_dev->vqs[i].set_fd = dpdmai_dev_set_multi_fd_lf;
 		qdma_dev->vqs[i].get_job = dpdmai_dev_get_job_lf;
 	} else {
 		qdma_dev->vqs[i].set_fd = dpdmai_dev_set_fd_us;
@@ -1087,6 +1332,12 @@ dpaa2_qdma_dequeue(struct rte_rawdev *rawdev,
 	int ret = 0, i;
 	unsigned int ring_count;
 
+	if (qdma_vq->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
+		/** Make sure there is enough space to get jobs. */
+		if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
+			return -EINVAL;
+	}
+
 	/* Return error in case of wrong lcore_id */
 	if (rte_lcore_id() != (unsigned int)(qdma_vq->lcore_id)) {
 		DPAA2_QDMA_WARN("QDMA dequeue for vqid %d on wrong core",
@@ -1098,7 +1349,8 @@ dpaa2_qdma_dequeue(struct rte_rawdev *rawdev,
 	if (qdma_vq->num_enqueues == qdma_vq->num_dequeues)
 		return 0;
 
-	if (qdma_vq->num_enqueues < (qdma_vq->num_dequeues + nb_jobs))
+	if (!(qdma_vq->flags & RTE_QDMA_VQ_FD_SG_FORMAT) &&
+		qdma_vq->num_enqueues < (qdma_vq->num_dequeues + nb_jobs))
 		nb_jobs = (qdma_vq->num_enqueues - qdma_vq->num_dequeues);
 
 	if (qdma_vq->exclusive_hw_queue) {
diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
index ff7743fb5..43a01d56f 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
@@ -11,16 +11,37 @@ struct rte_qdma_job;
 #define DPAA2_QDMA_MAX_FLE 3
 #define DPAA2_QDMA_MAX_SDD 2
 
+#define DPAA2_QDMA_MAX_SG_NB 64
+
 #define DPAA2_DPDMAI_MAX_QUEUES	8
 
-/** FLE pool size: 3 Frame list + 2 source/destination descriptor */
-#define QDMA_FLE_POOL_SIZE (sizeof(struct rte_qdma_job *) + \
+/** FLE pool size: job number(uint64_t) +
+ * 3 Frame list + 2 source/destination descriptor +
+ * 32 (src + dst) sg entries + 32 jobs pointers.
+ */
+
+#define QDMA_FLE_POOL_SIZE (sizeof(uint64_t) + \
 		sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE + \
-		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD)
+		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD + \
+		sizeof(struct qdma_sg_entry) * DPAA2_QDMA_MAX_SG_NB * 2 + \
+		sizeof(struct rte_qdma_job *) * DPAA2_QDMA_MAX_SG_NB)
+
+#define QDMA_FLE_JOB_NB_OFFSET 0
 
-#define QDMA_FLE_JOB_OFFSET 0
 #define QDMA_FLE_FLE_OFFSET \
-		(QDMA_FLE_JOB_OFFSET + sizeof(struct rte_qdma_job *))
+		(QDMA_FLE_JOB_NB_OFFSET + sizeof(uint64_t))
+
+#define QDMA_FLE_SDD_OFFSET \
+		(QDMA_FLE_FLE_OFFSET + \
+		sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE)
+
+#define QDMA_FLE_SG_ENTRY_OFFSET \
+		(QDMA_FLE_SDD_OFFSET + \
+		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD)
+
+#define QDMA_FLE_JOBS_OFFSET \
+		(QDMA_FLE_SG_ENTRY_OFFSET + \
+		sizeof(struct qdma_sg_entry) * DPAA2_QDMA_MAX_SG_NB * 2)
 
 /** FLE pool cache size */
 #define QDMA_FLE_CACHE_SIZE(_num) (_num/(RTE_MAX_LCORE * 2))
@@ -90,10 +111,12 @@ struct qdma_virt_queue;
 typedef uint16_t (qdma_get_job_t)(struct qdma_virt_queue *qdma_vq,
 					const struct qbman_fd *fd,
-					struct rte_qdma_job **job);
+					struct rte_qdma_job **job,
+					uint16_t *nb_jobs);
 typedef int (qdma_set_fd_t)(struct qdma_virt_queue *qdma_vq,
 					struct qbman_fd *fd,
-					struct rte_qdma_job *job);
+					struct rte_qdma_job **job,
+					uint16_t nb_jobs);
 
 typedef int (qdma_dequeue_multijob_t)(
 				struct qdma_virt_queue *qdma_vq,
@@ -126,6 +149,7 @@ struct qdma_virt_queue {
 	uint64_t num_dequeues;
 
 	uint16_t vq_id;
+	uint32_t flags;
 
 	qdma_set_fd_t *set_fd;
 	qdma_get_job_t *get_job;
@@ -191,6 +215,43 @@ struct qdma_sdd {
 	};
 } __rte_packed;
 
+#define QDMA_SG_FMT_SDB		0x0 /* single data buffer */
+#define QDMA_SG_FMT_FDS		0x1 /* frame data section */
+#define QDMA_SG_FMT_SGTE	0x2 /* SGT extension */
+#define QDMA_SG_SL_SHORT	0x1 /* short length */
+#define QDMA_SG_SL_LONG		0x0 /* long length */
+#define QDMA_SG_F		0x1 /* last sg entry */
+#define QDMA_SG_BMT_ENABLE	0x1
+#define QDMA_SG_BMT_DISABLE	0x0
+
+struct qdma_sg_entry {
+	uint32_t addr_lo; /* address 0:31 */
+	uint32_t addr_hi:17; /* address 32:48 */
+	uint32_t rsv:15;
+	union {
+		uint32_t data_len_sl0; /* SL=0, the long format */
+		struct {
+			uint32_t len:17; /* SL=1, the short format */
+			uint32_t reserve:3;
+			uint32_t sf:1;
+			uint32_t sr:1;
+			uint32_t size:10; /* buff size */
+		} data_len_sl1;
+	} data_len; /* AVAIL_LENGTH */
+	union {
+		uint32_t ctrl_fields;
+		struct {
+			uint32_t bpid:14;
+			uint32_t ivp:1;
+			uint32_t bmt:1;
+			uint32_t offset:12;
+			uint32_t fmt:2;
+			uint32_t sl:1;
+			uint32_t f:1;
+		} ctrl;
+	};
+} __attribute__((__packed__));
+
 /** Represents a DPDMAI raw device */
 struct dpaa2_dpdmai_dev {
 	/** Pointer to Next device instance */
diff --git a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
index ff4fc1d6c..cfec303c8 100644
--- a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
@@ -52,6 +52,8 @@ enum {
 
 #define RTE_QDMA_VQ_FD_LONG_FORMAT	(1ULL << 1)
 
+#define RTE_QDMA_VQ_FD_SG_FORMAT	(1ULL << 2)
+
 /** States if the source addresses is physical. */
 #define RTE_QDMA_JOB_SRC_PHY		(1ULL)
 
-- 
2.17.1
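
Usage note (reader aid, not part of the patch): a minimal sketch of how an
application might drive the new SG enqueue path, based only on the flags and
rte_qdma_job fields visible above. The struct rte_qdma_enqdeq context and the
rte_rawdev_enqueue_buffers() entry point follow the rawdev-based API used by
this driver series and should be treated as assumptions here; the virtual
queue is assumed to have been created with RTE_QDMA_VQ_EXCLUSIVE_PQ |
RTE_QDMA_VQ_FD_LONG_FORMAT | RTE_QDMA_VQ_FD_SG_FORMAT.

#include <rte_rawdev.h>
#include <rte_pmd_dpaa2_qdma.h>

/* Illustrative only: enqueue a burst of copy jobs on an SG-format VQ.
 * Each rte_qdma_job carries src/dest addresses and a length; the PMD
 * packs up to DPAA2_QDMA_MAX_SG_NB (64) jobs into a single SG FD.
 */
static inline int
qdma_sg_copy_burst(uint16_t rawdev_id, uint16_t vq_id,
		   struct rte_qdma_job **jobs, uint16_t nb_jobs)
{
	/* Assumed context structure of the rawdev-based qdma API. */
	struct rte_qdma_enqdeq ctx;

	ctx.vq_id = vq_id;
	ctx.job = jobs;

	/* jobs[n]->flags should carry RTE_QDMA_JOB_SRC_PHY and/or
	 * RTE_QDMA_JOB_DEST_PHY when the addresses are IOVA/physical.
	 */
	return rte_rawdev_enqueue_buffers(rawdev_id, NULL, nb_jobs,
					  (rte_rawdev_obj_t)&ctx);
}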