DPDK patches and discussions
* [PATCH v2 00/10] acc baseband PMD fix and updates for 24.11
@ 2024-10-03 20:49 Hernan Vargas
  2024-10-03 20:49 ` [PATCH v2 01/10] baseband/acc: fix access to deallocated mem Hernan Vargas
                   ` (9 more replies)
  0 siblings, 10 replies; 13+ messages in thread
From: Hernan Vargas @ 2024-10-03 20:49 UTC (permalink / raw)
  To: dev, gakhil, trix, maxime.coquelin
  Cc: nicolas.chautru, qi.z.zhang, Hernan Vargas

This series targets 24.11.
v2: Rebased on the latest next-baseband-for-main, which includes the needed rte_bbdev lib updates.
v1: Includes a memory access fix, a refactor of queue allocation, and general improvements.

Hernan Vargas (10):
  baseband/acc: fix access to deallocated mem
  baseband/acc: queue allocation refactor
  baseband/acc: configure max queues per device
  baseband/acc: future proof structure comparison
  baseband/acc: enhance SW ring alignment
  baseband/acc: remove soft output bypass
  baseband/acc: algorithm tuning for LDPC decoder
  baseband/acc: remove check on HARQ memory
  baseband/acc: reset ring data valid bit
  baseband/acc: cosmetic changes

 drivers/baseband/acc/acc_common.h     |  16 +-
 drivers/baseband/acc/rte_acc100_pmd.c |  38 +--
 drivers/baseband/acc/rte_vrb_pmd.c    | 347 ++++++++++++++++----------
 3 files changed, 224 insertions(+), 177 deletions(-)

-- 
2.37.1



* [PATCH v2 01/10] baseband/acc: fix access to deallocated mem
  2024-10-03 20:49 [PATCH v2 00/10] acc baseband PMD fix and updates for 24.11 Hernan Vargas
@ 2024-10-03 20:49 ` Hernan Vargas
  2024-10-03 20:49 ` [PATCH v2 02/10] baseband/acc: queue allocation refactor Hernan Vargas
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 13+ messages in thread
From: Hernan Vargas @ 2024-10-03 20:49 UTC (permalink / raw)
  To: dev, gakhil, trix, maxime.coquelin
  Cc: nicolas.chautru, qi.z.zhang, Hernan Vargas, stable

Prevent op_addr access during queue_stop operation, as this memory may
have been deallocated.

Fixes: e640f6cdfa84 ("baseband/acc200: add LDPC processing")
Cc: stable@dpdk.org

Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/baseband/acc/rte_acc100_pmd.c | 36 ----------------------
 drivers/baseband/acc/rte_vrb_pmd.c    | 44 +--------------------------
 2 files changed, 1 insertion(+), 79 deletions(-)
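
For illustration, a minimal sketch of the behavior after this fix
(simplified stand-in types, not the driver's): queue_stop now only
resets the software ring counters and never walks the ring, so the
stale op_addr pointers left in the descriptors, which may reference
memory the application has already freed, are not dereferenced.

#include <stdint.h>

struct queue_sketch {
	uint16_t sw_ring_head;
	uint16_t sw_ring_tail;
	uint32_t aq_enqueued;
};

static void queue_stop_sketch(struct queue_sketch *q)
{
	/* Ignore all operations in flight and clear counters;
	 * no per-descriptor op_addr dereference. */
	q->sw_ring_tail = q->sw_ring_head;
	q->aq_enqueued = 0;
}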

diff --git a/drivers/baseband/acc/rte_acc100_pmd.c b/drivers/baseband/acc/rte_acc100_pmd.c
index 98ce39548900..e3a523946448 100644
--- a/drivers/baseband/acc/rte_acc100_pmd.c
+++ b/drivers/baseband/acc/rte_acc100_pmd.c
@@ -837,51 +837,15 @@ acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
 	return ret;
 }
 
-static inline void
-acc100_print_op(struct rte_bbdev_dec_op *op, enum rte_bbdev_op_type op_type,
-		uint16_t index)
-{
-	if (op == NULL)
-		return;
-	if (op_type == RTE_BBDEV_OP_LDPC_DEC)
-		rte_bbdev_log(DEBUG,
-			"  Op 5GUL %d %d %d %d %d %d %d %d %d %d %d %d",
-			index,
-			op->ldpc_dec.basegraph, op->ldpc_dec.z_c,
-			op->ldpc_dec.n_cb, op->ldpc_dec.q_m,
-			op->ldpc_dec.n_filler, op->ldpc_dec.cb_params.e,
-			op->ldpc_dec.op_flags, op->ldpc_dec.rv_index,
-			op->ldpc_dec.iter_max, op->ldpc_dec.iter_count,
-			op->ldpc_dec.harq_combined_input.length
-			);
-	else if (op_type == RTE_BBDEV_OP_LDPC_ENC) {
-		struct rte_bbdev_enc_op *op_dl = (struct rte_bbdev_enc_op *) op;
-		rte_bbdev_log(DEBUG,
-			"  Op 5GDL %d %d %d %d %d %d %d %d %d",
-			index,
-			op_dl->ldpc_enc.basegraph, op_dl->ldpc_enc.z_c,
-			op_dl->ldpc_enc.n_cb, op_dl->ldpc_enc.q_m,
-			op_dl->ldpc_enc.n_filler, op_dl->ldpc_enc.cb_params.e,
-			op_dl->ldpc_enc.op_flags, op_dl->ldpc_enc.rv_index
-			);
-	}
-}
-
 static int
 acc100_queue_stop(struct rte_bbdev *dev, uint16_t queue_id)
 {
 	struct acc_queue *q;
-	struct rte_bbdev_dec_op *op;
-	uint16_t i;
 
 	q = dev->data->queues[queue_id].queue_private;
 	rte_bbdev_log(INFO, "Queue Stop %d H/T/D %d %d %x OpType %d",
 			queue_id, q->sw_ring_head, q->sw_ring_tail,
 			q->sw_ring_depth, q->op_type);
-	for (i = 0; i < q->sw_ring_depth; ++i) {
-		op = (q->ring_addr + i)->req.op_addr;
-		acc100_print_op(op, q->op_type, i);
-	}
 	/* ignore all operations in flight and clear counters */
 	q->sw_ring_tail = q->sw_ring_head;
 	q->aq_enqueued = 0;
diff --git a/drivers/baseband/acc/rte_vrb_pmd.c b/drivers/baseband/acc/rte_vrb_pmd.c
index 4c532d92fa25..bae01e563826 100644
--- a/drivers/baseband/acc/rte_vrb_pmd.c
+++ b/drivers/baseband/acc/rte_vrb_pmd.c
@@ -1047,58 +1047,16 @@ vrb_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
 	return ret;
 }
 
-static inline void
-vrb_print_op(struct rte_bbdev_dec_op *op, enum rte_bbdev_op_type op_type,
-		uint16_t index)
-{
-	if (op == NULL)
-		return;
-	if (op_type == RTE_BBDEV_OP_LDPC_DEC)
-		rte_bbdev_log(INFO,
-			"  Op 5GUL %d %d %d %d %d %d %d %d %d %d %d %d",
-			index,
-			op->ldpc_dec.basegraph, op->ldpc_dec.z_c,
-			op->ldpc_dec.n_cb, op->ldpc_dec.q_m,
-			op->ldpc_dec.n_filler, op->ldpc_dec.cb_params.e,
-			op->ldpc_dec.op_flags, op->ldpc_dec.rv_index,
-			op->ldpc_dec.iter_max, op->ldpc_dec.iter_count,
-			op->ldpc_dec.harq_combined_input.length
-			);
-	else if (op_type == RTE_BBDEV_OP_LDPC_ENC) {
-		struct rte_bbdev_enc_op *op_dl = (struct rte_bbdev_enc_op *) op;
-		rte_bbdev_log(INFO,
-			"  Op 5GDL %d %d %d %d %d %d %d %d %d",
-			index,
-			op_dl->ldpc_enc.basegraph, op_dl->ldpc_enc.z_c,
-			op_dl->ldpc_enc.n_cb, op_dl->ldpc_enc.q_m,
-			op_dl->ldpc_enc.n_filler, op_dl->ldpc_enc.cb_params.e,
-			op_dl->ldpc_enc.op_flags, op_dl->ldpc_enc.rv_index
-			);
-	} else if (op_type == RTE_BBDEV_OP_MLDTS) {
-		struct rte_bbdev_mldts_op *op_mldts = (struct rte_bbdev_mldts_op *) op;
-		rte_bbdev_log(INFO, "  Op MLD %d RBs %d NL %d Rp %d %d %x\n",
-				index,
-				op_mldts->mldts.num_rbs, op_mldts->mldts.num_layers,
-				op_mldts->mldts.r_rep,
-				op_mldts->mldts.c_rep, op_mldts->mldts.op_flags);
-	}
-}
-
 /* Stop queue and clear counters. */
 static int
 vrb_queue_stop(struct rte_bbdev *dev, uint16_t queue_id)
 {
 	struct acc_queue *q;
-	struct rte_bbdev_dec_op *op;
-	uint16_t i;
+
 	q = dev->data->queues[queue_id].queue_private;
 	rte_bbdev_log(INFO, "Queue Stop %d H/T/D %d %d %x OpType %d",
 			queue_id, q->sw_ring_head, q->sw_ring_tail,
 			q->sw_ring_depth, q->op_type);
-	for (i = 0; i < q->sw_ring_depth; ++i) {
-		op = (q->ring_addr + i)->req.op_addr;
-		vrb_print_op(op, q->op_type, i);
-	}
 	/* ignore all operations in flight and clear counters */
 	q->sw_ring_tail = q->sw_ring_head;
 	q->aq_enqueued = 0;
-- 
2.37.1



* [PATCH v2 02/10] baseband/acc: queue allocation refactor
  2024-10-03 20:49 [PATCH v2 00/10] acc baseband PMD fix and updates for 24.11 Hernan Vargas
  2024-10-03 20:49 ` [PATCH v2 01/10] baseband/acc: fix access to deallocated mem Hernan Vargas
@ 2024-10-03 20:49 ` Hernan Vargas
  2024-10-04 12:08   ` Maxime Coquelin
  2024-10-03 20:49 ` [PATCH v2 03/10] baseband/acc: configure max queues per device Hernan Vargas
                   ` (7 subsequent siblings)
  9 siblings, 1 reply; 13+ messages in thread
From: Hernan Vargas @ 2024-10-03 20:49 UTC (permalink / raw)
  To: dev, gakhil, trix, maxime.coquelin
  Cc: nicolas.chautru, qi.z.zhang, Hernan Vargas

Refactor to manage queue memory per operation more flexibly for VRB
devices.

Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
---
 drivers/baseband/acc/acc_common.h  |   5 +
 drivers/baseband/acc/rte_vrb_pmd.c | 214 ++++++++++++++++++++---------
 2 files changed, 157 insertions(+), 62 deletions(-)
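
To make the new layout concrete, here is a minimal sketch of the
per-operation ring slicing this refactor introduces (field names follow
the patch; the struct is a simplified stand-in, and MAX_OP_TYPES is a
placeholder for RTE_BBDEV_OP_MLDTS + 1):

#include <stddef.h>
#include <stdint.h>

#define MAX_OP_TYPES 6  /* placeholder for RTE_BBDEV_OP_MLDTS + 1 */

struct dev_sketch {
	void *sw_rings_array[MAX_OP_TYPES];  /* one ring block per op type */
	uint32_t queue_index[MAX_OP_TYPES];  /* next free slot per op type */
	uint32_t sw_ring_size;               /* bytes per queue ring */
};

/* Each new queue takes the next slice of its op type's ring block,
 * instead of indexing one shared block by global queue_id. */
static void *queue_ring_addr(struct dev_sketch *d, int op_type)
{
	void *addr = (char *)d->sw_rings_array[op_type] +
			(size_t)d->sw_ring_size * d->queue_index[op_type];

	d->queue_index[op_type]++;
	return addr;
}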

diff --git a/drivers/baseband/acc/acc_common.h b/drivers/baseband/acc/acc_common.h
index b1f81e73e68d..adbac0dcca70 100644
--- a/drivers/baseband/acc/acc_common.h
+++ b/drivers/baseband/acc/acc_common.h
@@ -149,6 +149,8 @@
 #define VRB2_VF_ID_SHIFT     6
 
 #define ACC_MAX_FFT_WIN      16
+#define ACC_MAX_RING_BUFFER  64
+#define VRB2_MAX_Q_PER_OP 256
 
 extern int acc_common_logtype;
 
@@ -581,6 +583,9 @@ struct acc_device {
 	void *sw_rings_base;  /* Base addr of un-aligned memory for sw rings */
 	void *sw_rings;  /* 64MBs of 64MB aligned memory for sw rings */
 	rte_iova_t sw_rings_iova;  /* IOVA address of sw_rings */
+	void *sw_rings_array[ACC_MAX_RING_BUFFER];  /* Array of aligned memory for sw rings. */
+	rte_iova_t sw_rings_iova_array[ACC_MAX_RING_BUFFER];  /* Array of sw_rings IOVA. */
+	uint32_t queue_index[ACC_MAX_RING_BUFFER]; /* Tracking queue index per ring buffer. */
 	/* Virtual address of the info memory routed to the this function under
 	 * operation, whether it is PF or VF.
 	 * HW may DMA information data at this location asynchronously
diff --git a/drivers/baseband/acc/rte_vrb_pmd.c b/drivers/baseband/acc/rte_vrb_pmd.c
index bae01e563826..2c62a5b3e329 100644
--- a/drivers/baseband/acc/rte_vrb_pmd.c
+++ b/drivers/baseband/acc/rte_vrb_pmd.c
@@ -281,7 +281,7 @@ fetch_acc_config(struct rte_bbdev *dev)
 		/* Check the depth of the AQs. */
 		reg_len0 = acc_reg_read(d, d->reg_addr->depth_log0_offset);
 		reg_len1 = acc_reg_read(d, d->reg_addr->depth_log1_offset);
-		for (acc = 0; acc < NUM_ACC; acc++) {
+		for (acc = 0; acc < VRB1_NUM_ACCS; acc++) {
 			qtopFromAcc(&q_top, acc, acc_conf);
 			if (q_top->first_qgroup_index < ACC_NUM_QGRPS_PER_WORD)
 				q_top->aq_depth_log2 =
@@ -290,7 +290,7 @@ fetch_acc_config(struct rte_bbdev *dev)
 				q_top->aq_depth_log2 = (reg_len1 >> ((q_top->first_qgroup_index -
 						ACC_NUM_QGRPS_PER_WORD) * 4)) & 0xF;
 		}
-	} else {
+	} else if (d->device_variant == VRB2_VARIANT) {
 		reg0 = acc_reg_read(d, d->reg_addr->qman_group_func);
 		reg1 = acc_reg_read(d, d->reg_addr->qman_group_func + 4);
 		reg2 = acc_reg_read(d, d->reg_addr->qman_group_func + 8);
@@ -308,7 +308,7 @@ fetch_acc_config(struct rte_bbdev *dev)
 					idx = (reg2 >> ((qg % ACC_NUM_QGRPS_PER_WORD) * 4)) & 0x7;
 				else
 					idx = (reg3 >> ((qg % ACC_NUM_QGRPS_PER_WORD) * 4)) & 0x7;
-				if (idx < VRB_NUM_ACCS) {
+				if (idx < VRB2_NUM_ACCS) {
 					acc = qman_func_id[idx];
 					updateQtop(acc, qg, acc_conf, d);
 				}
@@ -321,7 +321,7 @@ fetch_acc_config(struct rte_bbdev *dev)
 		reg_len2 = acc_reg_read(d, d->reg_addr->depth_log0_offset + 8);
 		reg_len3 = acc_reg_read(d, d->reg_addr->depth_log0_offset + 12);
 
-		for (acc = 0; acc < NUM_ACC; acc++) {
+		for (acc = 0; acc < VRB2_NUM_ACCS; acc++) {
 			qtopFromAcc(&q_top, acc, acc_conf);
 			if (q_top->first_qgroup_index / ACC_NUM_QGRPS_PER_WORD == 0)
 				q_top->aq_depth_log2 = (reg_len0 >> ((q_top->first_qgroup_index %
@@ -543,6 +543,7 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
 {
 	uint32_t phys_low, phys_high, value;
 	struct acc_device *d = dev->data->dev_private;
+	uint16_t queues_per_op, i;
 	int ret;
 
 	if (d->pf_device && !d->acc_conf.pf_mode_en) {
@@ -564,27 +565,37 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
 		return -ENODEV;
 	}
 
-	alloc_sw_rings_min_mem(dev, d, num_queues, socket_id);
+	if (d->device_variant == VRB1_VARIANT) {
+		alloc_sw_rings_min_mem(dev, d, num_queues, socket_id);
 
-	/* If minimal memory space approach failed, then allocate
-	 * the 2 * 64MB block for the sw rings.
-	 */
-	if (d->sw_rings == NULL)
-		alloc_2x64mb_sw_rings_mem(dev, d, socket_id);
+		/* If minimal memory space approach failed, then allocate
+		 * the 2 * 64MB block for the sw rings.
+		 */
+		if (d->sw_rings == NULL)
+			alloc_2x64mb_sw_rings_mem(dev, d, socket_id);
 
-	if (d->sw_rings == NULL) {
-		rte_bbdev_log(NOTICE,
-				"Failure allocating sw_rings memory");
-		return -ENOMEM;
+		if (d->sw_rings == NULL) {
+			rte_bbdev_log(NOTICE, "Failure allocating sw_rings memory");
+			return -ENOMEM;
+		}
+	} else if (d->device_variant == VRB2_VARIANT) {
+		queues_per_op = RTE_MIN(VRB2_MAX_Q_PER_OP, num_queues);
+		for (i = 0; i <= RTE_BBDEV_OP_MLDTS; i++) {
+			alloc_sw_rings_min_mem(dev, d, queues_per_op, socket_id);
+			if (d->sw_rings == NULL) {
+				rte_bbdev_log(NOTICE, "Failure allocating sw_rings memory %d", i);
+				return -ENOMEM;
+			}
+			/* Moves the pointer to the relevant array. */
+			d->sw_rings_array[i] = d->sw_rings;
+			d->sw_rings_iova_array[i] = d->sw_rings_iova;
+			d->sw_rings = NULL;
+			d->sw_rings_base = NULL;
+			d->sw_rings_iova = 0;
+			d->queue_index[i] = 0;
+		}
 	}
 
-	/* Configure device with the base address for DMA descriptor rings.
-	 * Same descriptor rings used for UL and DL DMA Engines.
-	 * Note : Assuming only VF0 bundle is used for PF mode.
-	 */
-	phys_high = (uint32_t)(d->sw_rings_iova >> 32);
-	phys_low  = (uint32_t)(d->sw_rings_iova & ~(ACC_SIZE_64MBYTE-1));
-
 	/* Read the populated cfg from device registers. */
 	fetch_acc_config(dev);
 
@@ -599,20 +610,60 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
 	if (d->pf_device)
 		acc_reg_write(d, VRB1_PfDmaAxiControl, 1);
 
-	acc_reg_write(d, d->reg_addr->dma_ring_ul5g_hi, phys_high);
-	acc_reg_write(d, d->reg_addr->dma_ring_ul5g_lo, phys_low);
-	acc_reg_write(d, d->reg_addr->dma_ring_dl5g_hi, phys_high);
-	acc_reg_write(d, d->reg_addr->dma_ring_dl5g_lo, phys_low);
-	acc_reg_write(d, d->reg_addr->dma_ring_ul4g_hi, phys_high);
-	acc_reg_write(d, d->reg_addr->dma_ring_ul4g_lo, phys_low);
-	acc_reg_write(d, d->reg_addr->dma_ring_dl4g_hi, phys_high);
-	acc_reg_write(d, d->reg_addr->dma_ring_dl4g_lo, phys_low);
-	acc_reg_write(d, d->reg_addr->dma_ring_fft_hi, phys_high);
-	acc_reg_write(d, d->reg_addr->dma_ring_fft_lo, phys_low);
-	if (d->device_variant == VRB2_VARIANT) {
-		acc_reg_write(d, d->reg_addr->dma_ring_mld_hi, phys_high);
-		acc_reg_write(d, d->reg_addr->dma_ring_mld_lo, phys_low);
+	if (d->device_variant == VRB1_VARIANT) {
+		/* Configure device with the base address for DMA descriptor rings.
+		 * Same descriptor rings used for UL and DL DMA Engines.
+		 * Note : Assuming only VF0 bundle is used for PF mode.
+		 */
+		phys_high = (uint32_t)(d->sw_rings_iova >> 32);
+		phys_low  = (uint32_t)(d->sw_rings_iova & ~(ACC_SIZE_64MBYTE-1));
+		acc_reg_write(d, d->reg_addr->dma_ring_ul5g_hi, phys_high);
+		acc_reg_write(d, d->reg_addr->dma_ring_ul5g_lo, phys_low);
+		acc_reg_write(d, d->reg_addr->dma_ring_dl5g_hi, phys_high);
+		acc_reg_write(d, d->reg_addr->dma_ring_dl5g_lo, phys_low);
+		acc_reg_write(d, d->reg_addr->dma_ring_ul4g_hi, phys_high);
+		acc_reg_write(d, d->reg_addr->dma_ring_ul4g_lo, phys_low);
+		acc_reg_write(d, d->reg_addr->dma_ring_dl4g_hi, phys_high);
+		acc_reg_write(d, d->reg_addr->dma_ring_dl4g_lo, phys_low);
+		acc_reg_write(d, d->reg_addr->dma_ring_fft_hi, phys_high);
+		acc_reg_write(d, d->reg_addr->dma_ring_fft_lo, phys_low);
+	} else if (d->device_variant == VRB2_VARIANT) {
+		/* Configure device with the base address for DMA descriptor rings.
+		 * Different ring buffer used for each operation type.
+		 * Note : Assuming only VF0 bundle is used for PF mode.
+		 */
+		acc_reg_write(d, d->reg_addr->dma_ring_ul5g_hi,
+				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_DEC] >> 32));
+		acc_reg_write(d, d->reg_addr->dma_ring_ul5g_lo,
+				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_DEC]
+				& ~(ACC_SIZE_64MBYTE - 1)));
+		acc_reg_write(d, d->reg_addr->dma_ring_dl5g_hi,
+				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_ENC] >> 32));
+		acc_reg_write(d, d->reg_addr->dma_ring_dl5g_lo,
+				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_ENC]
+				& ~(ACC_SIZE_64MBYTE - 1)));
+		acc_reg_write(d, d->reg_addr->dma_ring_ul4g_hi,
+				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_DEC] >> 32));
+		acc_reg_write(d, d->reg_addr->dma_ring_ul4g_lo,
+				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_DEC]
+				& ~(ACC_SIZE_64MBYTE - 1)));
+		acc_reg_write(d, d->reg_addr->dma_ring_dl4g_hi,
+				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_ENC] >> 32));
+		acc_reg_write(d, d->reg_addr->dma_ring_dl4g_lo,
+				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_ENC]
+				& ~(ACC_SIZE_64MBYTE - 1)));
+		acc_reg_write(d, d->reg_addr->dma_ring_fft_hi,
+				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_FFT] >> 32));
+		acc_reg_write(d, d->reg_addr->dma_ring_fft_lo,
+				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_FFT]
+				& ~(ACC_SIZE_64MBYTE - 1)));
+		acc_reg_write(d, d->reg_addr->dma_ring_mld_hi,
+				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_MLDTS] >> 32));
+		acc_reg_write(d, d->reg_addr->dma_ring_mld_lo,
+				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_MLDTS]
+				& ~(ACC_SIZE_64MBYTE - 1)));
 	}
+
 	/*
 	 * Configure Ring Size to the max queue ring size
 	 * (used for wrapping purpose).
@@ -636,19 +687,21 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
 
 	phys_high = (uint32_t)(d->tail_ptr_iova >> 32);
 	phys_low  = (uint32_t)(d->tail_ptr_iova);
-	acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_hi, phys_high);
-	acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_lo, phys_low);
-	acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_hi, phys_high);
-	acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_lo, phys_low);
-	acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_hi, phys_high);
-	acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_lo, phys_low);
-	acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_hi, phys_high);
-	acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_lo, phys_low);
-	acc_reg_write(d, d->reg_addr->tail_ptrs_fft_hi, phys_high);
-	acc_reg_write(d, d->reg_addr->tail_ptrs_fft_lo, phys_low);
-	if (d->device_variant == VRB2_VARIANT) {
-		acc_reg_write(d, d->reg_addr->tail_ptrs_mld_hi, phys_high);
-		acc_reg_write(d, d->reg_addr->tail_ptrs_mld_lo, phys_low);
+	{
+		acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_hi, phys_high);
+		acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_lo, phys_low);
+		acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_hi, phys_high);
+		acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_lo, phys_low);
+		acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_hi, phys_high);
+		acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_lo, phys_low);
+		acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_hi, phys_high);
+		acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_lo, phys_low);
+		acc_reg_write(d, d->reg_addr->tail_ptrs_fft_hi, phys_high);
+		acc_reg_write(d, d->reg_addr->tail_ptrs_fft_lo, phys_low);
+		if (d->device_variant == VRB2_VARIANT) {
+			acc_reg_write(d, d->reg_addr->tail_ptrs_mld_hi, phys_high);
+			acc_reg_write(d, d->reg_addr->tail_ptrs_mld_lo, phys_low);
+		}
 	}
 
 	ret = allocate_info_ring(dev);
@@ -684,8 +737,13 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
 	rte_free(d->tail_ptrs);
 	d->tail_ptrs = NULL;
 free_sw_rings:
-	rte_free(d->sw_rings_base);
-	d->sw_rings = NULL;
+	if (d->device_variant == VRB1_VARIANT) {
+		rte_free(d->sw_rings_base);
+		d->sw_rings = NULL;
+	} else if (d->device_variant == VRB2_VARIANT) {
+		for (i = 0; i <= RTE_BBDEV_OP_MLDTS; i++)
+			rte_free(d->sw_rings_array[i]);
+	}
 
 	return ret;
 }
@@ -809,17 +867,34 @@ vrb_intr_enable(struct rte_bbdev *dev)
 static int
 vrb_dev_close(struct rte_bbdev *dev)
 {
+	int i;
 	struct acc_device *d = dev->data->dev_private;
+
 	vrb_check_ir(d);
-	if (d->sw_rings_base != NULL) {
-		rte_free(d->tail_ptrs);
-		rte_free(d->info_ring);
-		rte_free(d->sw_rings_base);
-		rte_free(d->harq_layout);
-		d->tail_ptrs = NULL;
-		d->info_ring = NULL;
-		d->sw_rings_base = NULL;
-		d->harq_layout = NULL;
+	if (d->device_variant == VRB1_VARIANT) {
+		if (d->sw_rings_base != NULL) {
+			rte_free(d->tail_ptrs);
+			rte_free(d->info_ring);
+			rte_free(d->sw_rings_base);
+			rte_free(d->harq_layout);
+			d->tail_ptrs = NULL;
+			d->info_ring = NULL;
+			d->sw_rings_base = NULL;
+			d->harq_layout = NULL;
+		}
+	} else if (d->device_variant == VRB2_VARIANT) {
+		if (d->sw_rings_array[1] != NULL) {
+			rte_free(d->tail_ptrs);
+			rte_free(d->info_ring);
+			rte_free(d->harq_layout);
+			d->tail_ptrs = NULL;
+			d->info_ring = NULL;
+			d->harq_layout = NULL;
+			for (i = 0; i <= RTE_BBDEV_OP_MLDTS; i++) {
+				rte_free(d->sw_rings_array[i]);
+				d->sw_rings_array[i] = NULL;
+			}
+		}
 	}
 	/* Ensure all in flight HW transactions are completed. */
 	usleep(ACC_LONG_WAIT);
@@ -890,8 +965,16 @@ vrb_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
 	}
 
 	q->d = d;
-	q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size * queue_id));
-	q->ring_addr_iova = d->sw_rings_iova + (d->sw_ring_size * queue_id);
+	if (d->device_variant == VRB1_VARIANT) {
+		q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size * queue_id));
+		q->ring_addr_iova = d->sw_rings_iova + (d->sw_ring_size * queue_id);
+	} else if (d->device_variant == VRB2_VARIANT) {
+		q->ring_addr = RTE_PTR_ADD(d->sw_rings_array[conf->op_type],
+				(d->sw_ring_size * d->queue_index[conf->op_type]));
+		q->ring_addr_iova = d->sw_rings_iova_array[conf->op_type] +
+				(d->sw_ring_size * d->queue_index[conf->op_type]);
+		d->queue_index[conf->op_type]++;
+	}
 
 	/* Prepare the Ring with default descriptor format. */
 	union acc_dma_desc *desc = NULL;
@@ -1347,8 +1430,14 @@ vrb_dev_info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info)
 	dev_info->queue_priority[RTE_BBDEV_OP_FFT] = d->acc_conf.q_fft.num_qgroups;
 	dev_info->queue_priority[RTE_BBDEV_OP_MLDTS] = d->acc_conf.q_mld.num_qgroups;
 	dev_info->max_num_queues = 0;
-	for (i = RTE_BBDEV_OP_NONE; i <= RTE_BBDEV_OP_MLDTS; i++)
+	for (i = RTE_BBDEV_OP_NONE; i <= RTE_BBDEV_OP_MLDTS; i++) {
+		if (unlikely(dev_info->num_queues[i] > VRB2_MAX_Q_PER_OP)) {
+			rte_bbdev_log(ERR, "Unexpected number of queues %d exposed for op %d",
+					dev_info->num_queues[i], i);
+			dev_info->num_queues[i] = VRB2_MAX_Q_PER_OP;
+		}
 		dev_info->max_num_queues += dev_info->num_queues[i];
+	}
 	dev_info->queue_size_lim = ACC_MAX_QUEUE_DEPTH;
 	dev_info->hardware_accelerated = true;
 	dev_info->max_dl_queue_priority =
@@ -4239,7 +4328,8 @@ vrb_bbdev_init(struct rte_bbdev *dev, struct rte_pci_driver *drv)
 			d->reg_addr = &vrb1_pf_reg_addr;
 		else
 			d->reg_addr = &vrb1_vf_reg_addr;
-	} else {
+	} else if ((pci_dev->id.device_id == RTE_VRB2_PF_DEVICE_ID) ||
+			(pci_dev->id.device_id == RTE_VRB2_VF_DEVICE_ID)) {
 		d->device_variant = VRB2_VARIANT;
 		d->queue_offset = vrb2_queue_offset;
 		d->num_qgroups = VRB2_NUM_QGRPS;
-- 
2.37.1



* [PATCH v2 03/10] baseband/acc: configure max queues per device
  2024-10-03 20:49 [PATCH v2 00/10] acc baseband PMD fix and updates for 24.11 Hernan Vargas
  2024-10-03 20:49 ` [PATCH v2 01/10] baseband/acc: fix access to deallocated mem Hernan Vargas
  2024-10-03 20:49 ` [PATCH v2 02/10] baseband/acc: queue allocation refactor Hernan Vargas
@ 2024-10-03 20:49 ` Hernan Vargas
  2024-10-03 20:49 ` [PATCH v2 04/10] baseband/acc: future proof structure comparison Hernan Vargas
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 13+ messages in thread
From: Hernan Vargas @ 2024-10-03 20:49 UTC (permalink / raw)
  To: dev, gakhil, trix, maxime.coquelin
  Cc: nicolas.chautru, qi.z.zhang, Hernan Vargas

Configure max_queues based on the number of queue groups and the
number of AQs per device variant.

Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
---
 drivers/baseband/acc/rte_vrb_pmd.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)
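
The computation itself is simple; a self-contained sketch (the counts
below are placeholders, not the real VRB1/VRB2 values):

#include <stdint.h>
#include <stdlib.h>

#define SKETCH_NUM_QGRPS 16  /* placeholder for VRBx_NUM_QGRPS */
#define SKETCH_NUM_AQS   16  /* placeholder for VRBx_NUM_AQS */

static uint32_t *alloc_tail_ptrs_sketch(void)
{
	/* Size tail-pointer storage from the variant's own topology
	 * (queue groups x AQs) instead of a worst-case constant. */
	int max_queues = SKETCH_NUM_QGRPS * SKETCH_NUM_AQS;

	return calloc(max_queues, sizeof(uint32_t));
}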

diff --git a/drivers/baseband/acc/rte_vrb_pmd.c b/drivers/baseband/acc/rte_vrb_pmd.c
index 2c62a5b3e329..26335d55ba3b 100644
--- a/drivers/baseband/acc/rte_vrb_pmd.c
+++ b/drivers/baseband/acc/rte_vrb_pmd.c
@@ -544,7 +544,7 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
 	uint32_t phys_low, phys_high, value;
 	struct acc_device *d = dev->data->dev_private;
 	uint16_t queues_per_op, i;
-	int ret;
+	int ret, max_queues = 0;
 
 	if (d->pf_device && !d->acc_conf.pf_mode_en) {
 		rte_bbdev_log(NOTICE,
@@ -671,10 +671,15 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
 	value = log2_basic(d->sw_ring_size / ACC_RING_SIZE_GRANULARITY);
 	acc_reg_write(d, d->reg_addr->ring_size, value);
 
+	if (d->device_variant == VRB1_VARIANT)
+		max_queues = VRB1_NUM_QGRPS * VRB1_NUM_AQS;
+	else if (d->device_variant == VRB2_VARIANT)
+		max_queues = VRB2_NUM_QGRPS * VRB2_NUM_AQS;
+
 	/* Configure tail pointer for use when SDONE enabled. */
 	if (d->tail_ptrs == NULL)
 		d->tail_ptrs = rte_zmalloc_socket(dev->device->driver->name,
-				VRB_MAX_QGRPS * VRB_MAX_AQS * sizeof(uint32_t),
+				max_queues * sizeof(uint32_t),
 				RTE_CACHE_LINE_SIZE, socket_id);
 	if (d->tail_ptrs == NULL) {
 		rte_bbdev_log(ERR, "Failed to allocate tail ptr for %s:%u",
-- 
2.37.1



* [PATCH v2 04/10] baseband/acc: future proof structure comparison
  2024-10-03 20:49 [PATCH v2 00/10] acc baseband PMD fix and updates for 24.11 Hernan Vargas
                   ` (2 preceding siblings ...)
  2024-10-03 20:49 ` [PATCH v2 03/10] baseband/acc: configure max queues per device Hernan Vargas
@ 2024-10-03 20:49 ` Hernan Vargas
  2024-10-03 20:49 ` [PATCH v2 05/10] baseband/acc: enhance SW ring alignment Hernan Vargas
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 13+ messages in thread
From: Hernan Vargas @ 2024-10-03 20:49 UTC (permalink / raw)
  To: dev, gakhil, trix, maxime.coquelin
  Cc: nicolas.chautru, qi.z.zhang, Hernan Vargas

Some parts of the PMD rely on hardcoded size assumptions about the
bbdev structures; use sizeof instead so the code remains correct if
these structures change.

Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
---
 drivers/baseband/acc/acc_common.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
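
A sketch of the idea with a stand-in structure (the real macros derive
the compare length from sizeof(struct rte_bbdev_op_ldpc_enc/dec) minus
ACC_ENC_OFFSET/ACC_DEC_OFFSET):

#include <string.h>

struct op_sketch {
	int header[8];   /* leading fields skipped by the comparison */
	int payload[5];  /* trailing fields actually compared */
};

#define SKETCH_OFFSET   (sizeof(int) * 8)
#define SKETCH_CMP_SIZE (sizeof(struct op_sketch) - SKETCH_OFFSET)

/* The compared byte count now tracks the structure definition, so it
 * stays correct if fields are added or removed. */
static int same_tail(const struct op_sketch *a, const struct op_sketch *b)
{
	return memcmp((const char *)a + SKETCH_OFFSET,
			(const char *)b + SKETCH_OFFSET,
			SKETCH_CMP_SIZE) == 0;
}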

diff --git a/drivers/baseband/acc/acc_common.h b/drivers/baseband/acc/acc_common.h
index adbac0dcca70..0d1c26166ff2 100644
--- a/drivers/baseband/acc/acc_common.h
+++ b/drivers/baseband/acc/acc_common.h
@@ -95,8 +95,8 @@
 #define ACC_COMPANION_PTRS             8
 #define ACC_FCW_VER                    2
 #define ACC_MUX_5GDL_DESC              6
-#define ACC_CMP_ENC_SIZE               20
-#define ACC_CMP_DEC_SIZE               24
+#define ACC_CMP_ENC_SIZE               (sizeof(struct rte_bbdev_op_ldpc_enc) - ACC_ENC_OFFSET)
+#define ACC_CMP_DEC_SIZE               (sizeof(struct rte_bbdev_op_ldpc_dec) - ACC_DEC_OFFSET)
 #define ACC_ENC_OFFSET                (32)
 #define ACC_DEC_OFFSET                (80)
 #define ACC_LIMIT_DL_MUX_BITS          534
-- 
2.37.1



* [PATCH v2 05/10] baseband/acc: enhance SW ring alignment
  2024-10-03 20:49 [PATCH v2 00/10] acc baseband PMD fix and updates for 24.11 Hernan Vargas
                   ` (3 preceding siblings ...)
  2024-10-03 20:49 ` [PATCH v2 04/10] baseband/acc: future proof structure comparison Hernan Vargas
@ 2024-10-03 20:49 ` Hernan Vargas
  2024-10-03 20:49 ` [PATCH v2 06/10] baseband/acc: remove soft output bypass Hernan Vargas
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 13+ messages in thread
From: Hernan Vargas @ 2024-10-03 20:49 UTC (permalink / raw)
  To: dev, gakhil, trix, maxime.coquelin
  Cc: nicolas.chautru, qi.z.zhang, Hernan Vargas

Calculate the alignment required for the queue rings, rounding the
queue count up to a power of two so that the requested alignment is
itself a power of two, as required for proper memory allocation.

Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
---
 drivers/baseband/acc/acc_common.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
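
For reference, a self-contained sketch of the alignment computation
(align32pow2() mirrors DPDK's rte_align32pow2(), inlined here;
assuming q_sw_ring_size is itself a power of two, as in the driver,
the result is a valid power-of-two alignment argument):

#include <stdint.h>

static uint32_t align32pow2(uint32_t x)
{
	x--;
	x |= x >> 1;
	x |= x >> 2;
	x |= x >> 4;
	x |= x >> 8;
	x |= x >> 16;
	return x + 1;
}

static uint32_t ring_alignment(uint32_t q_sw_ring_size, uint32_t num_queues)
{
	/* Round the queue count up to a power of two so the whole ring
	 * block keeps a power-of-two alignment. */
	return q_sw_ring_size * align32pow2(num_queues);
}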

diff --git a/drivers/baseband/acc/acc_common.h b/drivers/baseband/acc/acc_common.h
index 0d1c26166ff2..8ac1ca001c1d 100644
--- a/drivers/baseband/acc/acc_common.h
+++ b/drivers/baseband/acc/acc_common.h
@@ -767,19 +767,20 @@ alloc_sw_rings_min_mem(struct rte_bbdev *dev, struct acc_device *d,
 	int i = 0;
 	uint32_t q_sw_ring_size = ACC_MAX_QUEUE_DEPTH * get_desc_len();
 	uint32_t dev_sw_ring_size = q_sw_ring_size * num_queues;
-	/* Free first in case this is a reconfiguration */
+	uint32_t alignment = q_sw_ring_size * rte_align32pow2(num_queues);
+	/* Free first in case this is a reconfiguration */
 	rte_free(d->sw_rings_base);
 
 	/* Find an aligned block of memory to store sw rings */
 	while (i < ACC_SW_RING_MEM_ALLOC_ATTEMPTS) {
 		/*
 		 * sw_ring allocated memory is guaranteed to be aligned to
-		 * q_sw_ring_size at the condition that the requested size is
+		 * alignment at the condition that the requested size is
 		 * less than the page size
 		 */
 		sw_rings_base = rte_zmalloc_socket(
 				dev->device->driver->name,
-				dev_sw_ring_size, q_sw_ring_size, socket);
+				dev_sw_ring_size, alignment, socket);
 
 		if (sw_rings_base == NULL) {
 			rte_acc_log(ERR,
-- 
2.37.1



* [PATCH v2 06/10] baseband/acc: remove soft output bypass
  2024-10-03 20:49 [PATCH v2 00/10] acc baseband PMD fix and updates for 24.11 Hernan Vargas
                   ` (4 preceding siblings ...)
  2024-10-03 20:49 ` [PATCH v2 05/10] baseband/acc: enhance SW ring alignment Hernan Vargas
@ 2024-10-03 20:49 ` Hernan Vargas
  2024-10-03 20:49 ` [PATCH v2 07/10] baseband/acc: algorithm tuning for LDPC decoder Hernan Vargas
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 13+ messages in thread
From: Hernan Vargas @ 2024-10-03 20:49 UTC (permalink / raw)
  To: dev, gakhil, trix, maxime.coquelin
  Cc: nicolas.chautru, qi.z.zhang, Hernan Vargas

Remove the soft output bypass capability due to device limitations.

Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
---
 drivers/baseband/acc/rte_vrb_pmd.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/baseband/acc/rte_vrb_pmd.c b/drivers/baseband/acc/rte_vrb_pmd.c
index 26335d55ba3b..88201d11de88 100644
--- a/drivers/baseband/acc/rte_vrb_pmd.c
+++ b/drivers/baseband/acc/rte_vrb_pmd.c
@@ -1359,7 +1359,6 @@ vrb_dev_info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info)
 				RTE_BBDEV_LDPC_HARQ_4BIT_COMPRESSION |
 				RTE_BBDEV_LDPC_LLR_COMPRESSION |
 				RTE_BBDEV_LDPC_SOFT_OUT_ENABLE |
-				RTE_BBDEV_LDPC_SOFT_OUT_RM_BYPASS |
 				RTE_BBDEV_LDPC_SOFT_OUT_DEINTERLEAVER_BYPASS |
 				RTE_BBDEV_LDPC_DEC_INTERRUPTS,
 			.llr_size = 8,
@@ -1736,18 +1735,18 @@ vrb_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld *fcw,
 		fcw->so_en = check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_SOFT_OUT_ENABLE);
 		fcw->so_bypass_intlv = check_bit(op->ldpc_dec.op_flags,
 				RTE_BBDEV_LDPC_SOFT_OUT_DEINTERLEAVER_BYPASS);
-		fcw->so_bypass_rm = check_bit(op->ldpc_dec.op_flags,
-				RTE_BBDEV_LDPC_SOFT_OUT_RM_BYPASS);
+		fcw->so_bypass_rm = 0;
 		fcw->minsum_offset = 1;
 		fcw->dec_llrclip   = 2;
 	}
 
 	/*
-	 * These are all implicitly set
+	 * These are all implicitly set:
 	 * fcw->synd_post = 0;
 	 * fcw->dec_convllr = 0;
 	 * fcw->hcout_convllr = 0;
 	 * fcw->hcout_size1 = 0;
+	 * fcw->so_it = 0;
 	 * fcw->hcout_offset = 0;
 	 * fcw->negstop_th = 0;
 	 * fcw->negstop_it = 0;
-- 
2.37.1



* [PATCH v2 07/10] baseband/acc: algorithm tuning for LDPC decoder
  2024-10-03 20:49 [PATCH v2 00/10] acc baseband PMD fix and updates for 24.11 Hernan Vargas
                   ` (5 preceding siblings ...)
  2024-10-03 20:49 ` [PATCH v2 06/10] baseband/acc: remove soft output bypass Hernan Vargas
@ 2024-10-03 20:49 ` Hernan Vargas
  2024-10-03 20:49 ` [PATCH v2 08/10] baseband/acc: remove check on HARQ memory Hernan Vargas
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 13+ messages in thread
From: Hernan Vargas @ 2024-10-03 20:49 UTC (permalink / raw)
  To: dev, gakhil, trix, maxime.coquelin
  Cc: nicolas.chautru, qi.z.zhang, Hernan Vargas

Revert to the MS1 version of the algorithm to improve performance
under MU1 fading conditions.

Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
---
 drivers/baseband/acc/rte_vrb_pmd.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/baseband/acc/rte_vrb_pmd.c b/drivers/baseband/acc/rte_vrb_pmd.c
index 88201d11de88..865a050e1b19 100644
--- a/drivers/baseband/acc/rte_vrb_pmd.c
+++ b/drivers/baseband/acc/rte_vrb_pmd.c
@@ -1362,7 +1362,7 @@ vrb_dev_info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info)
 				RTE_BBDEV_LDPC_SOFT_OUT_DEINTERLEAVER_BYPASS |
 				RTE_BBDEV_LDPC_DEC_INTERRUPTS,
 			.llr_size = 8,
-			.llr_decimals = 2,
+			.llr_decimals = 1,
 			.num_buffers_src =
 					RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
 			.num_buffers_hard_out =
@@ -1736,8 +1736,8 @@ vrb_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld *fcw,
 		fcw->so_bypass_intlv = check_bit(op->ldpc_dec.op_flags,
 				RTE_BBDEV_LDPC_SOFT_OUT_DEINTERLEAVER_BYPASS);
 		fcw->so_bypass_rm = 0;
-		fcw->minsum_offset = 1;
-		fcw->dec_llrclip   = 2;
+		fcw->minsum_offset = 0;
+		fcw->dec_llrclip   = 0;
 	}
 
 	/*
-- 
2.37.1



* [PATCH v2 08/10] baseband/acc: remove check on HARQ memory
  2024-10-03 20:49 [PATCH v2 00/10] acc baseband PMD fix and updates for 24.11 Hernan Vargas
                   ` (6 preceding siblings ...)
  2024-10-03 20:49 ` [PATCH v2 07/10] baseband/acc: algorithm tuning for LDPC decoder Hernan Vargas
@ 2024-10-03 20:49 ` Hernan Vargas
  2024-10-03 20:49 ` [PATCH v2 09/10] baseband/acc: reset ring data valid bit Hernan Vargas
  2024-10-03 20:49 ` [PATCH v2 10/10] baseband/acc: cosmetic changes Hernan Vargas
  9 siblings, 0 replies; 13+ messages in thread
From: Hernan Vargas @ 2024-10-03 20:49 UTC (permalink / raw)
  To: dev, gakhil, trix, maxime.coquelin
  Cc: nicolas.chautru, qi.z.zhang, Hernan Vargas

Automatically reset HARQ memory to prevent errors and simplify usage.
Since the HARQ output operation can be assumed to always overwrite the
buffer, the driver can reset it itself, preventing an error from being
reported when the application fails to do so explicitly.

Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
---
 drivers/baseband/acc/rte_vrb_pmd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
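
A minimal sketch of the effect with a stand-in buffer type (the driver
does the equivalent on the rte_mbuf backing harq_combined_output):

#include <stdint.h>

struct buf_sketch {
	uint16_t data_len;  /* bytes currently considered valid */
	uint16_t buf_size;  /* capacity */
};

static int append_sketch(struct buf_sketch *b, uint16_t len)
{
	if (b->data_len + len > b->buf_size)
		return -1;  /* stale contents would make this fail */
	b->data_len += len;
	return 0;
}

static int push_harq_output_sketch(struct buf_sketch *b, uint16_t hq_len)
{
	/* HW overwrites the buffer anyway, so clear the stale length
	 * left by a previous operation before appending. */
	b->data_len = 0;
	return append_sketch(b, hq_len);
}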

diff --git a/drivers/baseband/acc/rte_vrb_pmd.c b/drivers/baseband/acc/rte_vrb_pmd.c
index 865a050e1b19..27c8bdca3d08 100644
--- a/drivers/baseband/acc/rte_vrb_pmd.c
+++ b/drivers/baseband/acc/rte_vrb_pmd.c
@@ -2595,8 +2595,9 @@ vrb_enqueue_ldpc_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
 	/* Hard output. */
 	mbuf_append(h_output_head, h_output, h_out_length);
 	if (op->ldpc_dec.harq_combined_output.length > 0) {
-		/* Push the HARQ output into host memory. */
+		/* Push the HARQ output into host memory overwriting existing data. */
 		struct rte_mbuf *hq_output_head, *hq_output;
+		op->ldpc_dec.harq_combined_output.data->data_len = 0;
 		hq_output_head = op->ldpc_dec.harq_combined_output.data;
 		hq_output = op->ldpc_dec.harq_combined_output.data;
 		hq_len = op->ldpc_dec.harq_combined_output.length;
-- 
2.37.1



* [PATCH v2 09/10] baseband/acc: reset ring data valid bit
  2024-10-03 20:49 [PATCH v2 00/10] acc baseband PMD fix and updates for 24.11 Hernan Vargas
                   ` (7 preceding siblings ...)
  2024-10-03 20:49 ` [PATCH v2 08/10] baseband/acc: remove check on HARQ memory Hernan Vargas
@ 2024-10-03 20:49 ` Hernan Vargas
  2024-10-03 20:49 ` [PATCH v2 10/10] baseband/acc: cosmetic changes Hernan Vargas
  9 siblings, 0 replies; 13+ messages in thread
From: Hernan Vargas @ 2024-10-03 20:49 UTC (permalink / raw)
  To: dev, gakhil, trix, maxime.coquelin
  Cc: nicolas.chautru, qi.z.zhang, Hernan Vargas

Reset only the valid bit so that the rest of the info ring data is
kept, notably for dumping.

Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
---
 drivers/baseband/acc/rte_vrb_pmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
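
The distinction in one sketch (this layout is a simplified stand-in,
not the driver's exact acc_info_ring_data definition): clearing val
wipes the whole 32-bit entry, while clearing only valid retires the
entry but leaves its payload readable for later dumps.

#include <stdint.h>

union ring_entry_sketch {
	uint32_t val;  /* the whole entry */
	struct {
		uint32_t detailed_info : 24;  /* payload kept for dumps */
		uint32_t int_nb        : 7;   /* interrupt number */
		uint32_t valid         : 1;   /* hardware valid flag */
	};
};

static void retire_entry(union ring_entry_sketch *e)
{
	e->valid = 0;  /* payload bits remain intact */
}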

diff --git a/drivers/baseband/acc/rte_vrb_pmd.c b/drivers/baseband/acc/rte_vrb_pmd.c
index 27c8bdca3d08..5f7568a4b7ea 100644
--- a/drivers/baseband/acc/rte_vrb_pmd.c
+++ b/drivers/baseband/acc/rte_vrb_pmd.c
@@ -411,7 +411,7 @@ vrb_check_ir(struct acc_device *acc_dev)
 			rte_bbdev_log(WARNING, "InfoRing: ITR:%d Info:0x%x",
 					int_nb, ring_data->detailed_info);
 			/* Initialize Info Ring entry and move forward. */
-			ring_data->val = 0;
+			ring_data->valid = 0;
 		}
 		info_ring_head++;
 		ring_data = acc_dev->info_ring + (info_ring_head & ACC_INFO_RING_MASK);
-- 
2.37.1



* [PATCH v2 10/10] baseband/acc: cosmetic changes
  2024-10-03 20:49 [PATCH v2 00/10] acc baseband PMD fix and updates for 24.11 Hernan Vargas
                   ` (8 preceding siblings ...)
  2024-10-03 20:49 ` [PATCH v2 09/10] baseband/acc: reset ring data valid bit Hernan Vargas
@ 2024-10-03 20:49 ` Hernan Vargas
  9 siblings, 0 replies; 13+ messages in thread
From: Hernan Vargas @ 2024-10-03 20:49 UTC (permalink / raw)
  To: dev, gakhil, trix, maxime.coquelin
  Cc: nicolas.chautru, qi.z.zhang, Hernan Vargas

Cosmetic code changes.
No functional impact.

Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
---
 drivers/baseband/acc/rte_acc100_pmd.c |  2 +-
 drivers/baseband/acc/rte_vrb_pmd.c    | 62 +++++++++++++++++++--------
 2 files changed, 44 insertions(+), 20 deletions(-)

diff --git a/drivers/baseband/acc/rte_acc100_pmd.c b/drivers/baseband/acc/rte_acc100_pmd.c
index e3a523946448..c33e2758b100 100644
--- a/drivers/baseband/acc/rte_acc100_pmd.c
+++ b/drivers/baseband/acc/rte_acc100_pmd.c
@@ -4199,7 +4199,7 @@ poweron_cleanup(struct rte_bbdev *bbdev, struct acc_device *d,
 		acc_reg_write(d, HWPfQmgrIngressAq + 0x100, enq_req.val);
 		usleep(ACC_LONG_WAIT * 100);
 		if (desc->req.word0 != 2)
-			rte_bbdev_log(WARNING, "DMA Response %#"PRIx32, desc->req.word0);
+			rte_bbdev_log(WARNING, "DMA Response %#"PRIx32"", desc->req.word0);
 	}
 
 	/* Reset LDPC Cores */
diff --git a/drivers/baseband/acc/rte_vrb_pmd.c b/drivers/baseband/acc/rte_vrb_pmd.c
index 5f7568a4b7ea..c8875447d3d0 100644
--- a/drivers/baseband/acc/rte_vrb_pmd.c
+++ b/drivers/baseband/acc/rte_vrb_pmd.c
@@ -956,6 +956,9 @@ vrb_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
 	struct acc_queue *q;
 	int32_t q_idx;
 	int ret;
+	union acc_dma_desc *desc = NULL;
+	unsigned int desc_idx, b_idx;
+	int fcw_len;
 
 	if (d == NULL) {
 		rte_bbdev_log(ERR, "Undefined device");
@@ -982,16 +985,33 @@ vrb_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
 	}
 
 	/* Prepare the Ring with default descriptor format. */
-	union acc_dma_desc *desc = NULL;
-	unsigned int desc_idx, b_idx;
-	int fcw_len = (conf->op_type == RTE_BBDEV_OP_LDPC_ENC ?
-			ACC_FCW_LE_BLEN : (conf->op_type == RTE_BBDEV_OP_TURBO_DEC ?
-			ACC_FCW_TD_BLEN : (conf->op_type == RTE_BBDEV_OP_LDPC_DEC ?
-			ACC_FCW_LD_BLEN : (conf->op_type == RTE_BBDEV_OP_FFT ?
-			ACC_FCW_FFT_BLEN : ACC_FCW_MLDTS_BLEN))));
-
-	if ((q->d->device_variant == VRB2_VARIANT) && (conf->op_type == RTE_BBDEV_OP_FFT))
-		fcw_len = ACC_FCW_FFT_BLEN_3;
+	switch (conf->op_type) {
+	case RTE_BBDEV_OP_LDPC_ENC:
+		fcw_len = ACC_FCW_LE_BLEN;
+		break;
+	case RTE_BBDEV_OP_LDPC_DEC:
+		fcw_len = ACC_FCW_LD_BLEN;
+		break;
+	case RTE_BBDEV_OP_TURBO_DEC:
+		fcw_len = ACC_FCW_TD_BLEN;
+		break;
+	case RTE_BBDEV_OP_TURBO_ENC:
+		fcw_len = ACC_FCW_TE_BLEN;
+		break;
+	case RTE_BBDEV_OP_FFT:
+		fcw_len = ACC_FCW_FFT_BLEN;
+		if (q->d->device_variant == VRB2_VARIANT)
+			fcw_len = ACC_FCW_FFT_BLEN_3;
+		break;
+	case RTE_BBDEV_OP_MLDTS:
+		fcw_len = ACC_FCW_MLDTS_BLEN;
+		break;
+	default:
+		/* NOT REACHED. */
+		fcw_len = 0;
+		rte_bbdev_log(ERR, "Unexpected error in %s using type %d", __func__, conf->op_type);
+		break;
+	}
 
 	for (desc_idx = 0; desc_idx < d->sw_ring_max_depth; desc_idx++) {
 		desc = q->ring_addr + desc_idx;
@@ -1757,8 +1777,7 @@ vrb_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld *fcw,
 	if (fcw->hcout_en > 0) {
 		parity_offset = (op->ldpc_dec.basegraph == 1 ? 20 : 8)
 				* op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
-		k0_p = (fcw->k0 > parity_offset) ?
-				fcw->k0 - op->ldpc_dec.n_filler : fcw->k0;
+		k0_p = (fcw->k0 > parity_offset) ? fcw->k0 - op->ldpc_dec.n_filler : fcw->k0;
 		ncb_p = fcw->ncb - op->ldpc_dec.n_filler;
 		l = k0_p + fcw->rm_e;
 		harq_out_length = (uint16_t) fcw->hcin_size0;
@@ -2000,16 +2019,15 @@ vrb_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
 		next_triplet++;
 	}
 
-	if (check_bit(op->ldpc_dec.op_flags,
-				RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
+	if (check_bit(op->ldpc_dec.op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
 		if (op->ldpc_dec.harq_combined_output.data == 0) {
 			rte_bbdev_log(ERR, "HARQ output is not defined");
 			return -1;
 		}
 
-		/* Pruned size of the HARQ */
+		/* Pruned size of the HARQ. */
 		h_p_size = fcw->hcout_size0 + fcw->hcout_size1;
-		/* Non-Pruned size of the HARQ */
+		/* Non-Pruned size of the HARQ. */
 		h_np_size = fcw->hcout_offset > 0 ?
 				fcw->hcout_offset + fcw->hcout_size1 :
 				h_p_size;
@@ -2583,7 +2601,6 @@ vrb_enqueue_ldpc_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
 			seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
 		else
 			seg_total_left = fcw->rm_e;
-
 		ret = vrb_dma_desc_ld_fill(op, &desc->req, &input, h_output,
 				&in_offset, &h_out_offset,
 				&h_out_length, &mbuf_total_left,
@@ -2645,7 +2662,6 @@ vrb_enqueue_ldpc_dec_one_op_tb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
 	desc_first = desc;
 	fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET;
 	harq_layout = q->d->harq_layout;
-
 	vrb_fcw_ld_fill(op, &desc->req.fcw_ld, harq_layout, q->d->device_variant);
 
 	input = op->ldpc_dec.input.data;
@@ -2849,6 +2865,7 @@ vrb_enqueue_enc_cb(struct rte_bbdev_queue_data *q_data,
 	acc_dma_enqueue(q, i, &q_data->queue_stats);
 
 	acc_update_qstat_enqueue(q_data, i, num - i);
+
 	return i;
 }
 
@@ -2956,6 +2973,7 @@ vrb_enqueue_ldpc_enc_tb(struct rte_bbdev_queue_data *q_data,
 			}
 			descs_used = vrb2_enqueue_ldpc_enc_one_op_tb(q, ops[i], enqueued_descs);
 		}
+
 		if (descs_used < 0) {
 			acc_enqueue_invalid(q_data);
 			break;
@@ -3062,6 +3080,7 @@ vrb_enqueue_ldpc_dec_tb(struct rte_bbdev_queue_data *q_data,
 	acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
 
 	acc_update_qstat_enqueue(q_data, i, num - i);
+
 	return i;
 }
 
@@ -3103,6 +3122,7 @@ vrb_enqueue_ldpc_dec_cb(struct rte_bbdev_queue_data *q_data,
 	acc_dma_enqueue(q, i, &q_data->queue_stats);
 
 	acc_update_qstat_enqueue(q_data, i, num - i);
+
 	return i;
 }
 
@@ -3273,7 +3293,7 @@ vrb2_dequeue_ldpc_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op **r
 	return 1;
 }
 
-/* Dequeue one LDPC encode operations from device in TB mode.
+/* Dequeue one encode operations from device in TB mode.
  * That operation may cover multiple descriptors.
  */
 static inline int
@@ -3873,6 +3893,7 @@ vrb_enqueue_fft(struct rte_bbdev_queue_data *q_data,
 	acc_dma_enqueue(q, i, &q_data->queue_stats);
 
 	acc_update_qstat_enqueue(q_data, i, num - i);
+
 	return i;
 }
 
@@ -3939,7 +3960,9 @@ vrb_dequeue_fft(struct rte_bbdev_queue_data *q_data,
 
 	q->aq_dequeued += aq_dequeued;
 	q->sw_ring_tail += dequeued_cbs;
+
 	acc_update_qstat_dequeue(q_data, i);
+
 	return i;
 }
 
@@ -4194,6 +4217,7 @@ vrb2_enqueue_mldts(struct rte_bbdev_queue_data *q_data,
 	acc_dma_enqueue(q, enqueued_descs, &q_data->queue_stats);
 
 	acc_update_qstat_enqueue(q_data, i, num - i);
+
 	return i;
 }
 
-- 
2.37.1



* Re: [PATCH v2 02/10] baseband/acc: queue allocation refactor
  2024-10-03 20:49 ` [PATCH v2 02/10] baseband/acc: queue allocation refactor Hernan Vargas
@ 2024-10-04 12:08   ` Maxime Coquelin
  2024-10-04 18:19     ` Chautru, Nicolas
  0 siblings, 1 reply; 13+ messages in thread
From: Maxime Coquelin @ 2024-10-04 12:08 UTC (permalink / raw)
  To: Hernan Vargas, dev, gakhil, trix; +Cc: nicolas.chautru, qi.z.zhang



On 10/3/24 22:49, Hernan Vargas wrote:
> Refactor to manage queue memory per operation more flexibly for VRB
> devices.
> 
> Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
> ---
>   drivers/baseband/acc/acc_common.h  |   5 +
>   drivers/baseband/acc/rte_vrb_pmd.c | 214 ++++++++++++++++++++---------
>   2 files changed, 157 insertions(+), 62 deletions(-)
> 
> diff --git a/drivers/baseband/acc/acc_common.h b/drivers/baseband/acc/acc_common.h
> index b1f81e73e68d..adbac0dcca70 100644
> --- a/drivers/baseband/acc/acc_common.h
> +++ b/drivers/baseband/acc/acc_common.h
> @@ -149,6 +149,8 @@
>   #define VRB2_VF_ID_SHIFT     6
>   
>   #define ACC_MAX_FFT_WIN      16
> +#define ACC_MAX_RING_BUFFER  64
> +#define VRB2_MAX_Q_PER_OP 256
>   
>   extern int acc_common_logtype;
>   
> @@ -581,6 +583,9 @@ struct acc_device {
>   	void *sw_rings_base;  /* Base addr of un-aligned memory for sw rings */
>   	void *sw_rings;  /* 64MBs of 64MB aligned memory for sw rings */
>   	rte_iova_t sw_rings_iova;  /* IOVA address of sw_rings */
> +	void *sw_rings_array[ACC_MAX_RING_BUFFER];  /* Array of aligned memory for sw rings. */
> +	rte_iova_t sw_rings_iova_array[ACC_MAX_RING_BUFFER];  /* Array of sw_rings IOVA. */
> +	uint32_t queue_index[ACC_MAX_RING_BUFFER]; /* Tracking queue index per ring buffer. */
>   	/* Virtual address of the info memory routed to the this function under
>   	 * operation, whether it is PF or VF.
>   	 * HW may DMA information data at this location asynchronously
> diff --git a/drivers/baseband/acc/rte_vrb_pmd.c b/drivers/baseband/acc/rte_vrb_pmd.c
> index bae01e563826..2c62a5b3e329 100644
> --- a/drivers/baseband/acc/rte_vrb_pmd.c
> +++ b/drivers/baseband/acc/rte_vrb_pmd.c
> @@ -281,7 +281,7 @@ fetch_acc_config(struct rte_bbdev *dev)
>   		/* Check the depth of the AQs. */
>   		reg_len0 = acc_reg_read(d, d->reg_addr->depth_log0_offset);
>   		reg_len1 = acc_reg_read(d, d->reg_addr->depth_log1_offset);
> -		for (acc = 0; acc < NUM_ACC; acc++) {
> +		for (acc = 0; acc < VRB1_NUM_ACCS; acc++) {
>   			qtopFromAcc(&q_top, acc, acc_conf);
>   			if (q_top->first_qgroup_index < ACC_NUM_QGRPS_PER_WORD)
>   				q_top->aq_depth_log2 =
> @@ -290,7 +290,7 @@ fetch_acc_config(struct rte_bbdev *dev)
>   				q_top->aq_depth_log2 = (reg_len1 >> ((q_top->first_qgroup_index -
>   						ACC_NUM_QGRPS_PER_WORD) * 4)) & 0xF;
>   		}
> -	} else {
> +	} else if (d->device_variant == VRB2_VARIANT) {
>   		reg0 = acc_reg_read(d, d->reg_addr->qman_group_func);
>   		reg1 = acc_reg_read(d, d->reg_addr->qman_group_func + 4);
>   		reg2 = acc_reg_read(d, d->reg_addr->qman_group_func + 8);
> @@ -308,7 +308,7 @@ fetch_acc_config(struct rte_bbdev *dev)
>   					idx = (reg2 >> ((qg % ACC_NUM_QGRPS_PER_WORD) * 4)) & 0x7;
>   				else
>   					idx = (reg3 >> ((qg % ACC_NUM_QGRPS_PER_WORD) * 4)) & 0x7;
> -				if (idx < VRB_NUM_ACCS) {
> +				if (idx < VRB2_NUM_ACCS) {
>   					acc = qman_func_id[idx];
>   					updateQtop(acc, qg, acc_conf, d);
>   				}
> @@ -321,7 +321,7 @@ fetch_acc_config(struct rte_bbdev *dev)
>   		reg_len2 = acc_reg_read(d, d->reg_addr->depth_log0_offset + 8);
>   		reg_len3 = acc_reg_read(d, d->reg_addr->depth_log0_offset + 12);
>   
> -		for (acc = 0; acc < NUM_ACC; acc++) {
> +		for (acc = 0; acc < VRB2_NUM_ACCS; acc++) {
>   			qtopFromAcc(&q_top, acc, acc_conf);
>   			if (q_top->first_qgroup_index / ACC_NUM_QGRPS_PER_WORD == 0)
>   				q_top->aq_depth_log2 = (reg_len0 >> ((q_top->first_qgroup_index %

This function could be refactored much more heavily.
If we look at what is actually performed, the VRB1 and VRB2 logic is
the same; just a couple of values differ (they could be set at probe
time).

I might propose something in the future.
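
A rough sketch of that direction (hypothetical, not part of this
series): capture the variant-specific counts once at probe time so
fetch_acc_config() needs a single code path.

struct variant_params_sketch {
	int num_accs;   /* VRB1_NUM_ACCS or VRB2_NUM_ACCS */
	int num_qgrps;  /* VRB1_NUM_QGRPS or VRB2_NUM_QGRPS */
};

/* At probe time:
 *     d->params = (variant == VRB1_VARIANT) ? vrb1_params : vrb2_params;
 * In fetch_acc_config():
 *     for (acc = 0; acc < d->params.num_accs; acc++) ...
 */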

> @@ -543,6 +543,7 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
>   {
>   	uint32_t phys_low, phys_high, value;
>   	struct acc_device *d = dev->data->dev_private;
> +	uint16_t queues_per_op, i;
>   	int ret;
>   
>   	if (d->pf_device && !d->acc_conf.pf_mode_en) {
> @@ -564,27 +565,37 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
>   		return -ENODEV;
>   	}
>   
> -	alloc_sw_rings_min_mem(dev, d, num_queues, socket_id);
> +	if (d->device_variant == VRB1_VARIANT) {
> +		alloc_sw_rings_min_mem(dev, d, num_queues, socket_id);
>   
> -	/* If minimal memory space approach failed, then allocate
> -	 * the 2 * 64MB block for the sw rings.
> -	 */
> -	if (d->sw_rings == NULL)
> -		alloc_2x64mb_sw_rings_mem(dev, d, socket_id);
> +		/* If minimal memory space approach failed, then allocate
> +		 * the 2 * 64MB block for the sw rings.
> +		 */
> +		if (d->sw_rings == NULL)
> +			alloc_2x64mb_sw_rings_mem(dev, d, socket_id);
>   
> -	if (d->sw_rings == NULL) {
> -		rte_bbdev_log(NOTICE,
> -				"Failure allocating sw_rings memory");
> -		return -ENOMEM;
> +		if (d->sw_rings == NULL) {
> +			rte_bbdev_log(NOTICE, "Failure allocating sw_rings memory");
> +			return -ENOMEM;
> +		}
> +	} else if (d->device_variant == VRB2_VARIANT) {
> +		queues_per_op = RTE_MIN(VRB2_MAX_Q_PER_OP, num_queues);
> +		for (i = 0; i <= RTE_BBDEV_OP_MLDTS; i++) {
> +			alloc_sw_rings_min_mem(dev, d, queues_per_op, socket_id);
> +			if (d->sw_rings == NULL) {
> +				rte_bbdev_log(NOTICE, "Failure allocating sw_rings memory %d", i);
> +				return -ENOMEM;
> +			}
> +			/* Moves the pointer to the relevant array. */
> +			d->sw_rings_array[i] = d->sw_rings;
> +			d->sw_rings_iova_array[i] = d->sw_rings_iova;
> +			d->sw_rings = NULL;
> +			d->sw_rings_base = NULL;
> +			d->sw_rings_iova = 0;
> +			d->queue_index[i] = 0;
> +		}
>   	}
>   
> -	/* Configure device with the base address for DMA descriptor rings.
> -	 * Same descriptor rings used for UL and DL DMA Engines.
> -	 * Note : Assuming only VF0 bundle is used for PF mode.
> -	 */
> -	phys_high = (uint32_t)(d->sw_rings_iova >> 32);
> -	phys_low  = (uint32_t)(d->sw_rings_iova & ~(ACC_SIZE_64MBYTE-1));
> -
>   	/* Read the populated cfg from device registers. */
>   	fetch_acc_config(dev);
>   
> @@ -599,20 +610,60 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
>   	if (d->pf_device)
>   		acc_reg_write(d, VRB1_PfDmaAxiControl, 1);
>   
> -	acc_reg_write(d, d->reg_addr->dma_ring_ul5g_hi, phys_high);
> -	acc_reg_write(d, d->reg_addr->dma_ring_ul5g_lo, phys_low);
> -	acc_reg_write(d, d->reg_addr->dma_ring_dl5g_hi, phys_high);
> -	acc_reg_write(d, d->reg_addr->dma_ring_dl5g_lo, phys_low);
> -	acc_reg_write(d, d->reg_addr->dma_ring_ul4g_hi, phys_high);
> -	acc_reg_write(d, d->reg_addr->dma_ring_ul4g_lo, phys_low);
> -	acc_reg_write(d, d->reg_addr->dma_ring_dl4g_hi, phys_high);
> -	acc_reg_write(d, d->reg_addr->dma_ring_dl4g_lo, phys_low);
> -	acc_reg_write(d, d->reg_addr->dma_ring_fft_hi, phys_high);
> -	acc_reg_write(d, d->reg_addr->dma_ring_fft_lo, phys_low);
> -	if (d->device_variant == VRB2_VARIANT) {
> -		acc_reg_write(d, d->reg_addr->dma_ring_mld_hi, phys_high);
> -		acc_reg_write(d, d->reg_addr->dma_ring_mld_lo, phys_low);
> +	if (d->device_variant == VRB1_VARIANT) {
> +		/* Configure device with the base address for DMA descriptor rings.
> +		 * Same descriptor rings used for UL and DL DMA Engines.
> +		 * Note : Assuming only VF0 bundle is used for PF mode.
> +		 */
> +		phys_high = (uint32_t)(d->sw_rings_iova >> 32);
> +		phys_low  = (uint32_t)(d->sw_rings_iova & ~(ACC_SIZE_64MBYTE-1));
> +		acc_reg_write(d, d->reg_addr->dma_ring_ul5g_hi, phys_high);
> +		acc_reg_write(d, d->reg_addr->dma_ring_ul5g_lo, phys_low);
> +		acc_reg_write(d, d->reg_addr->dma_ring_dl5g_hi, phys_high);
> +		acc_reg_write(d, d->reg_addr->dma_ring_dl5g_lo, phys_low);
> +		acc_reg_write(d, d->reg_addr->dma_ring_ul4g_hi, phys_high);
> +		acc_reg_write(d, d->reg_addr->dma_ring_ul4g_lo, phys_low);
> +		acc_reg_write(d, d->reg_addr->dma_ring_dl4g_hi, phys_high);
> +		acc_reg_write(d, d->reg_addr->dma_ring_dl4g_lo, phys_low);
> +		acc_reg_write(d, d->reg_addr->dma_ring_fft_hi, phys_high);
> +		acc_reg_write(d, d->reg_addr->dma_ring_fft_lo, phys_low);
> +	} else if (d->device_variant == VRB2_VARIANT) {
> +		/* Configure device with the base address for DMA descriptor rings.
> +		 * Different ring buffer used for each operation type.
> +		 * Note : Assuming only VF0 bundle is used for PF mode.
> +		 */
> +		acc_reg_write(d, d->reg_addr->dma_ring_ul5g_hi,
> +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_DEC] >> 32));
> +		acc_reg_write(d, d->reg_addr->dma_ring_ul5g_lo,
> +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_DEC]
> +				& ~(ACC_SIZE_64MBYTE - 1)));
> +		acc_reg_write(d, d->reg_addr->dma_ring_dl5g_hi,
> +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_ENC] >> 32));
> +		acc_reg_write(d, d->reg_addr->dma_ring_dl5g_lo,
> +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_ENC]
> +				& ~(ACC_SIZE_64MBYTE - 1)));
> +		acc_reg_write(d, d->reg_addr->dma_ring_ul4g_hi,
> +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_DEC] >> 32));
> +		acc_reg_write(d, d->reg_addr->dma_ring_ul4g_lo,
> +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_DEC]
> +				& ~(ACC_SIZE_64MBYTE - 1)));
> +		acc_reg_write(d, d->reg_addr->dma_ring_dl4g_hi,
> +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_ENC] >> 32));
> +		acc_reg_write(d, d->reg_addr->dma_ring_dl4g_lo,
> +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_ENC]
> +				& ~(ACC_SIZE_64MBYTE - 1)));
> +		acc_reg_write(d, d->reg_addr->dma_ring_fft_hi,
> +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_FFT] >> 32));
> +		acc_reg_write(d, d->reg_addr->dma_ring_fft_lo,
> +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_FFT]
> +				& ~(ACC_SIZE_64MBYTE - 1)));
> +		acc_reg_write(d, d->reg_addr->dma_ring_mld_hi,
> +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_MLDTS] >> 32));
> +		acc_reg_write(d, d->reg_addr->dma_ring_mld_lo,
> +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_MLDTS]
> +				& ~(ACC_SIZE_64MBYTE - 1)));
>   	}
> +
>   	/*
>   	 * Configure Ring Size to the max queue ring size
>   	 * (used for wrapping purpose).
> @@ -636,19 +687,21 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
>   
>   	phys_high = (uint32_t)(d->tail_ptr_iova >> 32);
>   	phys_low  = (uint32_t)(d->tail_ptr_iova);
> -	acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_hi, phys_high);
> -	acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_lo, phys_low);
> -	acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_hi, phys_high);
> -	acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_lo, phys_low);
> -	acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_hi, phys_high);
> -	acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_lo, phys_low);
> -	acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_hi, phys_high);
> -	acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_lo, phys_low);
> -	acc_reg_write(d, d->reg_addr->tail_ptrs_fft_hi, phys_high);
> -	acc_reg_write(d, d->reg_addr->tail_ptrs_fft_lo, phys_low);
> -	if (d->device_variant == VRB2_VARIANT) {
> -		acc_reg_write(d, d->reg_addr->tail_ptrs_mld_hi, phys_high);
> -		acc_reg_write(d, d->reg_addr->tail_ptrs_mld_lo, phys_low);
> +	{
> +		acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_hi, phys_high);
> +		acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_lo, phys_low);
> +		acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_hi, phys_high);
> +		acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_lo, phys_low);
> +		acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_hi, phys_high);
> +		acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_lo, phys_low);
> +		acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_hi, phys_high);
> +		acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_lo, phys_low);
> +		acc_reg_write(d, d->reg_addr->tail_ptrs_fft_hi, phys_high);
> +		acc_reg_write(d, d->reg_addr->tail_ptrs_fft_lo, phys_low);
> +		if (d->device_variant == VRB2_VARIANT) {
> +			acc_reg_write(d, d->reg_addr->tail_ptrs_mld_hi, phys_high);
> +			acc_reg_write(d, d->reg_addr->tail_ptrs_mld_lo, phys_low);
> +		}
>   	}
>   
>   	ret = allocate_info_ring(dev);
> @@ -684,8 +737,13 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
>   	rte_free(d->tail_ptrs);
>   	d->tail_ptrs = NULL;
>   free_sw_rings:
> -	rte_free(d->sw_rings_base);
> -	d->sw_rings = NULL;
> +	if (d->device_variant == VRB1_VARIANT) {
> +		rte_free(d->sw_rings_base);
> +		d->sw_rings = NULL;

It was not caught initially, but it looks awkward to free sw_rings_base
and then set sw_rings to NULL.
sw_rings_base should also be set to NULL.

> +	} else if (d->device_variant == VRB2_VARIANT) {
> +		for (i = 0; i <= RTE_BBDEV_OP_MLDTS; i++)
> +			rte_free(d->sw_rings_array[i]);

Same here, you should set sw_rings_array[i] to NULL to avoid a double
free later.

> +	}
>   
>   	return ret;
>   }
> @@ -809,17 +867,34 @@ vrb_intr_enable(struct rte_bbdev *dev)
>   static int
>   vrb_dev_close(struct rte_bbdev *dev)
>   {
> +	int i;
>   	struct acc_device *d = dev->data->dev_private;
> +
>   	vrb_check_ir(d);
> -	if (d->sw_rings_base != NULL) {
> -		rte_free(d->tail_ptrs);
> -		rte_free(d->info_ring);
> -		rte_free(d->sw_rings_base);
> -		rte_free(d->harq_layout);
> -		d->tail_ptrs = NULL;
> -		d->info_ring = NULL;
> -		d->sw_rings_base = NULL;
> -		d->harq_layout = NULL;
> +	if (d->device_variant == VRB1_VARIANT) {
> +		if (d->sw_rings_base != NULL) {
> +			rte_free(d->tail_ptrs);
> +			rte_free(d->info_ring);
> +			rte_free(d->sw_rings_base);
> +			rte_free(d->harq_layout);
> +			d->tail_ptrs = NULL;
> +			d->info_ring = NULL;
> +			d->sw_rings_base = NULL;
> +			d->harq_layout = NULL;
> +		}

Actually, it would be cleaner to perform the free operations
systematically; there is no need to check whether sw_rings_base is NULL.

> +	} else if (d->device_variant == VRB2_VARIANT) {
> +		if (d->sw_rings_array[1] != NULL) {

Why 1 and not 0? And as mentioned above, this can be done
unconditionally.
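
For illustration, an unconditional version (sketch only; rte_free() is
documented to do nothing on a NULL pointer, so the guards can be dropped):

	vrb_check_ir(d);
	rte_free(d->tail_ptrs);
	rte_free(d->info_ring);
	rte_free(d->harq_layout);
	d->tail_ptrs = NULL;
	d->info_ring = NULL;
	d->harq_layout = NULL;
	if (d->device_variant == VRB1_VARIANT) {
		rte_free(d->sw_rings_base);
		d->sw_rings_base = NULL;
	} else if (d->device_variant == VRB2_VARIANT) {
		for (i = 0; i <= RTE_BBDEV_OP_MLDTS; i++) {
			rte_free(d->sw_rings_array[i]);
			d->sw_rings_array[i] = NULL;
		}
	}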

> +			rte_free(d->tail_ptrs);
> +			rte_free(d->info_ring);
> +			rte_free(d->harq_layout);
> +			d->tail_ptrs = NULL;
> +			d->info_ring = NULL;
> +			d->harq_layout = NULL;
> +			for (i = 0; i <= RTE_BBDEV_OP_MLDTS; i++) {
> +				rte_free(d->sw_rings_array[i]);
> +				d->sw_rings_array[i] = NULL;
> +			}
> +		}
>   	}
>   	/* Ensure all in flight HW transactions are completed. */
>   	usleep(ACC_LONG_WAIT);
> @@ -890,8 +965,16 @@ vrb_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
>   	}
>   
>   	q->d = d;
> -	q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size * queue_id));
> -	q->ring_addr_iova = d->sw_rings_iova + (d->sw_ring_size * queue_id);
> +	if (d->device_variant == VRB1_VARIANT) {
> +		q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size * queue_id));
> +		q->ring_addr_iova = d->sw_rings_iova + (d->sw_ring_size * queue_id);
> +	} else if (d->device_variant == VRB2_VARIANT) {
> +		q->ring_addr = RTE_PTR_ADD(d->sw_rings_array[conf->op_type],
> +				(d->sw_ring_size * d->queue_index[conf->op_type]));
> +		q->ring_addr_iova = d->sw_rings_iova_array[conf->op_type] +
> +				(d->sw_ring_size * d->queue_index[conf->op_type]);
> +		d->queue_index[conf->op_type]++;
> +	}
>   
>   	/* Prepare the Ring with default descriptor format. */
>   	union acc_dma_desc *desc = NULL;
> @@ -1347,8 +1430,14 @@ vrb_dev_info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info)
>   	dev_info->queue_priority[RTE_BBDEV_OP_FFT] = d->acc_conf.q_fft.num_qgroups;
>   	dev_info->queue_priority[RTE_BBDEV_OP_MLDTS] = d->acc_conf.q_mld.num_qgroups;
>   	dev_info->max_num_queues = 0;
> -	for (i = RTE_BBDEV_OP_NONE; i <= RTE_BBDEV_OP_MLDTS; i++)
> +	for (i = RTE_BBDEV_OP_NONE; i <= RTE_BBDEV_OP_MLDTS; i++) {
> +		if (unlikely(dev_info->num_queues[i] > VRB2_MAX_Q_PER_OP)) {
> +			rte_bbdev_log(ERR, "Unexpected number of queues %d exposed for op %d",
> +					dev_info->num_queues[i], i);
> +			dev_info->num_queues[i] = VRB2_MAX_Q_PER_OP;
> +		}
>   		dev_info->max_num_queues += dev_info->num_queues[i];
> +	}
>   	dev_info->queue_size_lim = ACC_MAX_QUEUE_DEPTH;
>   	dev_info->hardware_accelerated = true;
>   	dev_info->max_dl_queue_priority =
> @@ -4239,7 +4328,8 @@ vrb_bbdev_init(struct rte_bbdev *dev, struct rte_pci_driver *drv)
>   			d->reg_addr = &vrb1_pf_reg_addr;
>   		else
>   			d->reg_addr = &vrb1_vf_reg_addr;
> -	} else {
> +	} else if ((pci_dev->id.device_id == RTE_VRB2_PF_DEVICE_ID) ||
> +			(pci_dev->id.device_id == RTE_VRB2_VF_DEVICE_ID)) {
>   		d->device_variant = VRB2_VARIANT;
>   		d->queue_offset = vrb2_queue_offset;
>   		d->num_qgroups = VRB2_NUM_QGRPS;


^ permalink raw reply	[flat|nested] 13+ messages in thread

* RE: [PATCH v2 02/10] baseband/acc: queue allocation refactor
  2024-10-04 12:08   ` Maxime Coquelin
@ 2024-10-04 18:19     ` Chautru, Nicolas
  0 siblings, 0 replies; 13+ messages in thread
From: Chautru, Nicolas @ 2024-10-04 18:19 UTC (permalink / raw)
  To: Maxime Coquelin, Vargas, Hernan, dev, gakhil, trix; +Cc: Zhang, Qi Z

Hi Maxime, 

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Friday, October 4, 2024 5:08 AM
> To: Vargas, Hernan <hernan.vargas@intel.com>; dev@dpdk.org;
> gakhil@marvell.com; trix@redhat.com
> Cc: Chautru, Nicolas <nicolas.chautru@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>
> Subject: Re: [PATCH v2 02/10] baseband/acc: queue allocation refactor
> 
> 
> 
> On 10/3/24 22:49, Hernan Vargas wrote:
> > Refactor to manage queue memory per operation more flexibly for VRB
> > devices.
> >
> > Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
> > ---
> >   drivers/baseband/acc/acc_common.h  |   5 +
> >   drivers/baseband/acc/rte_vrb_pmd.c | 214 ++++++++++++++++++++---------
> >   2 files changed, 157 insertions(+), 62 deletions(-)
> >
> > diff --git a/drivers/baseband/acc/acc_common.h b/drivers/baseband/acc/acc_common.h
> > index b1f81e73e68d..adbac0dcca70 100644
> > --- a/drivers/baseband/acc/acc_common.h
> > +++ b/drivers/baseband/acc/acc_common.h
> > @@ -149,6 +149,8 @@
> >   #define VRB2_VF_ID_SHIFT     6
> >
> >   #define ACC_MAX_FFT_WIN      16
> > +#define ACC_MAX_RING_BUFFER  64
> > +#define VRB2_MAX_Q_PER_OP 256
> >
> >   extern int acc_common_logtype;
> >
> > @@ -581,6 +583,9 @@ struct acc_device {
> >   	void *sw_rings_base;  /* Base addr of un-aligned memory for sw rings */
> >   	void *sw_rings;  /* 64MBs of 64MB aligned memory for sw rings */
> >   	rte_iova_t sw_rings_iova;  /* IOVA address of sw_rings */
> > +	void *sw_rings_array[ACC_MAX_RING_BUFFER];  /* Array of aligned memory for sw rings. */
> > +	rte_iova_t sw_rings_iova_array[ACC_MAX_RING_BUFFER];  /* Array of sw_rings IOVA. */
> > +	uint32_t queue_index[ACC_MAX_RING_BUFFER]; /* Tracking queue index per ring buffer. */
> >   	/* Virtual address of the info memory routed to the this function under
> >   	 * operation, whether it is PF or VF.
> >   	 * HW may DMA information data at this location asynchronously
> > diff --git a/drivers/baseband/acc/rte_vrb_pmd.c b/drivers/baseband/acc/rte_vrb_pmd.c
> > index bae01e563826..2c62a5b3e329 100644
> > --- a/drivers/baseband/acc/rte_vrb_pmd.c
> > +++ b/drivers/baseband/acc/rte_vrb_pmd.c
> > @@ -281,7 +281,7 @@ fetch_acc_config(struct rte_bbdev *dev)
> >   		/* Check the depth of the AQs. */
> >   		reg_len0 = acc_reg_read(d, d->reg_addr->depth_log0_offset);
> >   		reg_len1 = acc_reg_read(d, d->reg_addr->depth_log1_offset);
> > -		for (acc = 0; acc < NUM_ACC; acc++) {
> > +		for (acc = 0; acc < VRB1_NUM_ACCS; acc++) {
> >   			qtopFromAcc(&q_top, acc, acc_conf);
> >   			if (q_top->first_qgroup_index < ACC_NUM_QGRPS_PER_WORD)
> >   				q_top->aq_depth_log2 =
> > @@ -290,7 +290,7 @@ fetch_acc_config(struct rte_bbdev *dev)
> >   				q_top->aq_depth_log2 = (reg_len1 >> ((q_top->first_qgroup_index -
> >   						ACC_NUM_QGRPS_PER_WORD) * 4)) & 0xF;
> >   		}
> > -	} else {
> > +	} else if (d->device_variant == VRB2_VARIANT) {
> >   		reg0 = acc_reg_read(d, d->reg_addr->qman_group_func);
> >   		reg1 = acc_reg_read(d, d->reg_addr->qman_group_func + 4);
> >   		reg2 = acc_reg_read(d, d->reg_addr->qman_group_func + 8);
> @@
> > -308,7 +308,7 @@ fetch_acc_config(struct rte_bbdev *dev)
> >   					idx = (reg2 >> ((qg %
> ACC_NUM_QGRPS_PER_WORD) * 4)) & 0x7;
> >   				else
> >   					idx = (reg3 >> ((qg %
> ACC_NUM_QGRPS_PER_WORD) * 4)) & 0x7;
> > -				if (idx < VRB_NUM_ACCS) {
> > +				if (idx < VRB2_NUM_ACCS) {
> >   					acc = qman_func_id[idx];
> >   					updateQtop(acc, qg, acc_conf, d);
> >   				}
> > @@ -321,7 +321,7 @@ fetch_acc_config(struct rte_bbdev *dev)
> >   		reg_len2 = acc_reg_read(d, d->reg_addr->depth_log0_offset + 8);
> >   		reg_len3 = acc_reg_read(d, d->reg_addr->depth_log0_offset + 12);
> >
> > -		for (acc = 0; acc < NUM_ACC; acc++) {
> > +		for (acc = 0; acc < VRB2_NUM_ACCS; acc++) {
> >   			qtopFromAcc(&q_top, acc, acc_conf);
> >   			if (q_top->first_qgroup_index / ACC_NUM_QGRPS_PER_WORD == 0)
> >   				q_top->aq_depth_log2 = (reg_len0 >> ((q_top->first_qgroup_index %
> 
> This function could be heavily refactored.
> If we look at what is actually performed, the VRB1 and VRB2 logic is the
> same; just a couple of values differ (they could be set at probe time).
> 
> I might propose something in the future.
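
For illustration, the probe-time parametrization suggested above might
look like the sketch below (num_accs would be a new, hypothetical field;
the current code branches on d->device_variant instead):

	/* At probe time, e.g. in vrb_bbdev_init(): */
	d->num_accs = (d->device_variant == VRB1_VARIANT) ?
			VRB1_NUM_ACCS : VRB2_NUM_ACCS;

	/* fetch_acc_config() could then use a single loop for both variants: */
	for (acc = 0; acc < d->num_accs; acc++) {
		qtopFromAcc(&q_top, acc, acc_conf);
		/* ... same per-accelerator handling for VRB1 and VRB2 ... */
	}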
> 
> > @@ -543,6 +543,7 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
> >   {
> >   	uint32_t phys_low, phys_high, value;
> >   	struct acc_device *d = dev->data->dev_private;
> > +	uint16_t queues_per_op, i;
> >   	int ret;
> >
> >   	if (d->pf_device && !d->acc_conf.pf_mode_en) {
> > @@ -564,27 +565,37 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
> >   		return -ENODEV;
> >   	}
> >
> > -	alloc_sw_rings_min_mem(dev, d, num_queues, socket_id);
> > +	if (d->device_variant == VRB1_VARIANT) {
> > +		alloc_sw_rings_min_mem(dev, d, num_queues, socket_id);
> >
> > -	/* If minimal memory space approach failed, then allocate
> > -	 * the 2 * 64MB block for the sw rings.
> > -	 */
> > -	if (d->sw_rings == NULL)
> > -		alloc_2x64mb_sw_rings_mem(dev, d, socket_id);
> > +		/* If minimal memory space approach failed, then allocate
> > +		 * the 2 * 64MB block for the sw rings.
> > +		 */
> > +		if (d->sw_rings == NULL)
> > +			alloc_2x64mb_sw_rings_mem(dev, d, socket_id);
> >
> > -	if (d->sw_rings == NULL) {
> > -		rte_bbdev_log(NOTICE,
> > -				"Failure allocating sw_rings memory");
> > -		return -ENOMEM;
> > +		if (d->sw_rings == NULL) {
> > +			rte_bbdev_log(NOTICE, "Failure allocating sw_rings memory");
> > +			return -ENOMEM;
> > +		}
> > +	} else if (d->device_variant == VRB2_VARIANT) {
> > +		queues_per_op = RTE_MIN(VRB2_MAX_Q_PER_OP, num_queues);
> > +		for (i = 0; i <= RTE_BBDEV_OP_MLDTS; i++) {
> > +			alloc_sw_rings_min_mem(dev, d, queues_per_op, socket_id);
> > +			if (d->sw_rings == NULL) {
> > +				rte_bbdev_log(NOTICE, "Failure allocating sw_rings memory %d", i);
> > +				return -ENOMEM;
> > +			}
> > +			/* Moves the pointer to the relevant array. */
> > +			d->sw_rings_array[i] = d->sw_rings;
> > +			d->sw_rings_iova_array[i] = d->sw_rings_iova;
> > +			d->sw_rings = NULL;
> > +			d->sw_rings_base = NULL;
> > +			d->sw_rings_iova = 0;
> > +			d->queue_index[i] = 0;
> > +		}
> >   	}
> >
> > -	/* Configure device with the base address for DMA descriptor rings.
> > -	 * Same descriptor rings used for UL and DL DMA Engines.
> > -	 * Note : Assuming only VF0 bundle is used for PF mode.
> > -	 */
> > -	phys_high = (uint32_t)(d->sw_rings_iova >> 32);
> > -	phys_low  = (uint32_t)(d->sw_rings_iova & ~(ACC_SIZE_64MBYTE-1));
> > -
> >   	/* Read the populated cfg from device registers. */
> >   	fetch_acc_config(dev);
> >
> > @@ -599,20 +610,60 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
> >   	if (d->pf_device)
> >   		acc_reg_write(d, VRB1_PfDmaAxiControl, 1);
> >
> > -	acc_reg_write(d, d->reg_addr->dma_ring_ul5g_hi, phys_high);
> > -	acc_reg_write(d, d->reg_addr->dma_ring_ul5g_lo, phys_low);
> > -	acc_reg_write(d, d->reg_addr->dma_ring_dl5g_hi, phys_high);
> > -	acc_reg_write(d, d->reg_addr->dma_ring_dl5g_lo, phys_low);
> > -	acc_reg_write(d, d->reg_addr->dma_ring_ul4g_hi, phys_high);
> > -	acc_reg_write(d, d->reg_addr->dma_ring_ul4g_lo, phys_low);
> > -	acc_reg_write(d, d->reg_addr->dma_ring_dl4g_hi, phys_high);
> > -	acc_reg_write(d, d->reg_addr->dma_ring_dl4g_lo, phys_low);
> > -	acc_reg_write(d, d->reg_addr->dma_ring_fft_hi, phys_high);
> > -	acc_reg_write(d, d->reg_addr->dma_ring_fft_lo, phys_low);
> > -	if (d->device_variant == VRB2_VARIANT) {
> > -		acc_reg_write(d, d->reg_addr->dma_ring_mld_hi, phys_high);
> > -		acc_reg_write(d, d->reg_addr->dma_ring_mld_lo, phys_low);
> > +	if (d->device_variant == VRB1_VARIANT) {
> > +		/* Configure device with the base address for DMA descriptor rings.
> > +		 * Same descriptor rings used for UL and DL DMA Engines.
> > +		 * Note : Assuming only VF0 bundle is used for PF mode.
> > +		 */
> > +		phys_high = (uint32_t)(d->sw_rings_iova >> 32);
> > +		phys_low  = (uint32_t)(d->sw_rings_iova & ~(ACC_SIZE_64MBYTE-1));
> > +		acc_reg_write(d, d->reg_addr->dma_ring_ul5g_hi, phys_high);
> > +		acc_reg_write(d, d->reg_addr->dma_ring_ul5g_lo, phys_low);
> > +		acc_reg_write(d, d->reg_addr->dma_ring_dl5g_hi, phys_high);
> > +		acc_reg_write(d, d->reg_addr->dma_ring_dl5g_lo, phys_low);
> > +		acc_reg_write(d, d->reg_addr->dma_ring_ul4g_hi, phys_high);
> > +		acc_reg_write(d, d->reg_addr->dma_ring_ul4g_lo, phys_low);
> > +		acc_reg_write(d, d->reg_addr->dma_ring_dl4g_hi, phys_high);
> > +		acc_reg_write(d, d->reg_addr->dma_ring_dl4g_lo, phys_low);
> > +		acc_reg_write(d, d->reg_addr->dma_ring_fft_hi, phys_high);
> > +		acc_reg_write(d, d->reg_addr->dma_ring_fft_lo, phys_low);
> > +	} else if (d->device_variant == VRB2_VARIANT) {
> > +		/* Configure device with the base address for DMA descriptor rings.
> > +		 * Different ring buffer used for each operation type.
> > +		 * Note : Assuming only VF0 bundle is used for PF mode.
> > +		 */
> > +		acc_reg_write(d, d->reg_addr->dma_ring_ul5g_hi,
> > +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_DEC] >> 32));
> > +		acc_reg_write(d, d->reg_addr->dma_ring_ul5g_lo,
> > +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_DEC]
> > +				& ~(ACC_SIZE_64MBYTE - 1)));
> > +		acc_reg_write(d, d->reg_addr->dma_ring_dl5g_hi,
> > +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_ENC] >> 32));
> > +		acc_reg_write(d, d->reg_addr->dma_ring_dl5g_lo,
> > +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_LDPC_ENC]
> > +				& ~(ACC_SIZE_64MBYTE - 1)));
> > +		acc_reg_write(d, d->reg_addr->dma_ring_ul4g_hi,
> > +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_DEC] >> 32));
> > +		acc_reg_write(d, d->reg_addr->dma_ring_ul4g_lo,
> > +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_DEC]
> > +				& ~(ACC_SIZE_64MBYTE - 1)));
> > +		acc_reg_write(d, d->reg_addr->dma_ring_dl4g_hi,
> > +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_ENC] >> 32));
> > +		acc_reg_write(d, d->reg_addr->dma_ring_dl4g_lo,
> > +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_TURBO_ENC]
> > +				& ~(ACC_SIZE_64MBYTE - 1)));
> > +		acc_reg_write(d, d->reg_addr->dma_ring_fft_hi,
> > +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_FFT] >> 32));
> > +		acc_reg_write(d, d->reg_addr->dma_ring_fft_lo,
> > +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_FFT]
> > +				& ~(ACC_SIZE_64MBYTE - 1)));
> > +		acc_reg_write(d, d->reg_addr->dma_ring_mld_hi,
> > +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_MLDTS] >> 32));
> > +		acc_reg_write(d, d->reg_addr->dma_ring_mld_lo,
> > +				(uint32_t)(d->sw_rings_iova_array[RTE_BBDEV_OP_MLDTS]
> > +				& ~(ACC_SIZE_64MBYTE - 1)));
> >   	}
> > +
> >   	/*
> >   	 * Configure Ring Size to the max queue ring size
> >   	 * (used for wrapping purpose).
> > @@ -636,19 +687,21 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
> >
> >   	phys_high = (uint32_t)(d->tail_ptr_iova >> 32);
> >   	phys_low  = (uint32_t)(d->tail_ptr_iova);
> > -	acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_hi, phys_high);
> > -	acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_lo, phys_low);
> > -	acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_hi, phys_high);
> > -	acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_lo, phys_low);
> > -	acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_hi, phys_high);
> > -	acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_lo, phys_low);
> > -	acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_hi, phys_high);
> > -	acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_lo, phys_low);
> > -	acc_reg_write(d, d->reg_addr->tail_ptrs_fft_hi, phys_high);
> > -	acc_reg_write(d, d->reg_addr->tail_ptrs_fft_lo, phys_low);
> > -	if (d->device_variant == VRB2_VARIANT) {
> > -		acc_reg_write(d, d->reg_addr->tail_ptrs_mld_hi, phys_high);
> > -		acc_reg_write(d, d->reg_addr->tail_ptrs_mld_lo, phys_low);
> > +	{
> > +		acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_hi, phys_high);
> > +		acc_reg_write(d, d->reg_addr->tail_ptrs_ul5g_lo, phys_low);
> > +		acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_hi, phys_high);
> > +		acc_reg_write(d, d->reg_addr->tail_ptrs_dl5g_lo, phys_low);
> > +		acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_hi, phys_high);
> > +		acc_reg_write(d, d->reg_addr->tail_ptrs_ul4g_lo, phys_low);
> > +		acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_hi, phys_high);
> > +		acc_reg_write(d, d->reg_addr->tail_ptrs_dl4g_lo, phys_low);
> > +		acc_reg_write(d, d->reg_addr->tail_ptrs_fft_hi, phys_high);
> > +		acc_reg_write(d, d->reg_addr->tail_ptrs_fft_lo, phys_low);
> > +		if (d->device_variant == VRB2_VARIANT) {
> > +			acc_reg_write(d, d->reg_addr->tail_ptrs_mld_hi, phys_high);
> > +			acc_reg_write(d, d->reg_addr->tail_ptrs_mld_lo, phys_low);
> > +		}
> >   	}
> >
> >   	ret = allocate_info_ring(dev);
> > @@ -684,8 +737,13 @@ vrb_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
> >   	rte_free(d->tail_ptrs);
> >   	d->tail_ptrs = NULL;
> >   free_sw_rings:
> > -	rte_free(d->sw_rings_base);
> > -	d->sw_rings = NULL;
> > +	if (d->device_variant == VRB1_VARIANT) {
> > +		rte_free(d->sw_rings_base);
> > +		d->sw_rings = NULL;
> 
> It was not caught initially, but it looks awkward to free sw_rings_base, and then
> set sw_rings to NULL.
> sw_rings_base should also be set to NULL.
> 
> > +	} else if (d->device_variant == VRB2_VARIANT) {
> > +		for (i = 0; i <= RTE_BBDEV_OP_MLDTS; i++)
> > +			rte_free(d->sw_rings_array[i]);
> 
> Same here, you should set sw_rings_array[i] to NULL to avoid a double free
> later.

I believe this is more historical and orthogonal to that commit.
I would prefer to clean that up through another series, applying the same BKM consistently, and would not want to rush it.
OK with you?

> 
> > +	}
> >
> >   	return ret;
> >   }
> > @@ -809,17 +867,34 @@ vrb_intr_enable(struct rte_bbdev *dev)
> >   static int
> >   vrb_dev_close(struct rte_bbdev *dev)
> >   {
> > +	int i;
> >   	struct acc_device *d = dev->data->dev_private;
> > +
> >   	vrb_check_ir(d);
> > -	if (d->sw_rings_base != NULL) {
> > -		rte_free(d->tail_ptrs);
> > -		rte_free(d->info_ring);
> > -		rte_free(d->sw_rings_base);
> > -		rte_free(d->harq_layout);
> > -		d->tail_ptrs = NULL;
> > -		d->info_ring = NULL;
> > -		d->sw_rings_base = NULL;
> > -		d->harq_layout = NULL;
> > +	if (d->device_variant == VRB1_VARIANT) {
> > +		if (d->sw_rings_base != NULL) {
> > +			rte_free(d->tail_ptrs);
> > +			rte_free(d->info_ring);
> > +			rte_free(d->sw_rings_base);
> > +			rte_free(d->harq_layout);
> > +			d->tail_ptrs = NULL;
> > +			d->info_ring = NULL;
> > +			d->sw_rings_base = NULL;
> > +			d->harq_layout = NULL;
> > +		}
> 
> Actually, it would be cleaner to perform the free operations systematically; no
> need to check whether sw_rings_base is NULL.

Noted as above.

> 
> > +	} else if (d->device_variant == VRB2_VARIANT) {
> > +		if (d->sw_rings_array[1] != NULL) {
> 
> Why 1 and not 0? And as mentioned above, this can be done unconditionally.

Thanks. We can indeed start the check from index zero, to match the
allocation. As an aside, queue zero is arguably a special one, but we can
optimize this a bit in the future. For example (sketch only):
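
	/* Guard on index 0, matching the allocation loop. */
	if (d->sw_rings_array[0] != NULL) {
		for (i = 0; i <= RTE_BBDEV_OP_MLDTS; i++) {
			rte_free(d->sw_rings_array[i]);
			d->sw_rings_array[i] = NULL;
		}
	}

Thanks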

> 
> > +			rte_free(d->tail_ptrs);
> > +			rte_free(d->info_ring);
> > +			rte_free(d->harq_layout);
> > +			d->tail_ptrs = NULL;
> > +			d->info_ring = NULL;
> > +			d->harq_layout = NULL;
> > +			for (i = 0; i <= RTE_BBDEV_OP_MLDTS; i++) {
> > +				rte_free(d->sw_rings_array[i]);
> > +				d->sw_rings_array[i] = NULL;
> > +			}
> > +		}
> >   	}
> >   	/* Ensure all in flight HW transactions are completed. */
> >   	usleep(ACC_LONG_WAIT);
> > @@ -890,8 +965,16 @@ vrb_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
> >   	}
> >
> >   	q->d = d;
> > -	q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size * queue_id));
> > -	q->ring_addr_iova = d->sw_rings_iova + (d->sw_ring_size * queue_id);
> > +	if (d->device_variant == VRB1_VARIANT) {
> > +		q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size * queue_id));
> > +		q->ring_addr_iova = d->sw_rings_iova + (d->sw_ring_size * queue_id);
> > +	} else if (d->device_variant == VRB2_VARIANT) {
> > +		q->ring_addr = RTE_PTR_ADD(d->sw_rings_array[conf->op_type],
> > +				(d->sw_ring_size * d->queue_index[conf->op_type]));
> > +		q->ring_addr_iova = d->sw_rings_iova_array[conf->op_type] +
> > +				(d->sw_ring_size * d->queue_index[conf->op_type]);
> > +		d->queue_index[conf->op_type]++;
> > +	}
> >
> >   	/* Prepare the Ring with default descriptor format. */
> >   	union acc_dma_desc *desc = NULL;
> > @@ -1347,8 +1430,14 @@ vrb_dev_info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info)
> >   	dev_info->queue_priority[RTE_BBDEV_OP_FFT] = d->acc_conf.q_fft.num_qgroups;
> >   	dev_info->queue_priority[RTE_BBDEV_OP_MLDTS] = d->acc_conf.q_mld.num_qgroups;
> >   	dev_info->max_num_queues = 0;
> > -	for (i = RTE_BBDEV_OP_NONE; i <= RTE_BBDEV_OP_MLDTS; i++)
> > +	for (i = RTE_BBDEV_OP_NONE; i <= RTE_BBDEV_OP_MLDTS; i++) {
> > +		if (unlikely(dev_info->num_queues[i] > VRB2_MAX_Q_PER_OP)) {
> > +			rte_bbdev_log(ERR, "Unexpected number of queues %d exposed for op %d",
> > +					dev_info->num_queues[i], i);
> > +			dev_info->num_queues[i] = VRB2_MAX_Q_PER_OP;
> > +		}
> >   		dev_info->max_num_queues += dev_info->num_queues[i];
> > +	}
> >   	dev_info->queue_size_lim = ACC_MAX_QUEUE_DEPTH;
> >   	dev_info->hardware_accelerated = true;
> >   	dev_info->max_dl_queue_priority =
> > @@ -4239,7 +4328,8 @@ vrb_bbdev_init(struct rte_bbdev *dev, struct rte_pci_driver *drv)
> >   			d->reg_addr = &vrb1_pf_reg_addr;
> >   		else
> >   			d->reg_addr = &vrb1_vf_reg_addr;
> > -	} else {
> > +	} else if ((pci_dev->id.device_id == RTE_VRB2_PF_DEVICE_ID) ||
> > +			(pci_dev->id.device_id == RTE_VRB2_VF_DEVICE_ID)) {
> >   		d->device_variant = VRB2_VARIANT;
> >   		d->queue_offset = vrb2_queue_offset;
> >   		d->num_qgroups = VRB2_NUM_QGRPS;


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2024-10-04 18:19 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-10-03 20:49 [PATCH v2 00/10] acc baseband PMD fix and updates for 24.11 Hernan Vargas
2024-10-03 20:49 ` [PATCH v2 01/10] baseband/acc: fix access to deallocated mem Hernan Vargas
2024-10-03 20:49 ` [PATCH v2 02/10] baseband/acc: queue allocation refactor Hernan Vargas
2024-10-04 12:08   ` Maxime Coquelin
2024-10-04 18:19     ` Chautru, Nicolas
2024-10-03 20:49 ` [PATCH v2 03/10] baseband/acc: configure max queues per device Hernan Vargas
2024-10-03 20:49 ` [PATCH v2 04/10] baseband/acc: future proof structure comparison Hernan Vargas
2024-10-03 20:49 ` [PATCH v2 05/10] baseband/acc: enhance SW ring alignment Hernan Vargas
2024-10-03 20:49 ` [PATCH v2 06/10] baseband/acc: remove soft output bypass Hernan Vargas
2024-10-03 20:49 ` [PATCH v2 07/10] baseband/acc: algorithm tuning for LDPC decoder Hernan Vargas
2024-10-03 20:49 ` [PATCH v2 08/10] baseband/acc: remove check on HARQ memory Hernan Vargas
2024-10-03 20:49 ` [PATCH v2 09/10] baseband/acc: reset ring data valid bit Hernan Vargas
2024-10-03 20:49 ` [PATCH v2 10/10] baseband/acc: cosmetic changes Hernan Vargas

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).