DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] net/mlx5: fix aging queue doorbell ringing
@ 2020-11-03 10:38 Dekel Peled
  2020-11-08  9:11 ` Raslan Darawsheh
  0 siblings, 1 reply; 2+ messages in thread
From: Dekel Peled @ 2020-11-03 10:38 UTC (permalink / raw)
  To: viacheslavo, shahafs, matan; +Cc: dev

A recent patch introduced a new SQ for ASO flow hit management.
This SQ uses two WQEBBs for each WQE.
The SQ producer index is 16 bits wide.

The enqueue loop posts new WQEs to the ASO SQ, using the WQE index for
SQ management.
This 16-bit index, multiplied by 2, was wrongly also used for ringing
the SQ doorbell.
The multiplication caused the SW index wraparound to go out of sync
with the hardware index, causing the queue to get stuck.

This patch separates the WQE index management from the doorbell index
management.
For each increment of the WQE index by 1, the doorbell index is
incremented by 2.

Fixes: 18c88cf29c29 ("net/mlx5: support flow hit action for aging")

Signed-off-by: Dekel Peled <dekelp@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
 drivers/common/mlx5/mlx5_prm.h   | 21 +++++++++++++------
 drivers/net/mlx5/mlx5.h          |  3 ++-
 drivers/net/mlx5/mlx5_flow_age.c | 36 ++++++++++++++++++--------------
 3 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 10f9b18d1b..58d180486e 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -293,6 +293,15 @@ struct mlx5_wqe_cseg {
 	uint32_t misc;
 } __rte_packed __rte_aligned(MLX5_WSEG_SIZE);
 
+/*
+ * WQE CSEG opcode field size is 32 bits, divided:
+ * Bits 31:24 OPC_MOD
+ * Bits 23:8 wqe_index
+ * Bits 7:0 OPCODE
+ */
+#define WQE_CSEG_OPC_MOD_OFFSET		24
+#define WQE_CSEG_WQE_INDEX_OFFSET	 8
+
 /* Header of data segment. Minimal size Data Segment */
 struct mlx5_wqe_dseg {
 	uint32_t bcount;
@@ -2359,12 +2368,12 @@ struct mlx5_ifc_create_flow_hit_aso_in_bits {
 	struct mlx5_ifc_flow_hit_aso_bits flow_hit_aso;
 };
 
-enum mlx5_access_aso_op_mod {
-	ASO_OP_MOD_IPSEC = 0x0,
-	ASO_OP_MOD_CONNECTION_TRACKING = 0x1,
-	ASO_OP_MOD_POLICER = 0x2,
-	ASO_OP_MOD_RACE_AVOIDANCE = 0x3,
-	ASO_OP_MOD_FLOW_HIT = 0x4,
+enum mlx5_access_aso_opc_mod {
+	ASO_OPC_MOD_IPSEC = 0x0,
+	ASO_OPC_MOD_CONNECTION_TRACKING = 0x1,
+	ASO_OPC_MOD_POLICER = 0x2,
+	ASO_OPC_MOD_RACE_AVOIDANCE = 0x3,
+	ASO_OPC_MOD_FLOW_HIT = 0x4,
 };
 
 #define ASO_CSEG_DATA_MASK_MODE_OFFSET	30
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 63d263384b..83beee3610 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -516,7 +516,8 @@ struct mlx5_aso_sq {
 	volatile uint64_t *uar_addr;
 	struct mlx5_aso_devx_mr mr;
 	uint16_t pi;
-	uint16_t ci;
+	uint32_t head;
+	uint32_t tail;
 	uint32_t sqn;
 	struct mlx5_aso_sq_elem elts[1 << MLX5_ASO_QUEUE_LOG_DESC];
 	uint16_t next; /* Pool index of the next pool to query. */
diff --git a/drivers/net/mlx5/mlx5_flow_age.c b/drivers/net/mlx5/mlx5_flow_age.c
index 0b7fa46e2a..829094d9cf 100644
--- a/drivers/net/mlx5/mlx5_flow_age.c
+++ b/drivers/net/mlx5/mlx5_flow_age.c
@@ -321,8 +321,9 @@ mlx5_aso_sq_create(void *ctx, struct mlx5_aso_sq *sq, int socket,
 		rte_errno  = ENOMEM;
 		goto error;
 	}
-	sq->ci = 0;
 	sq->pi = 0;
+	sq->head = 0;
+	sq->tail = 0;
 	sq->sqn = sq->sq->id;
 	sq->db_rec = RTE_PTR_ADD(sq->umem_buf, (uintptr_t)(wq_attr->dbr_addr));
 	sq->uar_addr = (volatile uint64_t *)((uint8_t *)sq->uar_obj->base_addr +
@@ -382,20 +383,20 @@ mlx5_aso_sq_enqueue_burst(struct mlx5_aso_age_mng *mng, uint16_t n)
 	uint16_t size = 1 << sq->log_desc_n;
 	uint16_t mask = size - 1;
 	uint16_t max;
-	uint16_t start_pi = sq->pi;
+	uint16_t start_head = sq->head;
 
-	max = RTE_MIN(size - (uint16_t)(sq->pi - sq->ci), n - sq->next);
+	max = RTE_MIN(size - (uint16_t)(sq->head - sq->tail), n - sq->next);
 	if (unlikely(!max))
 		return 0;
-	sq->elts[start_pi & mask].burst_size = max;
+	sq->elts[start_head & mask].burst_size = max;
 	do {
-		wqe = &sq->wqes[sq->pi & mask];
-		rte_prefetch0(&sq->wqes[(sq->pi + 1) & mask]);
+		wqe = &sq->wqes[sq->head & mask];
+		rte_prefetch0(&sq->wqes[(sq->head + 1) & mask]);
 		/* Fill next WQE. */
 		rte_spinlock_lock(&mng->resize_sl);
 		pool = mng->pools[sq->next];
 		rte_spinlock_unlock(&mng->resize_sl);
-		sq->elts[sq->pi & mask].pool = pool;
+		sq->elts[sq->head & mask].pool = pool;
 		wqe->general_cseg.misc =
 				rte_cpu_to_be_32(((struct mlx5_devx_obj *)
 						 (pool->flow_hit_aso_obj))->id);
@@ -403,20 +404,23 @@ mlx5_aso_sq_enqueue_burst(struct mlx5_aso_age_mng *mng, uint16_t n)
 							 MLX5_COMP_MODE_OFFSET);
 		wqe->general_cseg.opcode = rte_cpu_to_be_32
 						(MLX5_OPCODE_ACCESS_ASO |
-						 ASO_OP_MOD_FLOW_HIT << 24 |
-						 sq->pi << 9);
-		sq->pi++;
+						 (ASO_OPC_MOD_FLOW_HIT <<
+						  WQE_CSEG_OPC_MOD_OFFSET) |
+						 (sq->pi <<
+						  WQE_CSEG_WQE_INDEX_OFFSET));
+		sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
+		sq->head++;
 		sq->next++;
 		max--;
 	} while (max);
 	wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
 							 MLX5_COMP_MODE_OFFSET);
 	rte_io_wmb();
-	sq->db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi << 1);
+	sq->db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
 	rte_wmb();
 	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH.*/
 	rte_wmb();
-	return sq->elts[start_pi & mask].burst_size;
+	return sq->elts[start_head & mask].burst_size;
 }
 
 /**
@@ -483,7 +487,7 @@ mlx5_aso_age_action_update(struct mlx5_dev_ctx_shared *sh, uint16_t n)
 	uint16_t i;
 
 	for (i = 0; i < n; ++i) {
-		uint16_t idx = (sq->ci + i) & mask;
+		uint16_t idx = (sq->tail + i) & mask;
 		struct mlx5_aso_age_pool *pool = sq->elts[idx].pool;
 		uint64_t diff = curr - pool->time_of_last_age_check;
 		uint64_t *addr = sq->mr.buf;
@@ -559,7 +563,7 @@ mlx5_aso_completion_handle(struct mlx5_dev_ctx_shared *sh)
 	const unsigned int mask = cq_size - 1;
 	uint32_t idx;
 	uint32_t next_idx = cq->cq_ci & mask;
-	const uint16_t max = (uint16_t)(sq->pi - sq->ci);
+	const uint16_t max = (uint16_t)(sq->head - sq->tail);
 	uint16_t i = 0;
 	int ret;
 	if (unlikely(!max))
@@ -580,13 +584,13 @@ mlx5_aso_completion_handle(struct mlx5_dev_ctx_shared *sh)
 				break;
 			mlx5_aso_cqe_err_handle(sq);
 		} else {
-			i += sq->elts[(sq->ci + i) & mask].burst_size;
+			i += sq->elts[(sq->tail + i) & mask].burst_size;
 		}
 		cq->cq_ci++;
 	} while (1);
 	if (likely(i)) {
 		mlx5_aso_age_action_update(sh, i);
-		sq->ci += i;
+		sq->tail += i;
 		rte_io_wmb();
 		cq->db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
 	}
-- 
2.25.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [dpdk-dev] [PATCH] net/mlx5: fix aging queue doorbell ringing
  2020-11-03 10:38 [dpdk-dev] [PATCH] net/mlx5: fix aging queue doorbell ringing Dekel Peled
@ 2020-11-08  9:11 ` Raslan Darawsheh
  0 siblings, 0 replies; 2+ messages in thread
From: Raslan Darawsheh @ 2020-11-08  9:11 UTC (permalink / raw)
  To: Dekel Peled, Slava Ovsiienko, Shahaf Shuler, Matan Azrad; +Cc: dev

Hi,

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Dekel Peled
> Sent: Tuesday, November 3, 2020 12:38 PM
> To: Slava Ovsiienko <viacheslavo@nvidia.com>; Shahaf Shuler
> <shahafs@nvidia.com>; Matan Azrad <matan@nvidia.com>
> Cc: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH] net/mlx5: fix aging queue doorbell ringing
> 
> Recent patch introduced a new SQ for ASO flow hit management.
> This SQ uses two WQEBB's for each WQE.
> The SQ producer index is 16 bits wide.
> 
> The enqueue loop posts new WQEs to the ASO SQ, using WQE index for
> the SQ management.
> This 16 bits index multiplied by 2 was wrongly used also for SQ
> doorbell ringing.
> The multiplication caused the SW index overlapping to be out of sync
> with the hardware index, causing it to get stuck.
> 
> This patch separates the WQE index management from the doorbell index
> management.
> So, for each WQE index incrementation by 1, the doorbell index is
> incremented by 2.
> 
> Fixes: 18c88cf29c29 ("net/mlx5: support flow hit action for aging")
> 
> Signed-off-by: Dekel Peled <dekelp@nvidia.com>
> Acked-by: Matan Azrad <matan@nvidia.com>
> ---
>  drivers/common/mlx5/mlx5_prm.h   | 21 +++++++++++++------
>  drivers/net/mlx5/mlx5.h          |  3 ++-
>  drivers/net/mlx5/mlx5_flow_age.c | 36 ++++++++++++++++++--------------
>  3 files changed, 37 insertions(+), 23 deletions(-)
> 

Patch applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2020-11-08  9:11 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-03 10:38 [dpdk-dev] [PATCH] net/mlx5: fix aging queue doorbell ringing Dekel Peled
2020-11-08  9:11 ` Raslan Darawsheh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).