* [PATCH] drivers: fix error CQE handling
@ 2023-05-02 18:49 Alexander Kozyrev
2024-10-03 19:14 ` Stephen Hemminger
2024-10-03 20:26 ` [PATCH v2] " Alexander Kozyrev
0 siblings, 2 replies; 6+ messages in thread
From: Alexander Kozyrev @ 2023-05-02 18:49 UTC (permalink / raw)
To: dev; +Cc: stable, rasland, viacheslavo, matan
The completion queue element size can be independently configured
to report either 64 or 128 bytes CQEs by programming cqe_sz parameter
at CQ creation. This parameter depends on the cache line size and
affects both regular CQEs and error CQEs. But the error handling
assumes that an error CQE is 64 bytes and doesn't take the padding
into consideration on platforms with 128-byte cache lines.
Fix the error CQE size in all error handling routines in mlx5.
Fixes: 957e45fb7b ("net/mlx5: handle Tx completion with error")
Cc: stable@dpdk.org
Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
---
drivers/common/mlx5/mlx5_prm.h | 29 ++++++++++++++++++++-
drivers/common/mlx5/windows/mlx5_win_defs.h | 12 ---------
drivers/compress/mlx5/mlx5_compress.c | 4 +--
drivers/crypto/mlx5/mlx5_crypto.c | 2 +-
drivers/net/mlx5/mlx5_flow_aso.c | 6 ++---
drivers/net/mlx5/mlx5_rx.c | 2 +-
drivers/net/mlx5/mlx5_tx.c | 8 +++---
7 files changed, 39 insertions(+), 24 deletions(-)
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index ed3d5efbb7..505ff3cc8e 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -268,8 +268,12 @@
/* Maximum number of DS in WQE. Limited by 6-bit field. */
#define MLX5_DSEG_MAX 63
-/* The 32 bit syndrome offset in struct mlx5_err_cqe. */
+/* The 32 bit syndrome offset in struct mlx5_error_cqe. */
+#if (RTE_CACHE_LINE_SIZE == 128)
+#define MLX5_ERROR_CQE_SYNDROME_OFFSET 116
+#else
#define MLX5_ERROR_CQE_SYNDROME_OFFSET 52
+#endif
/* The completion mode offset in the WQE control segment line 2. */
#define MLX5_COMP_MODE_OFFSET 2
@@ -415,6 +419,29 @@ struct mlx5_wqe_mprq {
#define MLX5_MPRQ_STRIDE_SHIFT_BYTE 2
+struct mlx5_error_cqe {
+#if (RTE_CACHE_LINE_SIZE == 128)
+ uint8_t padding[64];
+#endif
+ uint8_t rsvd0[2];
+ uint16_t eth_wqe_id;
+ uint8_t rsvd1[16];
+ uint16_t ib_stride_index;
+ uint8_t rsvd2[10];
+ uint32_t srqn;
+ uint8_t rsvd3[8];
+ uint32_t byte_cnt;
+ uint8_t rsvd4[4];
+ uint8_t hw_err_synd;
+ uint8_t hw_synd_type;
+ uint8_t vendor_err_synd;
+ uint8_t syndrome;
+ uint32_t s_wqe_opcode_qpn;
+ uint16_t wqe_counter;
+ uint8_t signature;
+ uint8_t op_own;
+};
+
/* CQ element structure - should be equal to the cache line size */
struct mlx5_cqe {
#if (RTE_CACHE_LINE_SIZE == 128)
diff --git a/drivers/common/mlx5/windows/mlx5_win_defs.h b/drivers/common/mlx5/windows/mlx5_win_defs.h
index 65da820c5e..1ddf5c553d 100644
--- a/drivers/common/mlx5/windows/mlx5_win_defs.h
+++ b/drivers/common/mlx5/windows/mlx5_win_defs.h
@@ -219,18 +219,6 @@ struct mlx5_action {
} dest_tir;
};
-struct mlx5_err_cqe {
- uint8_t rsvd0[32];
- uint32_t srqn;
- uint8_t rsvd1[18];
- uint8_t vendor_err_synd;
- uint8_t syndrome;
- uint32_t s_wqe_opcode_qpn;
- uint16_t wqe_counter;
- uint8_t signature;
- uint8_t op_own;
-};
-
struct mlx5_wqe_srq_next_seg {
uint8_t rsvd0[2];
rte_be16_t next_wqe_index;
diff --git a/drivers/compress/mlx5/mlx5_compress.c b/drivers/compress/mlx5/mlx5_compress.c
index 41d9752833..702108c5f9 100644
--- a/drivers/compress/mlx5/mlx5_compress.c
+++ b/drivers/compress/mlx5/mlx5_compress.c
@@ -602,7 +602,7 @@ mlx5_compress_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe,
size_t i;
DRV_LOG(ERR, "Error cqe:");
- for (i = 0; i < sizeof(struct mlx5_err_cqe) >> 2; i += 4)
+ for (i = 0; i < sizeof(struct mlx5_error_cqe) >> 2; i += 4)
DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
cqe[i + 2], cqe[i + 3]);
DRV_LOG(ERR, "\nError wqe:");
@@ -620,7 +620,7 @@ mlx5_compress_cqe_err_handle(struct mlx5_compress_qp *qp,
struct rte_comp_op *op)
{
const uint32_t idx = qp->ci & (qp->entries_n - 1);
- volatile struct mlx5_err_cqe *cqe = (volatile struct mlx5_err_cqe *)
+ volatile struct mlx5_error_cqe *cqe = (volatile struct mlx5_error_cqe *)
&qp->cq.cqes[idx];
volatile struct mlx5_gga_wqe *wqes = (volatile struct mlx5_gga_wqe *)
qp->qp.wqes;
diff --git a/drivers/crypto/mlx5/mlx5_crypto.c b/drivers/crypto/mlx5/mlx5_crypto.c
index 5267f48c1e..ab2ffe6d7f 100644
--- a/drivers/crypto/mlx5/mlx5_crypto.c
+++ b/drivers/crypto/mlx5/mlx5_crypto.c
@@ -460,7 +460,7 @@ static __rte_noinline void
mlx5_crypto_cqe_err_handle(struct mlx5_crypto_qp *qp, struct rte_crypto_op *op)
{
const uint32_t idx = qp->ci & (qp->entries_n - 1);
- volatile struct mlx5_err_cqe *cqe = (volatile struct mlx5_err_cqe *)
+ volatile struct mlx5_error_cqe *cqe = (volatile struct mlx5_error_cqe *)
&qp->cq_obj.cqes[idx];
op->status = RTE_CRYPTO_OP_STATUS_ERROR;
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 29bd7ce9e8..c2f9b2d507 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -489,7 +489,7 @@ mlx5_aso_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe)
int i;
DRV_LOG(ERR, "Error cqe:");
- for (i = 0; i < 16; i += 4)
+ for (i = 0; i < (int)sizeof(struct mlx5_error_cqe) / 4; i += 4)
DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
cqe[i + 2], cqe[i + 3]);
DRV_LOG(ERR, "\nError wqe:");
@@ -509,8 +509,8 @@ mlx5_aso_cqe_err_handle(struct mlx5_aso_sq *sq)
{
struct mlx5_aso_cq *cq = &sq->cq;
uint32_t idx = cq->cq_ci & ((1 << cq->log_desc_n) - 1);
- volatile struct mlx5_err_cqe *cqe =
- (volatile struct mlx5_err_cqe *)&cq->cq_obj.cqes[idx];
+ volatile struct mlx5_error_cqe *cqe =
+ (volatile struct mlx5_error_cqe *)&cq->cq_obj.cqes[idx];
cq->errors++;
idx = rte_be_to_cpu_16(cqe->wqe_counter) & (1u << sq->log_desc_n);
diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index a2be523e9e..69fd173239 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -459,7 +459,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec,
container_of(rxq, struct mlx5_rxq_ctrl, rxq);
union {
volatile struct mlx5_cqe *cqe;
- volatile struct mlx5_err_cqe *err_cqe;
+ volatile struct mlx5_error_cqe *err_cqe;
} u = {
.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec) & cqe_mask],
};
diff --git a/drivers/net/mlx5/mlx5_tx.c b/drivers/net/mlx5/mlx5_tx.c
index 14e1487e59..8b1a0ca3d3 100644
--- a/drivers/net/mlx5/mlx5_tx.c
+++ b/drivers/net/mlx5/mlx5_tx.c
@@ -55,7 +55,7 @@ tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl)
/* Return 1 if the error CQE is signed otherwise, sign it and return 0. */
static int
-check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe)
+check_err_cqe_seen(volatile struct mlx5_error_cqe *err_cqe)
{
static const uint8_t magic[] = "seen";
int ret = 1;
@@ -83,7 +83,7 @@ check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe)
*/
static int
mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq,
- volatile struct mlx5_err_cqe *err_cqe)
+ volatile struct mlx5_error_cqe *err_cqe)
{
if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) {
const uint16_t wqe_m = ((1 << txq->wqe_n) - 1);
@@ -107,7 +107,7 @@ mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq,
mlx5_dump_debug_information(name, "MLX5 Error CQ:",
(const void *)((uintptr_t)
txq->cqes),
- sizeof(struct mlx5_cqe) *
+ sizeof(struct mlx5_error_cqe) *
(1 << txq->cqe_n));
mlx5_dump_debug_information(name, "MLX5 Error SQ:",
(const void *)((uintptr_t)
@@ -206,7 +206,7 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
*/
rte_wmb();
ret = mlx5_tx_error_cqe_handle
- (txq, (volatile struct mlx5_err_cqe *)cqe);
+ (txq, (volatile struct mlx5_error_cqe *)cqe);
if (unlikely(ret < 0)) {
/*
* Some error occurred on queue error
--
2.18.2
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] drivers: fix error CQE handling
2023-05-02 18:49 [PATCH] drivers: fix error CQE handling Alexander Kozyrev
@ 2024-10-03 19:14 ` Stephen Hemminger
2024-10-03 20:26 ` [PATCH v2] " Alexander Kozyrev
1 sibling, 0 replies; 6+ messages in thread
From: Stephen Hemminger @ 2024-10-03 19:14 UTC (permalink / raw)
To: Alexander Kozyrev; +Cc: dev, stable, rasland, viacheslavo, matan
On Tue, 2 May 2023 21:49:22 +0300
Alexander Kozyrev <akozyrev@nvidia.com> wrote:
> The completion queue element size can be independently configured
> to report either 64 or 128 bytes CQEs by programming cqe_sz parameter
> at CQ creation. This parameter depends on the cache line size and
> affects both regular CQEs and error CQEs. But the error handling
> assumes that an error CQE is 64 bytes and doesn't take the padding
> into consideration on platforms with 128-byte cache lines.
> Fix the error CQE size in all error handling routines in mlx5.
>
> Fixes: 957e45fb7b ("net/mlx5: handle Tx completion with error")
> Cc: stable@dpdk.org
>
> Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
I have no idea about internals of mlx5 but the concept seems fine.
Patch needs to be rebased.
Please title it with net/mlx5 not drivers.
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH v2] drivers: fix error CQE handling
2023-05-02 18:49 [PATCH] drivers: fix error CQE handling Alexander Kozyrev
2024-10-03 19:14 ` Stephen Hemminger
@ 2024-10-03 20:26 ` Alexander Kozyrev
2024-10-28 15:58 ` Slava Ovsiienko
2024-10-28 17:17 ` [PATCH v3] drivers: fix error CQE handling for 128 bytes CQE Alexander Kozyrev
1 sibling, 2 replies; 6+ messages in thread
From: Alexander Kozyrev @ 2024-10-03 20:26 UTC (permalink / raw)
To: dev; +Cc: stable, rasland, viacheslavo, matan, stephen
The completion queue element size can be independently configured
to report either 64 or 128 bytes CQEs by programming cqe_sz parameter
at CQ creation. This parameter depends on the cache line size and
affects both regular CQEs and error CQEs. But the error handling
assumes that an error CQE is 64 bytes and doesn't take the padding
into consideration on platforms with 128-byte cache lines.
Fix the error CQE size in all error handling routines in mlx5.
Fixes: 957e45fb7b ("net/mlx5: handle Tx completion with error")
Cc: stable@dpdk.org
Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
---
drivers/common/mlx5/mlx5_prm.h | 29 ++++++++++++++++++++-
drivers/common/mlx5/windows/mlx5_win_defs.h | 12 ---------
drivers/compress/mlx5/mlx5_compress.c | 4 +--
drivers/crypto/mlx5/mlx5_crypto_gcm.c | 2 +-
drivers/crypto/mlx5/mlx5_crypto_xts.c | 2 +-
drivers/net/mlx5/hws/mlx5dr_send.c | 2 +-
drivers/net/mlx5/mlx5_flow_aso.c | 6 ++---
drivers/net/mlx5/mlx5_rx.c | 2 +-
drivers/net/mlx5/mlx5_tx.c | 8 +++---
9 files changed, 41 insertions(+), 26 deletions(-)
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 359f02f17c..210158350d 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -268,8 +268,12 @@
/* Maximum number of DS in WQE. Limited by 6-bit field. */
#define MLX5_DSEG_MAX 63
-/* The 32 bit syndrome offset in struct mlx5_err_cqe. */
+/* The 32 bit syndrome offset in struct mlx5_error_cqe. */
+#if (RTE_CACHE_LINE_SIZE == 128)
+#define MLX5_ERROR_CQE_SYNDROME_OFFSET 116
+#else
#define MLX5_ERROR_CQE_SYNDROME_OFFSET 52
+#endif
/* The completion mode offset in the WQE control segment line 2. */
#define MLX5_COMP_MODE_OFFSET 2
@@ -415,6 +419,29 @@ struct mlx5_wqe_mprq {
#define MLX5_MPRQ_STRIDE_SHIFT_BYTE 2
+struct mlx5_error_cqe {
+#if (RTE_CACHE_LINE_SIZE == 128)
+ uint8_t padding[64];
+#endif
+ uint8_t rsvd0[2];
+ uint16_t eth_wqe_id;
+ uint8_t rsvd1[16];
+ uint16_t ib_stride_index;
+ uint8_t rsvd2[10];
+ uint32_t srqn;
+ uint8_t rsvd3[8];
+ uint32_t byte_cnt;
+ uint8_t rsvd4[4];
+ uint8_t hw_err_synd;
+ uint8_t hw_synd_type;
+ uint8_t vendor_err_synd;
+ uint8_t syndrome;
+ uint32_t s_wqe_opcode_qpn;
+ uint16_t wqe_counter;
+ uint8_t signature;
+ uint8_t op_own;
+};
+
/* CQ element structure - should be equal to the cache line size */
struct mlx5_cqe {
#if (RTE_CACHE_LINE_SIZE == 128)
diff --git a/drivers/common/mlx5/windows/mlx5_win_defs.h b/drivers/common/mlx5/windows/mlx5_win_defs.h
index 79e7a7f386..d60df6fd37 100644
--- a/drivers/common/mlx5/windows/mlx5_win_defs.h
+++ b/drivers/common/mlx5/windows/mlx5_win_defs.h
@@ -219,18 +219,6 @@ struct mlx5_action {
} dest_tir;
};
-struct mlx5_err_cqe {
- uint8_t rsvd0[32];
- uint32_t srqn;
- uint8_t rsvd1[18];
- uint8_t vendor_err_synd;
- uint8_t syndrome;
- uint32_t s_wqe_opcode_qpn;
- uint16_t wqe_counter;
- uint8_t signature;
- uint8_t op_own;
-};
-
struct mlx5_wqe_srq_next_seg {
uint8_t rsvd0[2];
rte_be16_t next_wqe_index;
diff --git a/drivers/compress/mlx5/mlx5_compress.c b/drivers/compress/mlx5/mlx5_compress.c
index 5998d060e4..82105bfebd 100644
--- a/drivers/compress/mlx5/mlx5_compress.c
+++ b/drivers/compress/mlx5/mlx5_compress.c
@@ -602,7 +602,7 @@ mlx5_compress_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe,
size_t i;
DRV_LOG(ERR, "Error cqe:");
- for (i = 0; i < sizeof(struct mlx5_err_cqe) >> 2; i += 4)
+ for (i = 0; i < sizeof(struct mlx5_error_cqe) >> 2; i += 4)
DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
cqe[i + 2], cqe[i + 3]);
DRV_LOG(ERR, "\nError wqe:");
@@ -620,7 +620,7 @@ mlx5_compress_cqe_err_handle(struct mlx5_compress_qp *qp,
struct rte_comp_op *op)
{
const uint32_t idx = qp->ci & (qp->entries_n - 1);
- volatile struct mlx5_err_cqe *cqe = (volatile struct mlx5_err_cqe *)
+ volatile struct mlx5_error_cqe *cqe = (volatile struct mlx5_error_cqe *)
&qp->cq.cqes[idx];
volatile struct mlx5_gga_wqe *wqes = (volatile struct mlx5_gga_wqe *)
qp->qp.wqes;
diff --git a/drivers/crypto/mlx5/mlx5_crypto_gcm.c b/drivers/crypto/mlx5/mlx5_crypto_gcm.c
index f598273873..cd21605bd2 100644
--- a/drivers/crypto/mlx5/mlx5_crypto_gcm.c
+++ b/drivers/crypto/mlx5/mlx5_crypto_gcm.c
@@ -877,7 +877,7 @@ mlx5_crypto_gcm_cqe_err_handle(struct mlx5_crypto_qp *qp, struct rte_crypto_op *
{
uint8_t op_code;
const uint32_t idx = qp->cq_ci & (qp->entries_n - 1);
- volatile struct mlx5_err_cqe *cqe = (volatile struct mlx5_err_cqe *)
+ volatile struct mlx5_error_cqe *cqe = (volatile struct mlx5_error_cqe *)
&qp->cq_obj.cqes[idx];
op_code = rte_be_to_cpu_32(cqe->s_wqe_opcode_qpn) >> MLX5_CQ_INDEX_WIDTH;
diff --git a/drivers/crypto/mlx5/mlx5_crypto_xts.c b/drivers/crypto/mlx5/mlx5_crypto_xts.c
index d4e1dd718c..b9214711ac 100644
--- a/drivers/crypto/mlx5/mlx5_crypto_xts.c
+++ b/drivers/crypto/mlx5/mlx5_crypto_xts.c
@@ -363,7 +363,7 @@ static __rte_noinline void
mlx5_crypto_xts_cqe_err_handle(struct mlx5_crypto_qp *qp, struct rte_crypto_op *op)
{
const uint32_t idx = qp->ci & (qp->entries_n - 1);
- volatile struct mlx5_err_cqe *cqe = (volatile struct mlx5_err_cqe *)
+ volatile struct mlx5_error_cqe *cqe = (volatile struct mlx5_error_cqe *)
&qp->cq_obj.cqes[idx];
op->status = RTE_CRYPTO_OP_STATUS_ERROR;
diff --git a/drivers/net/mlx5/hws/mlx5dr_send.c b/drivers/net/mlx5/hws/mlx5dr_send.c
index e9abf3dddb..e121c7f7ed 100644
--- a/drivers/net/mlx5/hws/mlx5dr_send.c
+++ b/drivers/net/mlx5/hws/mlx5dr_send.c
@@ -599,7 +599,7 @@ static void mlx5dr_send_engine_poll_cq(struct mlx5dr_send_engine *queue,
return;
if (unlikely(cqe_opcode != MLX5_CQE_REQ)) {
- struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe;
+ struct mlx5_error_cqe *err_cqe = (struct mlx5_error_cqe *)cqe;
DR_LOG(ERR, "CQE ERR:0x%x, Vendor_ERR:0x%x, OP:0x%x, QPN:0x%x, WQE_CNT:0x%x",
err_cqe->syndrome, err_cqe->vendor_err_synd, cqe_opcode,
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index a94b228396..feca8c3e89 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -489,7 +489,7 @@ mlx5_aso_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe)
int i;
DRV_LOG(ERR, "Error cqe:");
- for (i = 0; i < 16; i += 4)
+ for (i = 0; i < (int)sizeof(struct mlx5_error_cqe) / 4; i += 4)
DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
cqe[i + 2], cqe[i + 3]);
DRV_LOG(ERR, "\nError wqe:");
@@ -509,8 +509,8 @@ mlx5_aso_cqe_err_handle(struct mlx5_aso_sq *sq)
{
struct mlx5_aso_cq *cq = &sq->cq;
uint32_t idx = cq->cq_ci & ((1 << cq->log_desc_n) - 1);
- volatile struct mlx5_err_cqe *cqe =
- (volatile struct mlx5_err_cqe *)&cq->cq_obj.cqes[idx];
+ volatile struct mlx5_error_cqe *cqe =
+ (volatile struct mlx5_error_cqe *)&cq->cq_obj.cqes[idx];
cq->errors++;
idx = rte_be_to_cpu_16(cqe->wqe_counter) & (1u << sq->log_desc_n);
diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index f241809e08..5e58eb8bc9 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -459,7 +459,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec,
container_of(rxq, struct mlx5_rxq_ctrl, rxq);
union {
volatile struct mlx5_cqe *cqe;
- volatile struct mlx5_err_cqe *err_cqe;
+ volatile struct mlx5_error_cqe *err_cqe;
} u = {
.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec) & cqe_mask],
};
diff --git a/drivers/net/mlx5/mlx5_tx.c b/drivers/net/mlx5/mlx5_tx.c
index 04f80bb9bd..2f48bbc82e 100644
--- a/drivers/net/mlx5/mlx5_tx.c
+++ b/drivers/net/mlx5/mlx5_tx.c
@@ -55,7 +55,7 @@ tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl)
/* Return 1 if the error CQE is signed otherwise, sign it and return 0. */
static int
-check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe)
+check_err_cqe_seen(volatile struct mlx5_error_cqe *err_cqe)
{
static const uint8_t magic[] = "seen";
int ret = 1;
@@ -83,7 +83,7 @@ check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe)
*/
static int
mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq,
- volatile struct mlx5_err_cqe *err_cqe)
+ volatile struct mlx5_error_cqe *err_cqe)
{
if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) {
const uint16_t wqe_m = ((1 << txq->wqe_n) - 1);
@@ -107,7 +107,7 @@ mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq,
mlx5_dump_debug_information(name, "MLX5 Error CQ:",
(const void *)((uintptr_t)
txq->cqes),
- sizeof(struct mlx5_cqe) *
+ sizeof(struct mlx5_error_cqe) *
(1 << txq->cqe_n));
mlx5_dump_debug_information(name, "MLX5 Error SQ:",
(const void *)((uintptr_t)
@@ -206,7 +206,7 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
*/
rte_wmb();
ret = mlx5_tx_error_cqe_handle
- (txq, (volatile struct mlx5_err_cqe *)cqe);
+ (txq, (volatile struct mlx5_error_cqe *)cqe);
if (unlikely(ret < 0)) {
/*
* Some error occurred on queue error
--
2.18.2
^ permalink raw reply [flat|nested] 6+ messages in thread
* RE: [PATCH v2] drivers: fix error CQE handling
2024-10-03 20:26 ` [PATCH v2] " Alexander Kozyrev
@ 2024-10-28 15:58 ` Slava Ovsiienko
2024-10-28 17:17 ` [PATCH v3] drivers: fix error CQE handling for 128 bytes CQE Alexander Kozyrev
1 sibling, 0 replies; 6+ messages in thread
From: Slava Ovsiienko @ 2024-10-28 15:58 UTC (permalink / raw)
To: Alexander Kozyrev, dev; +Cc: stable, Raslan Darawsheh, Matan Azrad, stephen
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
> -----Original Message-----
> From: Alexander Kozyrev <akozyrev@nvidia.com>
> Sent: Thursday, October 3, 2024 11:26 PM
> To: dev@dpdk.org
> Cc: stable@dpdk.org; Raslan Darawsheh <rasland@nvidia.com>; Slava
> Ovsiienko <viacheslavo@nvidia.com>; Matan Azrad <matan@nvidia.com>;
> stephen@networkplumber.org
> Subject: [PATCH v2] drivers: fix error CQE handling
>
> The completion queue element size can be independently configured to
> report either 64 or 128 bytes CQEs by programming cqe_sz parameter at CQ
> creation. This parameter depends on the cache line size and affects both
> regular CQEs and error CQEs. But the error handling assumes that an error
> CQE is 64 bytes and doesn't take the padding into consideration on platforms
> with 128-byte cache lines.
> Fix the error CQE size in all error handling routines in mlx5.
>
> Fixes: 957e45fb7b ("net/mlx5: handle Tx completion with error")
> Cc: stable@dpdk.org
>
> Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
> ---
> drivers/common/mlx5/mlx5_prm.h | 29 ++++++++++++++++++++-
> drivers/common/mlx5/windows/mlx5_win_defs.h | 12 ---------
> drivers/compress/mlx5/mlx5_compress.c | 4 +--
> drivers/crypto/mlx5/mlx5_crypto_gcm.c | 2 +-
> drivers/crypto/mlx5/mlx5_crypto_xts.c | 2 +-
> drivers/net/mlx5/hws/mlx5dr_send.c | 2 +-
> drivers/net/mlx5/mlx5_flow_aso.c | 6 ++---
> drivers/net/mlx5/mlx5_rx.c | 2 +-
> drivers/net/mlx5/mlx5_tx.c | 8 +++---
> 9 files changed, 41 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/common/mlx5/mlx5_prm.h
> b/drivers/common/mlx5/mlx5_prm.h index 359f02f17c..210158350d 100644
> --- a/drivers/common/mlx5/mlx5_prm.h
> +++ b/drivers/common/mlx5/mlx5_prm.h
> @@ -268,8 +268,12 @@
> /* Maximum number of DS in WQE. Limited by 6-bit field. */ #define
> MLX5_DSEG_MAX 63
>
> -/* The 32 bit syndrome offset in struct mlx5_err_cqe. */
> +/* The 32 bit syndrome offset in struct mlx5_error_cqe. */
> +#if (RTE_CACHE_LINE_SIZE == 128)
> +#define MLX5_ERROR_CQE_SYNDROME_OFFSET 116
> +#else
> #define MLX5_ERROR_CQE_SYNDROME_OFFSET 52
> +#endif
>
> /* The completion mode offset in the WQE control segment line 2. */
> #define MLX5_COMP_MODE_OFFSET 2 @@ -415,6 +419,29 @@ struct
> mlx5_wqe_mprq {
>
> #define MLX5_MPRQ_STRIDE_SHIFT_BYTE 2
>
> +struct mlx5_error_cqe {
> +#if (RTE_CACHE_LINE_SIZE == 128)
> + uint8_t padding[64];
> +#endif
> + uint8_t rsvd0[2];
> + uint16_t eth_wqe_id;
> + uint8_t rsvd1[16];
> + uint16_t ib_stride_index;
> + uint8_t rsvd2[10];
> + uint32_t srqn;
> + uint8_t rsvd3[8];
> + uint32_t byte_cnt;
> + uint8_t rsvd4[4];
> + uint8_t hw_err_synd;
> + uint8_t hw_synd_type;
> + uint8_t vendor_err_synd;
> + uint8_t syndrome;
> + uint32_t s_wqe_opcode_qpn;
> + uint16_t wqe_counter;
> + uint8_t signature;
> + uint8_t op_own;
> +};
> +
> /* CQ element structure - should be equal to the cache line size */
> struct mlx5_cqe {
> #if (RTE_CACHE_LINE_SIZE == 128)
> diff --git a/drivers/common/mlx5/windows/mlx5_win_defs.h b/drivers/common/mlx5/windows/mlx5_win_defs.h
> index 79e7a7f386..d60df6fd37 100644
> --- a/drivers/common/mlx5/windows/mlx5_win_defs.h
> +++ b/drivers/common/mlx5/windows/mlx5_win_defs.h
> @@ -219,18 +219,6 @@ struct mlx5_action {
> } dest_tir;
> };
>
> -struct mlx5_err_cqe {
> - uint8_t rsvd0[32];
> - uint32_t srqn;
> - uint8_t rsvd1[18];
> - uint8_t vendor_err_synd;
> - uint8_t syndrome;
> - uint32_t s_wqe_opcode_qpn;
> - uint16_t wqe_counter;
> - uint8_t signature;
> - uint8_t op_own;
> -};
> -
> struct mlx5_wqe_srq_next_seg {
> uint8_t rsvd0[2];
> rte_be16_t next_wqe_index;
> diff --git a/drivers/compress/mlx5/mlx5_compress.c
> b/drivers/compress/mlx5/mlx5_compress.c
> index 5998d060e4..82105bfebd 100644
> --- a/drivers/compress/mlx5/mlx5_compress.c
> +++ b/drivers/compress/mlx5/mlx5_compress.c
> @@ -602,7 +602,7 @@ mlx5_compress_dump_err_objs(volatile uint32_t
> *cqe, volatile uint32_t *wqe,
> size_t i;
>
> DRV_LOG(ERR, "Error cqe:");
> - for (i = 0; i < sizeof(struct mlx5_err_cqe) >> 2; i += 4)
> + for (i = 0; i < sizeof(struct mlx5_error_cqe) >> 2; i += 4)
> DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
> cqe[i + 2], cqe[i + 3]);
> DRV_LOG(ERR, "\nError wqe:");
> @@ -620,7 +620,7 @@ mlx5_compress_cqe_err_handle(struct
> mlx5_compress_qp *qp,
> struct rte_comp_op *op)
> {
> const uint32_t idx = qp->ci & (qp->entries_n - 1);
> -	volatile struct mlx5_err_cqe *cqe = (volatile struct mlx5_err_cqe *)
> +	volatile struct mlx5_error_cqe *cqe = (volatile struct mlx5_error_cqe *)
> 							      &qp->cq.cqes[idx];
> 	volatile struct mlx5_gga_wqe *wqes = (volatile struct mlx5_gga_wqe *)
> 							      qp->qp.wqes;
> diff --git a/drivers/crypto/mlx5/mlx5_crypto_gcm.c
> b/drivers/crypto/mlx5/mlx5_crypto_gcm.c
> index f598273873..cd21605bd2 100644
> --- a/drivers/crypto/mlx5/mlx5_crypto_gcm.c
> +++ b/drivers/crypto/mlx5/mlx5_crypto_gcm.c
> @@ -877,7 +877,7 @@ mlx5_crypto_gcm_cqe_err_handle(struct
> mlx5_crypto_qp *qp, struct rte_crypto_op * {
> uint8_t op_code;
> const uint32_t idx = qp->cq_ci & (qp->entries_n - 1);
> - volatile struct mlx5_err_cqe *cqe = (volatile struct mlx5_err_cqe *)
> +	volatile struct mlx5_error_cqe *cqe = (volatile struct mlx5_error_cqe *)
> 							      &qp->cq_obj.cqes[idx];
>
> 	op_code = rte_be_to_cpu_32(cqe->s_wqe_opcode_qpn) >> MLX5_CQ_INDEX_WIDTH;
> diff --git a/drivers/crypto/mlx5/mlx5_crypto_xts.c b/drivers/crypto/mlx5/mlx5_crypto_xts.c
> index d4e1dd718c..b9214711ac 100644
> --- a/drivers/crypto/mlx5/mlx5_crypto_xts.c
> +++ b/drivers/crypto/mlx5/mlx5_crypto_xts.c
> @@ -363,7 +363,7 @@ static __rte_noinline void
> mlx5_crypto_xts_cqe_err_handle(struct mlx5_crypto_qp *qp, struct
> rte_crypto_op *op) {
> const uint32_t idx = qp->ci & (qp->entries_n - 1);
> - volatile struct mlx5_err_cqe *cqe = (volatile struct mlx5_err_cqe *)
> +	volatile struct mlx5_error_cqe *cqe = (volatile struct mlx5_error_cqe *)
> 							      &qp->cq_obj.cqes[idx];
>
> 	op->status = RTE_CRYPTO_OP_STATUS_ERROR;
> diff --git a/drivers/net/mlx5/hws/mlx5dr_send.c b/drivers/net/mlx5/hws/mlx5dr_send.c
> index e9abf3dddb..e121c7f7ed 100644
> --- a/drivers/net/mlx5/hws/mlx5dr_send.c
> +++ b/drivers/net/mlx5/hws/mlx5dr_send.c
> @@ -599,7 +599,7 @@ static void mlx5dr_send_engine_poll_cq(struct
> mlx5dr_send_engine *queue,
> return;
>
> if (unlikely(cqe_opcode != MLX5_CQE_REQ)) {
> - struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe;
> + struct mlx5_error_cqe *err_cqe = (struct mlx5_error_cqe
> *)cqe;
>
> DR_LOG(ERR, "CQE ERR:0x%x, Vendor_ERR:0x%x, OP:0x%x,
> QPN:0x%x, WQE_CNT:0x%x",
> err_cqe->syndrome, err_cqe->vendor_err_synd,
> cqe_opcode, diff --git a/drivers/net/mlx5/mlx5_flow_aso.c
> b/drivers/net/mlx5/mlx5_flow_aso.c
> index a94b228396..feca8c3e89 100644
> --- a/drivers/net/mlx5/mlx5_flow_aso.c
> +++ b/drivers/net/mlx5/mlx5_flow_aso.c
> @@ -489,7 +489,7 @@ mlx5_aso_dump_err_objs(volatile uint32_t *cqe,
> volatile uint32_t *wqe)
> int i;
>
> DRV_LOG(ERR, "Error cqe:");
> - for (i = 0; i < 16; i += 4)
> + for (i = 0; i < (int)sizeof(struct mlx5_error_cqe) / 4; i += 4)
> DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
> cqe[i + 2], cqe[i + 3]);
> DRV_LOG(ERR, "\nError wqe:");
> @@ -509,8 +509,8 @@ mlx5_aso_cqe_err_handle(struct mlx5_aso_sq *sq) {
> struct mlx5_aso_cq *cq = &sq->cq;
> uint32_t idx = cq->cq_ci & ((1 << cq->log_desc_n) - 1);
> - volatile struct mlx5_err_cqe *cqe =
> - (volatile struct mlx5_err_cqe *)&cq-
> >cq_obj.cqes[idx];
> + volatile struct mlx5_error_cqe *cqe =
> + (volatile struct mlx5_error_cqe *)&cq-
> >cq_obj.cqes[idx];
>
> cq->errors++;
> idx = rte_be_to_cpu_16(cqe->wqe_counter) & (1u << sq-
> >log_desc_n); diff --git a/drivers/net/mlx5/mlx5_rx.c
> b/drivers/net/mlx5/mlx5_rx.c index f241809e08..5e58eb8bc9 100644
> --- a/drivers/net/mlx5/mlx5_rx.c
> +++ b/drivers/net/mlx5/mlx5_rx.c
> @@ -459,7 +459,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq,
> uint8_t vec,
> container_of(rxq, struct mlx5_rxq_ctrl, rxq);
> union {
> volatile struct mlx5_cqe *cqe;
> - volatile struct mlx5_err_cqe *err_cqe;
> + volatile struct mlx5_error_cqe *err_cqe;
> } u = {
> .cqe = &(*rxq->cqes)[(rxq->cq_ci - vec) & cqe_mask],
> };
> diff --git a/drivers/net/mlx5/mlx5_tx.c b/drivers/net/mlx5/mlx5_tx.c index
> 04f80bb9bd..2f48bbc82e 100644
> --- a/drivers/net/mlx5/mlx5_tx.c
> +++ b/drivers/net/mlx5/mlx5_tx.c
> @@ -55,7 +55,7 @@ tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl)
>
> /* Return 1 if the error CQE is signed otherwise, sign it and return 0. */
> static int
> -check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe)
> +check_err_cqe_seen(volatile struct mlx5_error_cqe *err_cqe)
> {
> static const uint8_t magic[] = "seen";
> int ret = 1;
> @@ -83,7 +83,7 @@ check_err_cqe_seen(volatile struct mlx5_err_cqe
> *err_cqe)
> */
> static int
> mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq,
> - volatile struct mlx5_err_cqe *err_cqe)
> + volatile struct mlx5_error_cqe *err_cqe)
> {
> if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR)
> {
> const uint16_t wqe_m = ((1 << txq->wqe_n) - 1); @@ -107,7
> +107,7 @@ mlx5_tx_error_cqe_handle(struct mlx5_txq_data
> *__rte_restrict txq,
> mlx5_dump_debug_information(name, "MLX5 Error
> CQ:",
> (const void *)((uintptr_t)
> txq->cqes),
> - sizeof(struct mlx5_cqe) *
> + sizeof(struct
> mlx5_error_cqe) *
> (1 << txq->cqe_n));
> mlx5_dump_debug_information(name, "MLX5 Error
> SQ:",
> (const void *)((uintptr_t)
> @@ -206,7 +206,7 @@ mlx5_tx_handle_completion(struct mlx5_txq_data
> *__rte_restrict txq,
> */
> rte_wmb();
> ret = mlx5_tx_error_cqe_handle
> - (txq, (volatile struct mlx5_err_cqe *)cqe);
> + (txq, (volatile struct mlx5_error_cqe *)cqe);
> if (unlikely(ret < 0)) {
> /*
> * Some error occurred on queue error
> --
> 2.18.2
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH v3] drivers: fix error CQE handling for 128 bytes CQE
2024-10-03 20:26 ` [PATCH v2] " Alexander Kozyrev
2024-10-28 15:58 ` Slava Ovsiienko
@ 2024-10-28 17:17 ` Alexander Kozyrev
2024-11-13 13:34 ` Raslan Darawsheh
1 sibling, 1 reply; 6+ messages in thread
From: Alexander Kozyrev @ 2024-10-28 17:17 UTC (permalink / raw)
To: dev; +Cc: stable, rasland, viacheslavo, matan, dsosnowski, bingz, suanmingm
The completion queue element size can be independently configured
to report either 64 or 128 bytes CQEs by programming cqe_sz parameter
at CQ creation. This parameter depends on the cache line size and
affects both regular CQEs and error CQEs. But the error handling
assumes that an error CQE is 64 bytes and doesn't take the padding
into consideration on platforms with 128-byte cache lines.
Fix the error CQE size in all error handling routines in mlx5.
Fixes: 957e45fb7b ("net/mlx5: handle Tx completion with error")
Cc: stable@dpdk.org
Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
drivers/common/mlx5/mlx5_prm.h | 29 ++++++++++++++++++++-
drivers/common/mlx5/windows/mlx5_win_defs.h | 12 ---------
drivers/compress/mlx5/mlx5_compress.c | 4 +--
drivers/crypto/mlx5/mlx5_crypto_gcm.c | 2 +-
drivers/crypto/mlx5/mlx5_crypto_xts.c | 2 +-
drivers/net/mlx5/hws/mlx5dr_send.c | 2 +-
drivers/net/mlx5/mlx5_flow_aso.c | 6 ++---
drivers/net/mlx5/mlx5_rx.c | 2 +-
drivers/net/mlx5/mlx5_tx.c | 8 +++---
9 files changed, 41 insertions(+), 26 deletions(-)
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 359f02f17c..210158350d 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -268,8 +268,12 @@
/* Maximum number of DS in WQE. Limited by 6-bit field. */
#define MLX5_DSEG_MAX 63
-/* The 32 bit syndrome offset in struct mlx5_err_cqe. */
+/* The 32 bit syndrome offset in struct mlx5_error_cqe. */
+#if (RTE_CACHE_LINE_SIZE == 128)
+#define MLX5_ERROR_CQE_SYNDROME_OFFSET 116
+#else
#define MLX5_ERROR_CQE_SYNDROME_OFFSET 52
+#endif
/* The completion mode offset in the WQE control segment line 2. */
#define MLX5_COMP_MODE_OFFSET 2
@@ -415,6 +419,29 @@ struct mlx5_wqe_mprq {
#define MLX5_MPRQ_STRIDE_SHIFT_BYTE 2
+struct mlx5_error_cqe {
+#if (RTE_CACHE_LINE_SIZE == 128)
+ uint8_t padding[64];
+#endif
+ uint8_t rsvd0[2];
+ uint16_t eth_wqe_id;
+ uint8_t rsvd1[16];
+ uint16_t ib_stride_index;
+ uint8_t rsvd2[10];
+ uint32_t srqn;
+ uint8_t rsvd3[8];
+ uint32_t byte_cnt;
+ uint8_t rsvd4[4];
+ uint8_t hw_err_synd;
+ uint8_t hw_synd_type;
+ uint8_t vendor_err_synd;
+ uint8_t syndrome;
+ uint32_t s_wqe_opcode_qpn;
+ uint16_t wqe_counter;
+ uint8_t signature;
+ uint8_t op_own;
+};
+
/* CQ element structure - should be equal to the cache line size */
struct mlx5_cqe {
#if (RTE_CACHE_LINE_SIZE == 128)
diff --git a/drivers/common/mlx5/windows/mlx5_win_defs.h b/drivers/common/mlx5/windows/mlx5_win_defs.h
index 79e7a7f386..d60df6fd37 100644
--- a/drivers/common/mlx5/windows/mlx5_win_defs.h
+++ b/drivers/common/mlx5/windows/mlx5_win_defs.h
@@ -219,18 +219,6 @@ struct mlx5_action {
} dest_tir;
};
-struct mlx5_err_cqe {
- uint8_t rsvd0[32];
- uint32_t srqn;
- uint8_t rsvd1[18];
- uint8_t vendor_err_synd;
- uint8_t syndrome;
- uint32_t s_wqe_opcode_qpn;
- uint16_t wqe_counter;
- uint8_t signature;
- uint8_t op_own;
-};
-
struct mlx5_wqe_srq_next_seg {
uint8_t rsvd0[2];
rte_be16_t next_wqe_index;
diff --git a/drivers/compress/mlx5/mlx5_compress.c b/drivers/compress/mlx5/mlx5_compress.c
index 5998d060e4..82105bfebd 100644
--- a/drivers/compress/mlx5/mlx5_compress.c
+++ b/drivers/compress/mlx5/mlx5_compress.c
@@ -602,7 +602,7 @@ mlx5_compress_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe,
size_t i;
DRV_LOG(ERR, "Error cqe:");
- for (i = 0; i < sizeof(struct mlx5_err_cqe) >> 2; i += 4)
+ for (i = 0; i < sizeof(struct mlx5_error_cqe) >> 2; i += 4)
DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
cqe[i + 2], cqe[i + 3]);
DRV_LOG(ERR, "\nError wqe:");
@@ -620,7 +620,7 @@ mlx5_compress_cqe_err_handle(struct mlx5_compress_qp *qp,
struct rte_comp_op *op)
{
const uint32_t idx = qp->ci & (qp->entries_n - 1);
- volatile struct mlx5_err_cqe *cqe = (volatile struct mlx5_err_cqe *)
+ volatile struct mlx5_error_cqe *cqe = (volatile struct mlx5_error_cqe *)
&qp->cq.cqes[idx];
volatile struct mlx5_gga_wqe *wqes = (volatile struct mlx5_gga_wqe *)
qp->qp.wqes;
diff --git a/drivers/crypto/mlx5/mlx5_crypto_gcm.c b/drivers/crypto/mlx5/mlx5_crypto_gcm.c
index f598273873..cd21605bd2 100644
--- a/drivers/crypto/mlx5/mlx5_crypto_gcm.c
+++ b/drivers/crypto/mlx5/mlx5_crypto_gcm.c
@@ -877,7 +877,7 @@ mlx5_crypto_gcm_cqe_err_handle(struct mlx5_crypto_qp *qp, struct rte_crypto_op *
{
uint8_t op_code;
const uint32_t idx = qp->cq_ci & (qp->entries_n - 1);
- volatile struct mlx5_err_cqe *cqe = (volatile struct mlx5_err_cqe *)
+ volatile struct mlx5_error_cqe *cqe = (volatile struct mlx5_error_cqe *)
&qp->cq_obj.cqes[idx];
op_code = rte_be_to_cpu_32(cqe->s_wqe_opcode_qpn) >> MLX5_CQ_INDEX_WIDTH;
diff --git a/drivers/crypto/mlx5/mlx5_crypto_xts.c b/drivers/crypto/mlx5/mlx5_crypto_xts.c
index d4e1dd718c..b9214711ac 100644
--- a/drivers/crypto/mlx5/mlx5_crypto_xts.c
+++ b/drivers/crypto/mlx5/mlx5_crypto_xts.c
@@ -363,7 +363,7 @@ static __rte_noinline void
mlx5_crypto_xts_cqe_err_handle(struct mlx5_crypto_qp *qp, struct rte_crypto_op *op)
{
const uint32_t idx = qp->ci & (qp->entries_n - 1);
- volatile struct mlx5_err_cqe *cqe = (volatile struct mlx5_err_cqe *)
+ volatile struct mlx5_error_cqe *cqe = (volatile struct mlx5_error_cqe *)
&qp->cq_obj.cqes[idx];
op->status = RTE_CRYPTO_OP_STATUS_ERROR;
diff --git a/drivers/net/mlx5/hws/mlx5dr_send.c b/drivers/net/mlx5/hws/mlx5dr_send.c
index e9abf3dddb..e121c7f7ed 100644
--- a/drivers/net/mlx5/hws/mlx5dr_send.c
+++ b/drivers/net/mlx5/hws/mlx5dr_send.c
@@ -599,7 +599,7 @@ static void mlx5dr_send_engine_poll_cq(struct mlx5dr_send_engine *queue,
return;
if (unlikely(cqe_opcode != MLX5_CQE_REQ)) {
- struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe;
+ struct mlx5_error_cqe *err_cqe = (struct mlx5_error_cqe *)cqe;
DR_LOG(ERR, "CQE ERR:0x%x, Vendor_ERR:0x%x, OP:0x%x, QPN:0x%x, WQE_CNT:0x%x",
err_cqe->syndrome, err_cqe->vendor_err_synd, cqe_opcode,
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index a94b228396..feca8c3e89 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -489,7 +489,7 @@ mlx5_aso_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe)
int i;
DRV_LOG(ERR, "Error cqe:");
- for (i = 0; i < 16; i += 4)
+ for (i = 0; i < (int)sizeof(struct mlx5_error_cqe) / 4; i += 4)
DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
cqe[i + 2], cqe[i + 3]);
DRV_LOG(ERR, "\nError wqe:");
@@ -509,8 +509,8 @@ mlx5_aso_cqe_err_handle(struct mlx5_aso_sq *sq)
{
struct mlx5_aso_cq *cq = &sq->cq;
uint32_t idx = cq->cq_ci & ((1 << cq->log_desc_n) - 1);
- volatile struct mlx5_err_cqe *cqe =
- (volatile struct mlx5_err_cqe *)&cq->cq_obj.cqes[idx];
+ volatile struct mlx5_error_cqe *cqe =
+ (volatile struct mlx5_error_cqe *)&cq->cq_obj.cqes[idx];
cq->errors++;
idx = rte_be_to_cpu_16(cqe->wqe_counter) & (1u << sq->log_desc_n);
diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index f241809e08..5e58eb8bc9 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -459,7 +459,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec,
container_of(rxq, struct mlx5_rxq_ctrl, rxq);
union {
volatile struct mlx5_cqe *cqe;
- volatile struct mlx5_err_cqe *err_cqe;
+ volatile struct mlx5_error_cqe *err_cqe;
} u = {
.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec) & cqe_mask],
};
diff --git a/drivers/net/mlx5/mlx5_tx.c b/drivers/net/mlx5/mlx5_tx.c
index 04f80bb9bd..2f48bbc82e 100644
--- a/drivers/net/mlx5/mlx5_tx.c
+++ b/drivers/net/mlx5/mlx5_tx.c
@@ -55,7 +55,7 @@ tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl)
/* Return 1 if the error CQE is signed otherwise, sign it and return 0. */
static int
-check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe)
+check_err_cqe_seen(volatile struct mlx5_error_cqe *err_cqe)
{
static const uint8_t magic[] = "seen";
int ret = 1;
@@ -83,7 +83,7 @@ check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe)
*/
static int
mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq,
- volatile struct mlx5_err_cqe *err_cqe)
+ volatile struct mlx5_error_cqe *err_cqe)
{
if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) {
const uint16_t wqe_m = ((1 << txq->wqe_n) - 1);
@@ -107,7 +107,7 @@ mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq,
mlx5_dump_debug_information(name, "MLX5 Error CQ:",
(const void *)((uintptr_t)
txq->cqes),
- sizeof(struct mlx5_cqe) *
+ sizeof(struct mlx5_error_cqe) *
(1 << txq->cqe_n));
mlx5_dump_debug_information(name, "MLX5 Error SQ:",
(const void *)((uintptr_t)
@@ -206,7 +206,7 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
*/
rte_wmb();
ret = mlx5_tx_error_cqe_handle
- (txq, (volatile struct mlx5_err_cqe *)cqe);
+ (txq, (volatile struct mlx5_error_cqe *)cqe);
if (unlikely(ret < 0)) {
/*
* Some error occurred on queue error
--
2.43.5
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v3] drivers: fix error CQE handling for 128 bytes CQE
2024-10-28 17:17 ` [PATCH v3] drivers: fix error CQE handling for 128 bytes CQE Alexander Kozyrev
@ 2024-11-13 13:34 ` Raslan Darawsheh
0 siblings, 0 replies; 6+ messages in thread
From: Raslan Darawsheh @ 2024-11-13 13:34 UTC (permalink / raw)
To: Alexander Kozyrev, dev
Cc: stable, Slava Ovsiienko, Matan Azrad, Dariusz Sosnowski,
Bing Zhao, Suanming Mou
Hi,
From: Alexander Kozyrev <akozyrev@nvidia.com>
Sent: Monday, October 28, 2024 7:17 PM
To: dev@dpdk.org
Cc: stable@dpdk.org; Raslan Darawsheh; Slava Ovsiienko; Matan Azrad; Dariusz Sosnowski; Bing Zhao; Suanming Mou
Subject: [PATCH v3] drivers: fix error CQE handling for 128 bytes CQE
The completion queue element size can be independently configured
to report either 64 or 128 bytes CQEs by programming cqe_sz parameter
at CQ creation. This parameter depends on the cache line size and
affects both regular CQEs and error CQEs. But the error handling
assumes that an error CQE is 64 bytes and doesn't take the padding
into consideration on platforms with 128-byte cache lines.
Fix the error CQE size in all error handling routines in mlx5.
Fixes: 957e45fb7b ("net/mlx5: handle Tx completion with error")
Cc: stable@dpdk.org
Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Patch applied to next-net-mlx,
Kindest regards,
Raslan Darawsheh
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2024-11-13 13:34 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-02 18:49 [PATCH] drivers: fix error CQE handling Alexander Kozyrev
2024-10-03 19:14 ` Stephen Hemminger
2024-10-03 20:26 ` [PATCH v2] " Alexander Kozyrev
2024-10-28 15:58 ` Slava Ovsiienko
2024-10-28 17:17 ` [PATCH v3] drivers: fix error CQE handling for 128 bytes CQE Alexander Kozyrev
2024-11-13 13:34 ` Raslan Darawsheh
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).