From: Kevin Traynor <ktraynor@redhat.com>
To: Alexander Kozyrev <akozyrev@nvidia.com>, stable@dpdk.org
Cc: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Subject: Re: [PATCH 21.11] common/mlx5: fix error CQE handling for 128 bytes CQE
Date: Tue, 3 Dec 2024 17:43:30 +0000 [thread overview]
Message-ID: <4c30bab5-9f6f-4b68-a569-ab3e34cd27ad@redhat.com> (raw)
In-Reply-To: <20241127174839.1770671-1-akozyrev@nvidia.com>
On 27/11/2024 18:48, Alexander Kozyrev wrote:
> [ upstream commit 3cddeba0ca38b00c7dc646277484d08a4cb2d862 ]
>
> The completion queue element size can be independently configured
> to report either 64 or 128 bytes CQEs by programming cqe_sz parameter
> at CQ creation. This parameter depends on the cache line size and
> affects both regular CQEs and error CQEs. But the error handling
> assumes that an error CQE is 64 bytes and doesn't take the padding
> into consideration on platforms with 128-byte cache lines.
> Fix the error CQE size in all error handling routines in mlx5.
>
Hi Alexander,
This is causing a build error [1] on windows in the crypto code [2].
ref
https://dpdkdashboard.iol.unh.edu/results/dashboard/testruns/logs/1475543/
I didn't spend time analysing it. If you have a fix, I can apply it;
otherwise we can drop the patch.
thanks,
Kevin.
[1]
../drivers/crypto/mlx5/mlx5_crypto.c
../drivers/crypto/mlx5/mlx5_crypto.c:480:52: error: incomplete
definition of type 'struct mlx5_err_cqe'
DRV_LOG(ERR, "CQE ERR:%x.\n", rte_be_to_cpu_32(cqe->syndrome));
~~~^
[2]
mlx5_crypto_cqe_err_handle(struct mlx5_crypto_qp *qp, struct
rte_crypto_op *op)
{
const uint32_t idx = qp->ci & (qp->entries_n - 1);
volatile struct mlx5_err_cqe *cqe = (volatile struct mlx5_err_cqe *)
&qp->cq_obj.cqes[idx];
op->status = RTE_CRYPTO_OP_STATUS_ERROR;
qp->stats.dequeue_err_count++;
DRV_LOG(ERR, "CQE ERR:%x.\n", rte_be_to_cpu_32(cqe->syndrome));
}
> Fixes: 957e45fb7bcb ("net/mlx5: handle Tx completion with error")
>
> Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
> Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
> ---
> drivers/common/mlx5/mlx5_prm.h | 29 ++++++++++++++++++++-
> drivers/common/mlx5/windows/mlx5_win_defs.h | 12 ---------
> drivers/compress/mlx5/mlx5_compress.c | 4 +--
> drivers/net/mlx5/mlx5_flow_aso.c | 6 ++---
> drivers/net/mlx5/mlx5_rx.c | 2 +-
> drivers/net/mlx5/mlx5_tx.c | 8 +++---
> 6 files changed, 38 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
> index 58aa72df64..8ce1e66888 100644
> --- a/drivers/common/mlx5/mlx5_prm.h
> +++ b/drivers/common/mlx5/mlx5_prm.h
> @@ -249,8 +249,12 @@
> /* Maximum number of DS in WQE. Limited by 6-bit field. */
> #define MLX5_DSEG_MAX 63
>
> -/* The 32 bit syndrome offset in struct mlx5_err_cqe. */
> +/* The 32 bit syndrome offset in struct mlx5_error_cqe. */
> +#if (RTE_CACHE_LINE_SIZE == 128)
> +#define MLX5_ERROR_CQE_SYNDROME_OFFSET 116
> +#else
> #define MLX5_ERROR_CQE_SYNDROME_OFFSET 52
> +#endif
>
> /* The completion mode offset in the WQE control segment line 2. */
> #define MLX5_COMP_MODE_OFFSET 2
> @@ -379,6 +383,29 @@ struct mlx5_wqe_mprq {
>
> #define MLX5_MPRQ_STRIDE_SHIFT_BYTE 2
>
> +struct mlx5_error_cqe {
> +#if (RTE_CACHE_LINE_SIZE == 128)
> + uint8_t padding[64];
> +#endif
> + uint8_t rsvd0[2];
> + uint16_t eth_wqe_id;
> + uint8_t rsvd1[16];
> + uint16_t ib_stride_index;
> + uint8_t rsvd2[10];
> + uint32_t srqn;
> + uint8_t rsvd3[8];
> + uint32_t byte_cnt;
> + uint8_t rsvd4[4];
> + uint8_t hw_err_synd;
> + uint8_t hw_synd_type;
> + uint8_t vendor_err_synd;
> + uint8_t syndrome;
> + uint32_t s_wqe_opcode_qpn;
> + uint16_t wqe_counter;
> + uint8_t signature;
> + uint8_t op_own;
> +};
> +
> /* CQ element structure - should be equal to the cache line size */
> struct mlx5_cqe {
> #if (RTE_CACHE_LINE_SIZE == 128)
> diff --git a/drivers/common/mlx5/windows/mlx5_win_defs.h b/drivers/common/mlx5/windows/mlx5_win_defs.h
> index 0c09325444..d79728bcda 100644
> --- a/drivers/common/mlx5/windows/mlx5_win_defs.h
> +++ b/drivers/common/mlx5/windows/mlx5_win_defs.h
> @@ -223,18 +223,6 @@ struct mlx5_action {
> } dest_tir;
> };
>
> -struct mlx5_err_cqe {
> - uint8_t rsvd0[32];
> - uint32_t srqn;
> - uint8_t rsvd1[18];
> - uint8_t vendor_err_synd;
> - uint8_t syndrome;
> - uint32_t s_wqe_opcode_qpn;
> - uint16_t wqe_counter;
> - uint8_t signature;
> - uint8_t op_own;
> -};
> -
> struct mlx5_wqe_srq_next_seg {
> uint8_t rsvd0[2];
> rte_be16_t next_wqe_index;
> diff --git a/drivers/compress/mlx5/mlx5_compress.c b/drivers/compress/mlx5/mlx5_compress.c
> index 515b5dfa67..e1ca77795c 100644
> --- a/drivers/compress/mlx5/mlx5_compress.c
> +++ b/drivers/compress/mlx5/mlx5_compress.c
> @@ -537,7 +537,7 @@ mlx5_compress_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe,
> size_t i;
>
> DRV_LOG(ERR, "Error cqe:");
> - for (i = 0; i < sizeof(struct mlx5_err_cqe) >> 2; i += 4)
> + for (i = 0; i < sizeof(struct mlx5_error_cqe) >> 2; i += 4)
> DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
> cqe[i + 2], cqe[i + 3]);
> DRV_LOG(ERR, "\nError wqe:");
> @@ -555,7 +555,7 @@ mlx5_compress_cqe_err_handle(struct mlx5_compress_qp *qp,
> struct rte_comp_op *op)
> {
> const uint32_t idx = qp->ci & (qp->entries_n - 1);
> - volatile struct mlx5_err_cqe *cqe = (volatile struct mlx5_err_cqe *)
> + volatile struct mlx5_error_cqe *cqe = (volatile struct mlx5_error_cqe *)
> &qp->cq.cqes[idx];
> volatile struct mlx5_gga_wqe *wqes = (volatile struct mlx5_gga_wqe *)
> qp->qp.wqes;
> diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
> index eb7fc43da3..0d01ca1310 100644
> --- a/drivers/net/mlx5/mlx5_flow_aso.c
> +++ b/drivers/net/mlx5/mlx5_flow_aso.c
> @@ -406,7 +406,7 @@ mlx5_aso_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe)
> int i;
>
> DRV_LOG(ERR, "Error cqe:");
> - for (i = 0; i < 16; i += 4)
> + for (i = 0; i < (int)sizeof(struct mlx5_error_cqe) / 4; i += 4)
> DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
> cqe[i + 2], cqe[i + 3]);
> DRV_LOG(ERR, "\nError wqe:");
> @@ -426,8 +426,8 @@ mlx5_aso_cqe_err_handle(struct mlx5_aso_sq *sq)
> {
> struct mlx5_aso_cq *cq = &sq->cq;
> uint32_t idx = cq->cq_ci & ((1 << cq->log_desc_n) - 1);
> - volatile struct mlx5_err_cqe *cqe =
> - (volatile struct mlx5_err_cqe *)&cq->cq_obj.cqes[idx];
> + volatile struct mlx5_error_cqe *cqe =
> + (volatile struct mlx5_error_cqe *)&cq->cq_obj.cqes[idx];
>
> cq->errors++;
> idx = rte_be_to_cpu_16(cqe->wqe_counter) & (1u << sq->log_desc_n);
> diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
> index eea6a5c6c7..78d3949276 100644
> --- a/drivers/net/mlx5/mlx5_rx.c
> +++ b/drivers/net/mlx5/mlx5_rx.c
> @@ -433,7 +433,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec,
> container_of(rxq, struct mlx5_rxq_ctrl, rxq);
> union {
> volatile struct mlx5_cqe *cqe;
> - volatile struct mlx5_err_cqe *err_cqe;
> + volatile struct mlx5_error_cqe *err_cqe;
> } u = {
> .cqe = &(*rxq->cqes)[(rxq->cq_ci - vec) & cqe_mask],
> };
> diff --git a/drivers/net/mlx5/mlx5_tx.c b/drivers/net/mlx5/mlx5_tx.c
> index 8f03743986..edb1e0265f 100644
> --- a/drivers/net/mlx5/mlx5_tx.c
> +++ b/drivers/net/mlx5/mlx5_tx.c
> @@ -55,7 +55,7 @@ tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl)
>
> /* Return 1 if the error CQE is signed otherwise, sign it and return 0. */
> static int
> -check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe)
> +check_err_cqe_seen(volatile struct mlx5_error_cqe *err_cqe)
> {
> static const uint8_t magic[] = "seen";
> int ret = 1;
> @@ -83,7 +83,7 @@ check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe)
> */
> static int
> mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq,
> - volatile struct mlx5_err_cqe *err_cqe)
> + volatile struct mlx5_error_cqe *err_cqe)
> {
> if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) {
> const uint16_t wqe_m = ((1 << txq->wqe_n) - 1);
> @@ -107,7 +107,7 @@ mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq,
> mlx5_dump_debug_information(name, "MLX5 Error CQ:",
> (const void *)((uintptr_t)
> txq->cqes),
> - sizeof(struct mlx5_cqe) *
> + sizeof(struct mlx5_error_cqe) *
> (1 << txq->cqe_n));
> mlx5_dump_debug_information(name, "MLX5 Error SQ:",
> (const void *)((uintptr_t)
> @@ -231,7 +231,7 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
> */
> rte_wmb();
> ret = mlx5_tx_error_cqe_handle
> - (txq, (volatile struct mlx5_err_cqe *)cqe);
> + (txq, (volatile struct mlx5_error_cqe *)cqe);
> if (unlikely(ret < 0)) {
> /*
> * Some error occurred on queue error
next prev parent reply other threads:[~2024-12-03 17:43 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-27 17:48 Alexander Kozyrev
2024-12-03 17:43 ` Kevin Traynor [this message]
2024-12-03 17:54 ` Kevin Traynor
2024-12-03 18:08 ` Alexander Kozyrev
2024-12-03 18:17 ` Kevin Traynor
2024-12-04 11:33 ` Kevin Traynor
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4c30bab5-9f6f-4b68-a569-ab3e34cd27ad@redhat.com \
--to=ktraynor@redhat.com \
--cc=akozyrev@nvidia.com \
--cc=stable@dpdk.org \
--cc=viacheslavo@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).