Re: [PATCH 21.11] common/mlx5: fix error CQE handling for 128 bytes CQE

patches for DPDK stable branches
 help / color / mirror / Atom feed

From: Kevin Traynor <ktraynor@redhat.com>
To: Alexander Kozyrev <akozyrev@nvidia.com>, stable@dpdk.org
Cc: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Subject: Re: [PATCH 21.11] common/mlx5: fix error CQE handling for 128 bytes CQE
Date: Tue, 3 Dec 2024 17:43:30 +0000	[thread overview]
Message-ID: <4c30bab5-9f6f-4b68-a569-ab3e34cd27ad@redhat.com> (raw)
In-Reply-To: <20241127174839.1770671-1-akozyrev@nvidia.com>

On 27/11/2024 18:48, Alexander Kozyrev wrote:
> [ upstream commit 3cddeba0ca38b00c7dc646277484d08a4cb2d862 ]
> 
> The completion queue element size can be independently configured
> to report either 64 or 128 bytes CQEs by programming cqe_sz parameter
> at CQ creation. This parameter depends on the cache line size and
> affects both regular CQEs and error CQEs. But the error handling
> assumes that an error CQE is 64 bytes and doesn't take the padding
> into consideration on platforms with 128-byte cache lines.
> Fix the error CQE size in all error handling routines in mlx5.
> 

Hi Alexander,

This is causing a build error [1] on windows in the crypto code [2].

ref
https://dpdkdashboard.iol.unh.edu/results/dashboard/testruns/logs/1475543/

I didn't spend time to analyse. If you have a fix I can apply, or else
we can drop the patch.

thanks,
Kevin.

[1]
../drivers/crypto/mlx5/mlx5_crypto.c
../drivers/crypto/mlx5/mlx5_crypto.c:480:52: error: incomplete
definition of type 'struct mlx5_err_cqe'
        DRV_LOG(ERR, "CQE ERR:%x.\n", rte_be_to_cpu_32(cqe->syndrome));
                                                       ~~~^
[2]
mlx5_crypto_cqe_err_handle(struct mlx5_crypto_qp *qp, struct
rte_crypto_op *op)
{
	const uint32_t idx = qp->ci & (qp->entries_n - 1);
	volatile struct mlx5_err_cqe *cqe = (volatile struct mlx5_err_cqe *)
							&qp->cq_obj.cqes[idx];

	op->status = RTE_CRYPTO_OP_STATUS_ERROR;
	qp->stats.dequeue_err_count++;
	DRV_LOG(ERR, "CQE ERR:%x.\n", rte_be_to_cpu_32(cqe->syndrome));
}


> Fixes: 957e45fb7bcb ("net/mlx5: handle Tx completion with error")
> 
> Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
> Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
> ---
>  drivers/common/mlx5/mlx5_prm.h              | 29 ++++++++++++++++++++-
>  drivers/common/mlx5/windows/mlx5_win_defs.h | 12 ---------
>  drivers/compress/mlx5/mlx5_compress.c       |  4 +--
>  drivers/net/mlx5/mlx5_flow_aso.c            |  6 ++---
>  drivers/net/mlx5/mlx5_rx.c                  |  2 +-
>  drivers/net/mlx5/mlx5_tx.c                  |  8 +++---
>  6 files changed, 38 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
> index 58aa72df64..8ce1e66888 100644
> --- a/drivers/common/mlx5/mlx5_prm.h
> +++ b/drivers/common/mlx5/mlx5_prm.h
> @@ -249,8 +249,12 @@
>  /* Maximum number of DS in WQE. Limited by 6-bit field. */
>  #define MLX5_DSEG_MAX 63
>  
> -/* The 32 bit syndrome offset in struct mlx5_err_cqe. */
> +/* The 32 bit syndrome offset in struct mlx5_error_cqe. */
> +#if (RTE_CACHE_LINE_SIZE == 128)
> +#define MLX5_ERROR_CQE_SYNDROME_OFFSET 116
> +#else
>  #define MLX5_ERROR_CQE_SYNDROME_OFFSET 52
> +#endif
>  
>  /* The completion mode offset in the WQE control segment line 2. */
>  #define MLX5_COMP_MODE_OFFSET 2
> @@ -379,6 +383,29 @@ struct mlx5_wqe_mprq {
>  
>  #define MLX5_MPRQ_STRIDE_SHIFT_BYTE 2
>  
> +struct mlx5_error_cqe {
> +#if (RTE_CACHE_LINE_SIZE == 128)
> +	uint8_t padding[64];
> +#endif
> +	uint8_t rsvd0[2];
> +	uint16_t eth_wqe_id;
> +	uint8_t	rsvd1[16];
> +	uint16_t ib_stride_index;
> +	uint8_t	rsvd2[10];
> +	uint32_t srqn;
> +	uint8_t	rsvd3[8];
> +	uint32_t byte_cnt;
> +	uint8_t	rsvd4[4];
> +	uint8_t	hw_err_synd;
> +	uint8_t	hw_synd_type;
> +	uint8_t	vendor_err_synd;
> +	uint8_t	syndrome;
> +	uint32_t s_wqe_opcode_qpn;
> +	uint16_t wqe_counter;
> +	uint8_t	signature;
> +	uint8_t	op_own;
> +};
> +
>  /* CQ element structure - should be equal to the cache line size */
>  struct mlx5_cqe {
>  #if (RTE_CACHE_LINE_SIZE == 128)
> diff --git a/drivers/common/mlx5/windows/mlx5_win_defs.h b/drivers/common/mlx5/windows/mlx5_win_defs.h
> index 0c09325444..d79728bcda 100644
> --- a/drivers/common/mlx5/windows/mlx5_win_defs.h
> +++ b/drivers/common/mlx5/windows/mlx5_win_defs.h
> @@ -223,18 +223,6 @@ struct mlx5_action {
>  	} dest_tir;
>  };
>  
> -struct mlx5_err_cqe {
> -	uint8_t		rsvd0[32];
> -	uint32_t	srqn;
> -	uint8_t		rsvd1[18];
> -	uint8_t		vendor_err_synd;
> -	uint8_t		syndrome;
> -	uint32_t	s_wqe_opcode_qpn;
> -	uint16_t	wqe_counter;
> -	uint8_t		signature;
> -	uint8_t		op_own;
> -};
> -
>  struct mlx5_wqe_srq_next_seg {
>  	uint8_t			rsvd0[2];
>  	rte_be16_t		next_wqe_index;
> diff --git a/drivers/compress/mlx5/mlx5_compress.c b/drivers/compress/mlx5/mlx5_compress.c
> index 515b5dfa67..e1ca77795c 100644
> --- a/drivers/compress/mlx5/mlx5_compress.c
> +++ b/drivers/compress/mlx5/mlx5_compress.c
> @@ -537,7 +537,7 @@ mlx5_compress_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe,
>  	size_t i;
>  
>  	DRV_LOG(ERR, "Error cqe:");
> -	for (i = 0; i < sizeof(struct mlx5_err_cqe) >> 2; i += 4)
> +	for (i = 0; i < sizeof(struct mlx5_error_cqe) >> 2; i += 4)
>  		DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
>  			cqe[i + 2], cqe[i + 3]);
>  	DRV_LOG(ERR, "\nError wqe:");
> @@ -555,7 +555,7 @@ mlx5_compress_cqe_err_handle(struct mlx5_compress_qp *qp,
>  			     struct rte_comp_op *op)
>  {
>  	const uint32_t idx = qp->ci & (qp->entries_n - 1);
> -	volatile struct mlx5_err_cqe *cqe = (volatile struct mlx5_err_cqe *)
> +	volatile struct mlx5_error_cqe *cqe = (volatile struct mlx5_error_cqe *)
>  							      &qp->cq.cqes[idx];
>  	volatile struct mlx5_gga_wqe *wqes = (volatile struct mlx5_gga_wqe *)
>  								    qp->qp.wqes;
> diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
> index eb7fc43da3..0d01ca1310 100644
> --- a/drivers/net/mlx5/mlx5_flow_aso.c
> +++ b/drivers/net/mlx5/mlx5_flow_aso.c
> @@ -406,7 +406,7 @@ mlx5_aso_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe)
>  	int i;
>  
>  	DRV_LOG(ERR, "Error cqe:");
> -	for (i = 0; i < 16; i += 4)
> +	for (i = 0; i < (int)sizeof(struct mlx5_error_cqe) / 4; i += 4)
>  		DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
>  			cqe[i + 2], cqe[i + 3]);
>  	DRV_LOG(ERR, "\nError wqe:");
> @@ -426,8 +426,8 @@ mlx5_aso_cqe_err_handle(struct mlx5_aso_sq *sq)
>  {
>  	struct mlx5_aso_cq *cq = &sq->cq;
>  	uint32_t idx = cq->cq_ci & ((1 << cq->log_desc_n) - 1);
> -	volatile struct mlx5_err_cqe *cqe =
> -			(volatile struct mlx5_err_cqe *)&cq->cq_obj.cqes[idx];
> +	volatile struct mlx5_error_cqe *cqe =
> +			(volatile struct mlx5_error_cqe *)&cq->cq_obj.cqes[idx];
>  
>  	cq->errors++;
>  	idx = rte_be_to_cpu_16(cqe->wqe_counter) & (1u << sq->log_desc_n);
> diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
> index eea6a5c6c7..78d3949276 100644
> --- a/drivers/net/mlx5/mlx5_rx.c
> +++ b/drivers/net/mlx5/mlx5_rx.c
> @@ -433,7 +433,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec,
>  			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
>  	union {
>  		volatile struct mlx5_cqe *cqe;
> -		volatile struct mlx5_err_cqe *err_cqe;
> +		volatile struct mlx5_error_cqe *err_cqe;
>  	} u = {
>  		.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec) & cqe_mask],
>  	};
> diff --git a/drivers/net/mlx5/mlx5_tx.c b/drivers/net/mlx5/mlx5_tx.c
> index 8f03743986..edb1e0265f 100644
> --- a/drivers/net/mlx5/mlx5_tx.c
> +++ b/drivers/net/mlx5/mlx5_tx.c
> @@ -55,7 +55,7 @@ tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl)
>  
>  /* Return 1 if the error CQE is signed otherwise, sign it and return 0. */
>  static int
> -check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe)
> +check_err_cqe_seen(volatile struct mlx5_error_cqe *err_cqe)
>  {
>  	static const uint8_t magic[] = "seen";
>  	int ret = 1;
> @@ -83,7 +83,7 @@ check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe)
>   */
>  static int
>  mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq,
> -			 volatile struct mlx5_err_cqe *err_cqe)
> +			 volatile struct mlx5_error_cqe *err_cqe)
>  {
>  	if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) {
>  		const uint16_t wqe_m = ((1 << txq->wqe_n) - 1);
> @@ -107,7 +107,7 @@ mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq,
>  			mlx5_dump_debug_information(name, "MLX5 Error CQ:",
>  						    (const void *)((uintptr_t)
>  						    txq->cqes),
> -						    sizeof(struct mlx5_cqe) *
> +						    sizeof(struct mlx5_error_cqe) *
>  						    (1 << txq->cqe_n));
>  			mlx5_dump_debug_information(name, "MLX5 Error SQ:",
>  						    (const void *)((uintptr_t)
> @@ -231,7 +231,7 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
>  			 */
>  			rte_wmb();
>  			ret = mlx5_tx_error_cqe_handle
> -				(txq, (volatile struct mlx5_err_cqe *)cqe);
> +				(txq, (volatile struct mlx5_error_cqe *)cqe);
>  			if (unlikely(ret < 0)) {
>  				/*
>  				 * Some error occurred on queue error

next prev parent reply	other threads:[~2024-12-03 17:43 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-11-27 17:48 Alexander Kozyrev
2024-12-03 17:43 ` Kevin Traynor [this message]
2024-12-03 17:54   ` Kevin Traynor
2024-12-03 18:08     ` Alexander Kozyrev
2024-12-03 18:17       ` Kevin Traynor
2024-12-04 11:33         ` Kevin Traynor

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4c30bab5-9f6f-4b68-a569-ab3e34cd27ad@redhat.com \
    --to=ktraynor@redhat.com \
    --cc=akozyrev@nvidia.com \
    --cc=stable@dpdk.org \
    --cc=viacheslavo@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).