DPDK patches and discussions
 help / color / mirror / Atom feed
From: Slava Ovsiienko <viacheslavo@nvidia.com>
To: Alexander Kozyrev <akozyrev@nvidia.com>, "dev@dpdk.org" <dev@dpdk.org>
Cc: "stable@dpdk.org" <stable@dpdk.org>,
	Raslan Darawsheh <rasland@nvidia.com>,
	Matan Azrad <matan@nvidia.com>,
	"stephen@networkplumber.org" <stephen@networkplumber.org>
Subject: RE: [PATCH v2] drivers: fix error CQE handling
Date: Mon, 28 Oct 2024 15:58:41 +0000	[thread overview]
Message-ID: <DM4PR12MB7549E0C6A589DCC02EA435D6DF4A2@DM4PR12MB7549.namprd12.prod.outlook.com> (raw)
In-Reply-To: <20241003202605.867419-1-akozyrev@nvidia.com>

Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

> -----Original Message-----
> From: Alexander Kozyrev <akozyrev@nvidia.com>
> Sent: Thursday, October 3, 2024 11:26 PM
> To: dev@dpdk.org
> Cc: stable@dpdk.org; Raslan Darawsheh <rasland@nvidia.com>; Slava
> Ovsiienko <viacheslavo@nvidia.com>; Matan Azrad <matan@nvidia.com>;
> stephen@networkplumber.org
> Subject: [PATCH v2] drivers: fix error CQE handling
> 
> The completion queue element size can be independently configured to
> report either 64 or 128 bytes CQEs by programming cqe_sz parameter at CQ
> creation. This parameter depends on the cache line size and affects both
> regular CQEs and error CQEs. But the error handling assumes that an error
> CQE is 64 bytes and doesn't take the padding into consideration on platforms
> with 128-byte cache lines.
> Fix the error CQE size in all error handling routines in mlx5.
> 
> Fixes: 957e45fb7b ("net/mlx5: handle Tx completion with error")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
> ---
>  drivers/common/mlx5/mlx5_prm.h              | 29 ++++++++++++++++++++-
>  drivers/common/mlx5/windows/mlx5_win_defs.h | 12 ---------
>  drivers/compress/mlx5/mlx5_compress.c       |  4 +--
>  drivers/crypto/mlx5/mlx5_crypto_gcm.c       |  2 +-
>  drivers/crypto/mlx5/mlx5_crypto_xts.c       |  2 +-
>  drivers/net/mlx5/hws/mlx5dr_send.c          |  2 +-
>  drivers/net/mlx5/mlx5_flow_aso.c            |  6 ++---
>  drivers/net/mlx5/mlx5_rx.c                  |  2 +-
>  drivers/net/mlx5/mlx5_tx.c                  |  8 +++---
>  9 files changed, 41 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/common/mlx5/mlx5_prm.h
> b/drivers/common/mlx5/mlx5_prm.h index 359f02f17c..210158350d 100644
> --- a/drivers/common/mlx5/mlx5_prm.h
> +++ b/drivers/common/mlx5/mlx5_prm.h
> @@ -268,8 +268,12 @@
>  /* Maximum number of DS in WQE. Limited by 6-bit field. */  #define
> MLX5_DSEG_MAX 63
> 
> -/* The 32 bit syndrome offset in struct mlx5_err_cqe. */
> +/* The 32 bit syndrome offset in struct mlx5_error_cqe. */ #if
> +(RTE_CACHE_LINE_SIZE == 128) #define
> MLX5_ERROR_CQE_SYNDROME_OFFSET 116
> +#else
>  #define MLX5_ERROR_CQE_SYNDROME_OFFSET 52
> +#endif
> 
>  /* The completion mode offset in the WQE control segment line 2. */
> #define MLX5_COMP_MODE_OFFSET 2 @@ -415,6 +419,29 @@ struct
> mlx5_wqe_mprq {
> 
>  #define MLX5_MPRQ_STRIDE_SHIFT_BYTE 2
> 
> +struct mlx5_error_cqe {
> +#if (RTE_CACHE_LINE_SIZE == 128)
> +	uint8_t padding[64];
> +#endif
> +	uint8_t rsvd0[2];
> +	uint16_t eth_wqe_id;
> +	uint8_t	rsvd1[16];
> +	uint16_t ib_stride_index;
> +	uint8_t	rsvd2[10];
> +	uint32_t srqn;
> +	uint8_t	rsvd3[8];
> +	uint32_t byte_cnt;
> +	uint8_t	rsvd4[4];
> +	uint8_t	hw_err_synd;
> +	uint8_t	hw_synd_type;
> +	uint8_t	vendor_err_synd;
> +	uint8_t	syndrome;
> +	uint32_t s_wqe_opcode_qpn;
> +	uint16_t wqe_counter;
> +	uint8_t	signature;
> +	uint8_t	op_own;
> +};
> +
>  /* CQ element structure - should be equal to the cache line size */  struct
> mlx5_cqe {  #if (RTE_CACHE_LINE_SIZE == 128) diff --git
> a/drivers/common/mlx5/windows/mlx5_win_defs.h
> b/drivers/common/mlx5/windows/mlx5_win_defs.h
> index 79e7a7f386..d60df6fd37 100644
> --- a/drivers/common/mlx5/windows/mlx5_win_defs.h
> +++ b/drivers/common/mlx5/windows/mlx5_win_defs.h
> @@ -219,18 +219,6 @@ struct mlx5_action {
>  	} dest_tir;
>  };
> 
> -struct mlx5_err_cqe {
> -	uint8_t		rsvd0[32];
> -	uint32_t	srqn;
> -	uint8_t		rsvd1[18];
> -	uint8_t		vendor_err_synd;
> -	uint8_t		syndrome;
> -	uint32_t	s_wqe_opcode_qpn;
> -	uint16_t	wqe_counter;
> -	uint8_t		signature;
> -	uint8_t		op_own;
> -};
> -
>  struct mlx5_wqe_srq_next_seg {
>  	uint8_t			rsvd0[2];
>  	rte_be16_t		next_wqe_index;
> diff --git a/drivers/compress/mlx5/mlx5_compress.c
> b/drivers/compress/mlx5/mlx5_compress.c
> index 5998d060e4..82105bfebd 100644
> --- a/drivers/compress/mlx5/mlx5_compress.c
> +++ b/drivers/compress/mlx5/mlx5_compress.c
> @@ -602,7 +602,7 @@ mlx5_compress_dump_err_objs(volatile uint32_t
> *cqe, volatile uint32_t *wqe,
>  	size_t i;
> 
>  	DRV_LOG(ERR, "Error cqe:");
> -	for (i = 0; i < sizeof(struct mlx5_err_cqe) >> 2; i += 4)
> +	for (i = 0; i < sizeof(struct mlx5_error_cqe) >> 2; i += 4)
>  		DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
>  			cqe[i + 2], cqe[i + 3]);
>  	DRV_LOG(ERR, "\nError wqe:");
> @@ -620,7 +620,7 @@ mlx5_compress_cqe_err_handle(struct
> mlx5_compress_qp *qp,
>  			     struct rte_comp_op *op)
>  {
>  	const uint32_t idx = qp->ci & (qp->entries_n - 1);
> -	volatile struct mlx5_err_cqe *cqe = (volatile struct mlx5_err_cqe *)
> +	volatile struct mlx5_error_cqe *cqe = (volatile struct mlx5_error_cqe
> +*)
>  							      &qp-
> >cq.cqes[idx];
>  	volatile struct mlx5_gga_wqe *wqes = (volatile struct mlx5_gga_wqe
> *)
>  								    qp-
> >qp.wqes;
> diff --git a/drivers/crypto/mlx5/mlx5_crypto_gcm.c
> b/drivers/crypto/mlx5/mlx5_crypto_gcm.c
> index f598273873..cd21605bd2 100644
> --- a/drivers/crypto/mlx5/mlx5_crypto_gcm.c
> +++ b/drivers/crypto/mlx5/mlx5_crypto_gcm.c
> @@ -877,7 +877,7 @@ mlx5_crypto_gcm_cqe_err_handle(struct
> mlx5_crypto_qp *qp, struct rte_crypto_op *  {
>  	uint8_t op_code;
>  	const uint32_t idx = qp->cq_ci & (qp->entries_n - 1);
> -	volatile struct mlx5_err_cqe *cqe = (volatile struct mlx5_err_cqe *)
> +	volatile struct mlx5_error_cqe *cqe = (volatile struct mlx5_error_cqe
> +*)
>  							&qp-
> >cq_obj.cqes[idx];
> 
>  	op_code = rte_be_to_cpu_32(cqe->s_wqe_opcode_qpn) >>
> MLX5_CQ_INDEX_WIDTH; diff --git a/drivers/crypto/mlx5/mlx5_crypto_xts.c
> b/drivers/crypto/mlx5/mlx5_crypto_xts.c
> index d4e1dd718c..b9214711ac 100644
> --- a/drivers/crypto/mlx5/mlx5_crypto_xts.c
> +++ b/drivers/crypto/mlx5/mlx5_crypto_xts.c
> @@ -363,7 +363,7 @@ static __rte_noinline void
> mlx5_crypto_xts_cqe_err_handle(struct mlx5_crypto_qp *qp, struct
> rte_crypto_op *op)  {
>  	const uint32_t idx = qp->ci & (qp->entries_n - 1);
> -	volatile struct mlx5_err_cqe *cqe = (volatile struct mlx5_err_cqe *)
> +	volatile struct mlx5_error_cqe *cqe = (volatile struct mlx5_error_cqe
> +*)
>  							&qp-
> >cq_obj.cqes[idx];
> 
>  	op->status = RTE_CRYPTO_OP_STATUS_ERROR; diff --git
> a/drivers/net/mlx5/hws/mlx5dr_send.c
> b/drivers/net/mlx5/hws/mlx5dr_send.c
> index e9abf3dddb..e121c7f7ed 100644
> --- a/drivers/net/mlx5/hws/mlx5dr_send.c
> +++ b/drivers/net/mlx5/hws/mlx5dr_send.c
> @@ -599,7 +599,7 @@ static void mlx5dr_send_engine_poll_cq(struct
> mlx5dr_send_engine *queue,
>  		return;
> 
>  	if (unlikely(cqe_opcode != MLX5_CQE_REQ)) {
> -		struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe;
> +		struct mlx5_error_cqe *err_cqe = (struct mlx5_error_cqe
> *)cqe;
> 
>  		DR_LOG(ERR, "CQE ERR:0x%x, Vendor_ERR:0x%x, OP:0x%x,
> QPN:0x%x, WQE_CNT:0x%x",
>  			err_cqe->syndrome, err_cqe->vendor_err_synd,
> cqe_opcode, diff --git a/drivers/net/mlx5/mlx5_flow_aso.c
> b/drivers/net/mlx5/mlx5_flow_aso.c
> index a94b228396..feca8c3e89 100644
> --- a/drivers/net/mlx5/mlx5_flow_aso.c
> +++ b/drivers/net/mlx5/mlx5_flow_aso.c
> @@ -489,7 +489,7 @@ mlx5_aso_dump_err_objs(volatile uint32_t *cqe,
> volatile uint32_t *wqe)
>  	int i;
> 
>  	DRV_LOG(ERR, "Error cqe:");
> -	for (i = 0; i < 16; i += 4)
> +	for (i = 0; i < (int)sizeof(struct mlx5_error_cqe) / 4; i += 4)
>  		DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
>  			cqe[i + 2], cqe[i + 3]);
>  	DRV_LOG(ERR, "\nError wqe:");
> @@ -509,8 +509,8 @@ mlx5_aso_cqe_err_handle(struct mlx5_aso_sq *sq)  {
>  	struct mlx5_aso_cq *cq = &sq->cq;
>  	uint32_t idx = cq->cq_ci & ((1 << cq->log_desc_n) - 1);
> -	volatile struct mlx5_err_cqe *cqe =
> -			(volatile struct mlx5_err_cqe *)&cq-
> >cq_obj.cqes[idx];
> +	volatile struct mlx5_error_cqe *cqe =
> +			(volatile struct mlx5_error_cqe *)&cq-
> >cq_obj.cqes[idx];
> 
>  	cq->errors++;
>  	idx = rte_be_to_cpu_16(cqe->wqe_counter) & (1u << sq-
> >log_desc_n); diff --git a/drivers/net/mlx5/mlx5_rx.c
> b/drivers/net/mlx5/mlx5_rx.c index f241809e08..5e58eb8bc9 100644
> --- a/drivers/net/mlx5/mlx5_rx.c
> +++ b/drivers/net/mlx5/mlx5_rx.c
> @@ -459,7 +459,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq,
> uint8_t vec,
>  			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
>  	union {
>  		volatile struct mlx5_cqe *cqe;
> -		volatile struct mlx5_err_cqe *err_cqe;
> +		volatile struct mlx5_error_cqe *err_cqe;
>  	} u = {
>  		.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec) & cqe_mask],
>  	};
> diff --git a/drivers/net/mlx5/mlx5_tx.c b/drivers/net/mlx5/mlx5_tx.c index
> 04f80bb9bd..2f48bbc82e 100644
> --- a/drivers/net/mlx5/mlx5_tx.c
> +++ b/drivers/net/mlx5/mlx5_tx.c
> @@ -55,7 +55,7 @@ tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl)
> 
>  /* Return 1 if the error CQE is signed otherwise, sign it and return 0. */  static
> int -check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe)
> +check_err_cqe_seen(volatile struct mlx5_error_cqe *err_cqe)
>  {
>  	static const uint8_t magic[] = "seen";
>  	int ret = 1;
> @@ -83,7 +83,7 @@ check_err_cqe_seen(volatile struct mlx5_err_cqe
> *err_cqe)
>   */
>  static int
>  mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq,
> -			 volatile struct mlx5_err_cqe *err_cqe)
> +			 volatile struct mlx5_error_cqe *err_cqe)
>  {
>  	if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR)
> {
>  		const uint16_t wqe_m = ((1 << txq->wqe_n) - 1); @@ -107,7
> +107,7 @@ mlx5_tx_error_cqe_handle(struct mlx5_txq_data
> *__rte_restrict txq,
>  			mlx5_dump_debug_information(name, "MLX5 Error
> CQ:",
>  						    (const void *)((uintptr_t)
>  						    txq->cqes),
> -						    sizeof(struct mlx5_cqe) *
> +						    sizeof(struct
> mlx5_error_cqe) *
>  						    (1 << txq->cqe_n));
>  			mlx5_dump_debug_information(name, "MLX5 Error
> SQ:",
>  						    (const void *)((uintptr_t)
> @@ -206,7 +206,7 @@ mlx5_tx_handle_completion(struct mlx5_txq_data
> *__rte_restrict txq,
>  			 */
>  			rte_wmb();
>  			ret = mlx5_tx_error_cqe_handle
> -				(txq, (volatile struct mlx5_err_cqe *)cqe);
> +				(txq, (volatile struct mlx5_error_cqe *)cqe);
>  			if (unlikely(ret < 0)) {
>  				/*
>  				 * Some error occurred on queue error
> --
> 2.18.2


  reply	other threads:[~2024-10-28 15:58 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-02 18:49 [PATCH] " Alexander Kozyrev
2024-10-03 19:14 ` Stephen Hemminger
2024-10-03 20:26 ` [PATCH v2] " Alexander Kozyrev
2024-10-28 15:58   ` Slava Ovsiienko [this message]
2024-10-28 17:17   ` [PATCH v3] drivers: fix error CQE handling for 128 bytes CQE Alexander Kozyrev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=DM4PR12MB7549E0C6A589DCC02EA435D6DF4A2@DM4PR12MB7549.namprd12.prod.outlook.com \
    --to=viacheslavo@nvidia.com \
    --cc=akozyrev@nvidia.com \
    --cc=dev@dpdk.org \
    --cc=matan@nvidia.com \
    --cc=rasland@nvidia.com \
    --cc=stable@dpdk.org \
    --cc=stephen@networkplumber.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).