From: Alexander Kozyrev <akozyrev@nvidia.com>
To: <dev@dpdk.org>
Cc: <stable@dpdk.org>, <rasland@nvidia.com>, <matan@nvidia.com>,
	<viacheslavo@nvidia.com>
Subject: [PATCH] net/mlx5: ignore non-critical syndromes for Rx queue
Date: Fri, 27 Jan 2023 05:22:43 +0200
Message-ID: <20230127032243.3990099-1-akozyrev@nvidia.com>

For non-fatal syndromes such as LOCAL_LENGTH_ERR, an Rx queue reset
should not be triggered. The Rx queue can continue with the next packets
without any recovery. Only three syndromes warrant an Rx queue reset:
LOCAL_QP_OP_ERR, LOCAL_PROT_ERR and WR_FLUSH_ERR.
Do not initiate an Rx queue reset in any other case.
Skip all non-critical error CQEs and continue with packet processing.
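
As a rough illustration of the classification described above, a helper
along the following lines could separate critical syndromes from ignorable
ones. The helper is invented for this example and is not part of the patch;
only the MLX5_CQE_SYNDROME_* values are existing definitions from the
driver's mlx5_prm.h.

#include <stdbool.h>
#include <stdint.h>
#include "mlx5_prm.h" /* MLX5_CQE_SYNDROME_* definitions */

/* Hypothetical helper, not in the patch: only these three syndromes are
 * treated as critical and justify an Rx queue reset. */
static inline bool
mlx5_rx_syndrome_is_critical(uint8_t syndrome)
{
	switch (syndrome) {
	case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
	case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
	case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
		return true;
	default:
		/* e.g. LOCAL_LENGTH_ERR: skip this error CQE and continue. */
		return false;
	}
}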

Fixes: 88c0733535 ("net/mlx5: extend Rx completion with error handling")
Cc: stable@dpdk.org

Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
---
 drivers/net/mlx5/mlx5_rx.c       | 123 ++++++++++++++++++++++++-------
 drivers/net/mlx5/mlx5_rx.h       |   5 +-
 drivers/net/mlx5/mlx5_rxtx_vec.c |   3 +-
 3 files changed, 102 insertions(+), 29 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index 7612d15f01..99a08ef5f1 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -39,7 +39,8 @@ rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 
 static __rte_always_inline int
 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
-		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe);
+		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe,
+		 uint16_t *skip_cnt, bool mprq);
 
 static __rte_always_inline uint32_t
 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe);
@@ -408,10 +409,14 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
 	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
 }
 
+#define MLX5_ERROR_CQE_MASK 0x40000000
 /* Must be negative. */
-#define MLX5_ERROR_CQE_RET (-1)
+#define MLX5_REGULAR_ERROR_CQE_RET (-5)
+#define MLX5_CRITICAL_ERROR_CQE_RET (-4)
 /* Must not be negative. */
 #define MLX5_RECOVERY_ERROR_RET 0
+#define MLX5_RECOVERY_IGNORE_RET 1
+#define MLX5_RECOVERY_COMPLETED_RET 2
 
 /**
  * Handle a Rx error.
@@ -429,10 +434,14 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
  *   Number of CQEs to check for an error.
  *
  * @return
- *   MLX5_RECOVERY_ERROR_RET in case of recovery error, otherwise the CQE status.
+ *   MLX5_RECOVERY_ERROR_RET in case of recovery error,
+ *   MLX5_RECOVERY_IGNORE_RET in case of non-critical error syndrome,
+ *   MLX5_RECOVERY_COMPLETED_RET in case recovery is completed,
+ *   otherwise the CQE status after an ignored error syndrome or queue reset.
  */
 int
-mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
+mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec,
+		   uint16_t err_n, uint16_t *skip_cnt)
 {
 	const uint16_t cqe_n = 1 << rxq->cqe_n;
 	const uint16_t cqe_mask = cqe_n - 1;
@@ -447,14 +456,35 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
 		.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec) & cqe_mask],
 	};
 	struct mlx5_mp_arg_queue_state_modify sm;
+	bool critical_syndrome = false;
 	int ret, i;
 
 	switch (rxq->err_state) {
+	case MLX5_RXQ_ERR_STATE_IGNORE:
+		ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci - vec);
+		if (ret != MLX5_CQE_STATUS_ERR) {
+			rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
+			return ret;
+		}
+		/* Fall-through */
 	case MLX5_RXQ_ERR_STATE_NO_ERROR:
 		for (i = 0; i < (int)err_n; i++) {
 			u.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec - i) & cqe_mask];
-			if (MLX5_CQE_OPCODE(u.cqe->op_own) == MLX5_CQE_RESP_ERR)
+			if (MLX5_CQE_OPCODE(u.cqe->op_own) == MLX5_CQE_RESP_ERR) {
+				if (u.err_cqe->syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR ||
+				    u.err_cqe->syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR ||
+				    u.err_cqe->syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR)
+					critical_syndrome = true;
 				break;
+			}
+		}
+		if (!critical_syndrome) {
+			if (rxq->err_state == MLX5_RXQ_ERR_STATE_NO_ERROR) {
+				*skip_cnt = 0;
+				if (i == err_n)
+					rxq->err_state = MLX5_RXQ_ERR_STATE_IGNORE;
+			}
+			return MLX5_RECOVERY_IGNORE_RET;
 		}
 		rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET;
 		/* Fall-through */
@@ -546,6 +576,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
 			}
 			mlx5_rxq_initialize(rxq);
 			rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
+			return MLX5_RECOVERY_COMPLETED_RET;
 		}
 		return ret;
 	default:
@@ -565,19 +596,24 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
  * @param[out] mcqe
  *   Store pointer to mini-CQE if compressed. Otherwise, the pointer is not
  *   written.
- *
+ * @param[out] skip_cnt
+ *   Number of packets skipped due to recoverable errors.
+ * @param mprq
+ *   Indication whether it is called from MPRQ.
  * @return
- *   0 in case of empty CQE, MLX5_ERROR_CQE_RET in case of error CQE,
- *   otherwise the packet size in regular RxQ, and striding byte
- *   count format in mprq case.
+ *   0 in case of empty CQE, MLX5_REGULAR_ERROR_CQE_RET in case of error CQE,
+ *   MLX5_CRITICAL_ERROR_CQE_RET in case of an error CQE leading to Rx queue reset,
+ *   otherwise the packet size in regular RxQ,
+ *   and striding byte count format in mprq case.
  */
 static inline int
 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
-		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe)
+		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe,
+		 uint16_t *skip_cnt, bool mprq)
 {
 	struct rxq_zip *zip = &rxq->zip;
 	uint16_t cqe_n = cqe_cnt + 1;
-	int len;
+	int len = 0, ret = 0;
 	uint16_t idx, end;
 
 	do {
@@ -626,7 +662,6 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 		 * compressed.
 		 */
 		} else {
-			int ret;
 			int8_t op_own;
 			uint32_t cq_ci;
 
@@ -634,10 +669,12 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 			if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
 				if (unlikely(ret == MLX5_CQE_STATUS_ERR ||
 					     rxq->err_state)) {
-					ret = mlx5_rx_err_handle(rxq, 0, 1);
-					if (ret == MLX5_CQE_STATUS_HW_OWN ||
-					    ret == MLX5_RECOVERY_ERROR_RET)
-						return MLX5_ERROR_CQE_RET;
+					ret = mlx5_rx_err_handle(rxq, 0, 1, skip_cnt);
+					if (ret == MLX5_CQE_STATUS_HW_OWN)
+						return MLX5_ERROR_CQE_MASK;
+					if (ret == MLX5_RECOVERY_ERROR_RET ||
+						ret == MLX5_RECOVERY_COMPLETED_RET)
+						return MLX5_CRITICAL_ERROR_CQE_RET;
 				} else {
 					return 0;
 				}
@@ -690,8 +727,15 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
 			}
 		}
 		if (unlikely(rxq->err_state)) {
+			if (rxq->err_state == MLX5_RXQ_ERR_STATE_IGNORE &&
+			    ret == MLX5_CQE_STATUS_SW_OWN) {
+				rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
+				return len & MLX5_ERROR_CQE_MASK;
+			}
 			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
 			++rxq->stats.idropped;
+			(*skip_cnt) += mprq ? (len & MLX5_MPRQ_STRIDE_NUM_MASK) >>
+				MLX5_MPRQ_STRIDE_NUM_SHIFT : 1;
 		} else {
 			return len;
 		}
@@ -843,6 +887,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	int len = 0; /* keep its value across iterations. */
 
 	while (pkts_n) {
+		uint16_t skip_cnt;
 		unsigned int idx = rq_ci & wqe_cnt;
 		volatile struct mlx5_wqe_data_seg *wqe =
 			&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
@@ -881,11 +926,24 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		}
 		if (!pkt) {
 			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
-			len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe);
-			if (len <= 0) {
-				rte_mbuf_raw_free(rep);
-				if (unlikely(len == MLX5_ERROR_CQE_RET))
+			len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe, &skip_cnt, false);
+			if (unlikely(len & MLX5_ERROR_CQE_MASK)) {
+				if (len == MLX5_CRITICAL_ERROR_CQE_RET) {
+					rte_mbuf_raw_free(rep);
 					rq_ci = rxq->rq_ci << sges_n;
+					break;
+				}
+				rq_ci >>= sges_n;
+				rq_ci += skip_cnt;
+				rq_ci <<= sges_n;
+				idx = rq_ci & wqe_cnt;
+				wqe = &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
+				seg = (*rxq->elts)[idx];
+				cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
+				len = len & ~MLX5_ERROR_CQE_MASK;
+			}
+			if (len == 0) {
+				rte_mbuf_raw_free(rep);
 				break;
 			}
 			pkt = seg;
@@ -1095,6 +1153,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		uint16_t strd_cnt;
 		uint16_t strd_idx;
 		uint32_t byte_cnt;
+		uint16_t skip_cnt;
 		volatile struct mlx5_mini_cqe8 *mcqe = NULL;
 		enum mlx5_rqx_code rxq_code;
 
@@ -1107,14 +1166,26 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
 		}
 		cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
-		ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe);
+		ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe, &skip_cnt, true);
+		if (unlikely(ret & MLX5_ERROR_CQE_MASK)) {
+			if (ret == MLX5_CRITICAL_ERROR_CQE_RET) {
+				rq_ci = rxq->rq_ci;
+				consumed_strd = rxq->consumed_strd;
+				break;
+			}
+			consumed_strd += skip_cnt;
+			while (consumed_strd >= strd_n) {
+				/* Replace WQE if the buffer is still in use. */
+				mprq_buf_replace(rxq, rq_ci & wq_mask);
+				/* Advance to the next WQE. */
+				consumed_strd -= strd_n;
+				++rq_ci;
+				buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
+			}
+			cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
+		}
 		if (ret == 0)
 			break;
-		if (unlikely(ret == MLX5_ERROR_CQE_RET)) {
-			rq_ci = rxq->rq_ci;
-			consumed_strd = rxq->consumed_strd;
-			break;
-		}
 		byte_cnt = ret;
 		len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
 		MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index 4ba53ebc48..6b42e27c89 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -62,6 +62,7 @@ enum mlx5_rxq_err_state {
 	MLX5_RXQ_ERR_STATE_NO_ERROR = 0,
 	MLX5_RXQ_ERR_STATE_NEED_RESET,
 	MLX5_RXQ_ERR_STATE_NEED_READY,
+	MLX5_RXQ_ERR_STATE_IGNORE,
 };
 
 enum mlx5_rqx_code {
@@ -286,8 +287,8 @@ int mlx5_hrxq_modify(struct rte_eth_dev *dev, uint32_t hxrq_idx,
 
 uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
 void mlx5_rxq_initialize(struct mlx5_rxq_data *rxq);
-__rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq,
-				      uint8_t vec, uint16_t err_n);
+__rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec,
+				      uint16_t err_n, uint16_t *skip_cnt);
 void mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf);
 uint16_t mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts,
 			    uint16_t pkts_n);
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
index c6be2be763..667475a93e 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -51,6 +51,7 @@ rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
 			 uint16_t pkts_n)
 {
 	uint16_t n = 0;
+	uint16_t skip_cnt;
 	unsigned int i;
 #ifdef MLX5_PMD_SOFT_COUNTERS
 	uint32_t err_bytes = 0;
@@ -74,7 +75,7 @@ rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
 	rxq->stats.ipackets -= (pkts_n - n);
 	rxq->stats.ibytes -= err_bytes;
 #endif
-	mlx5_rx_err_handle(rxq, 1, pkts_n);
+	mlx5_rx_err_handle(rxq, 1, pkts_n, &skip_cnt);
 	return n;
 }
 
-- 
2.18.2
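
For context, here is a minimal caller-side sketch, not taken from the patch,
of how the new mlx5_rx_poll_len() return convention can be consumed. The two
macros carry the values introduced in the diff; handle_poll_result() is an
invented name, and the real burst loops additionally rescale the queue index
by sges_n shifts (regular Rx) or stride counts (MPRQ).

#include <stdint.h>

#define MLX5_ERROR_CQE_MASK 0x40000000    /* error flag folded into the result */
#define MLX5_CRITICAL_ERROR_CQE_RET (-4)  /* error CQE that requires a queue reset */

/* Invented example: decide what a burst loop does with one poll result. */
static int
handle_poll_result(int len, uint16_t skip_cnt, uint32_t *rq_ci)
{
	if (len & MLX5_ERROR_CQE_MASK) {
		if (len == MLX5_CRITICAL_ERROR_CQE_RET)
			return -1;               /* stop the burst, recovery runs */
		*rq_ci += skip_cnt;              /* non-critical: skip dropped entries */
		len &= ~MLX5_ERROR_CQE_MASK;     /* keep any remaining byte count */
	}
	return len;                              /* 0 = no CQE, > 0 = byte count */
}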

