From: Alexander Kozyrev <akozyrev@nvidia.com>
To: <dev@dpdk.org>
Cc: <stable@dpdk.org>, <rasland@nvidia.com>, <matan@nvidia.com>,
<viacheslavo@nvidia.com>
Subject: [PATCH] net/mlx5: ignore non-critical syndromes for Rx queue
Date: Fri, 27 Jan 2023 05:22:43 +0200
Message-ID: <20230127032243.3990099-1-akozyrev@nvidia.com>

For non-fatal syndromes such as LOCAL_LENGTH_ERR, an Rx queue reset
should not be triggered: the Rx queue can continue with the subsequent
packets without any recovery. Only three syndromes warrant an Rx queue
reset: LOCAL_QP_OP_ERR, LOCAL_PROT_ERR and WR_FLUSH_ERR.
Do not initiate an Rx queue reset in any other case.
Skip all non-critical error CQEs and continue with packet processing.

Fixes: 88c0733535 ("net/mlx5: extend Rx completion with error handling")
Cc: stable@dpdk.org

Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
---
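Notes (not part of the commit message): the classification added to
mlx5_rx_err_handle() boils down to the check sketched below. The helper
name is hypothetical, since the patch open-codes the test; the
MLX5_CQE_SYNDROME_* constants are the existing PRM definitions already
used by the driver.

    #include <stdbool.h>
    #include <stdint.h>

    /* Sketch only: does this CQE error syndrome require an Rx queue reset? */
    static bool is_critical_syndrome(uint8_t syndrome)
    {
            switch (syndrome) {
            case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
            case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
            case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
                    /* Fatal for the queue: go through the reset path. */
                    return true;
            default:
                    /* e.g. LOCAL_LENGTH_ERR: drop this CQE, keep receiving. */
                    return false;
            }
    }

On the new return-value scheme: MLX5_REGULAR_ERROR_CQE_RET (-5) and
MLX5_CRITICAL_ERROR_CQE_RET (-4) have bit 30 set in their two's-complement
representation, so testing (len & MLX5_ERROR_CQE_MASK) catches every error
return, including the plain MLX5_ERROR_CQE_MASK value returned while the
CQE is still HW-owned; presumably bit 30 was chosen for the mask because
normal length returns leave it clear.
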
drivers/net/mlx5/mlx5_rx.c | 123 ++++++++++++++++++++++++-------
drivers/net/mlx5/mlx5_rx.h | 5 +-
drivers/net/mlx5/mlx5_rxtx_vec.c | 3 +-
3 files changed, 102 insertions(+), 29 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index 7612d15f01..99a08ef5f1 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -39,7 +39,8 @@ rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
static __rte_always_inline int
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
- uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe);
+ uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe,
+ uint16_t *skip_cnt, bool mprq);
static __rte_always_inline uint32_t
rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe);
@@ -408,10 +409,14 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
}
+#define MLX5_ERROR_CQE_MASK 0x40000000
/* Must be negative. */
-#define MLX5_ERROR_CQE_RET (-1)
+#define MLX5_REGULAR_ERROR_CQE_RET (-5)
+#define MLX5_CRITICAL_ERROR_CQE_RET (-4)
/* Must not be negative. */
#define MLX5_RECOVERY_ERROR_RET 0
+#define MLX5_RECOVERY_IGNORE_RET 1
+#define MLX5_RECOVERY_COMPLETED_RET 2
/**
* Handle a Rx error.
@@ -429,10 +434,14 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
* Number of CQEs to check for an error.
*
* @return
- * MLX5_RECOVERY_ERROR_RET in case of recovery error, otherwise the CQE status.
+ * MLX5_RECOVERY_ERROR_RET in case of recovery error,
+ * MLX5_RECOVERY_IGNORE_RET in case of non-critical error syndrome,
+ * MLX5_RECOVERY_COMPLETED_RET in case of recovery is completed,
+ * otherwise the CQE status after ignored error syndrome or queue reset.
*/
int
-mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
+mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec,
+ uint16_t err_n, uint16_t *skip_cnt)
{
const uint16_t cqe_n = 1 << rxq->cqe_n;
const uint16_t cqe_mask = cqe_n - 1;
@@ -447,14 +456,35 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec) & cqe_mask],
};
struct mlx5_mp_arg_queue_state_modify sm;
+ bool critical_syndrome = false;
int ret, i;
switch (rxq->err_state) {
+ case MLX5_RXQ_ERR_STATE_IGNORE:
+ ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci - vec);
+ if (ret != MLX5_CQE_STATUS_ERR) {
+ rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
+ return ret;
+ }
+ /* Fall-through */
case MLX5_RXQ_ERR_STATE_NO_ERROR:
for (i = 0; i < (int)err_n; i++) {
u.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec - i) & cqe_mask];
- if (MLX5_CQE_OPCODE(u.cqe->op_own) == MLX5_CQE_RESP_ERR)
+ if (MLX5_CQE_OPCODE(u.cqe->op_own) == MLX5_CQE_RESP_ERR) {
+ if (u.err_cqe->syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR ||
+ u.err_cqe->syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR ||
+ u.err_cqe->syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR)
+ critical_syndrome = true;
break;
+ }
+ }
+ if (!critical_syndrome) {
+ if (rxq->err_state == MLX5_RXQ_ERR_STATE_NO_ERROR) {
+ *skip_cnt = 0;
+ if (i == err_n)
+ rxq->err_state = MLX5_RXQ_ERR_STATE_IGNORE;
+ }
+ return MLX5_RECOVERY_IGNORE_RET;
}
rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET;
/* Fall-through */
@@ -546,6 +576,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
}
mlx5_rxq_initialize(rxq);
rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
+ return MLX5_RECOVERY_COMPLETED_RET;
}
return ret;
default:
@@ -565,19 +596,24 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, uint16_t err_n)
* @param[out] mcqe
* Store pointer to mini-CQE if compressed. Otherwise, the pointer is not
* written.
- *
+ * @param[out] skip_cnt
+ * Number of packets skipped due to recoverable errors.
+ * @param mprq
+ * Indication if it is called from MPRQ.
* @return
- * 0 in case of empty CQE, MLX5_ERROR_CQE_RET in case of error CQE,
- * otherwise the packet size in regular RxQ, and striding byte
- * count format in mprq case.
+ * 0 in case of empty CQE, MLX5_REGULAR_ERROR_CQE_RET in case of error CQE,
+ * MLX5_CRITICAL_ERROR_CQE_RET in case of error CQE lead to Rx queue reset,
+ * otherwise the packet size in regular RxQ,
+ * and striding byte count format in mprq case.
*/
static inline int
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
- uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe)
+ uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe,
+ uint16_t *skip_cnt, bool mprq)
{
struct rxq_zip *zip = &rxq->zip;
uint16_t cqe_n = cqe_cnt + 1;
- int len;
+ int len = 0, ret = 0;
uint16_t idx, end;
do {
@@ -626,7 +662,6 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
* compressed.
*/
} else {
- int ret;
int8_t op_own;
uint32_t cq_ci;
@@ -634,10 +669,12 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
if (unlikely(ret == MLX5_CQE_STATUS_ERR ||
rxq->err_state)) {
- ret = mlx5_rx_err_handle(rxq, 0, 1);
- if (ret == MLX5_CQE_STATUS_HW_OWN ||
- ret == MLX5_RECOVERY_ERROR_RET)
- return MLX5_ERROR_CQE_RET;
+ ret = mlx5_rx_err_handle(rxq, 0, 1, skip_cnt);
+ if (ret == MLX5_CQE_STATUS_HW_OWN)
+ return MLX5_ERROR_CQE_MASK;
+ if (ret == MLX5_RECOVERY_ERROR_RET ||
+ ret == MLX5_RECOVERY_COMPLETED_RET)
+ return MLX5_CRITICAL_ERROR_CQE_RET;
} else {
return 0;
}
@@ -690,8 +727,15 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
}
}
if (unlikely(rxq->err_state)) {
+ if (rxq->err_state == MLX5_RXQ_ERR_STATE_IGNORE &&
+ ret == MLX5_CQE_STATUS_SW_OWN) {
+ rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
+ return len & MLX5_ERROR_CQE_MASK;
+ }
cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
++rxq->stats.idropped;
+ (*skip_cnt) += mprq ? (len & MLX5_MPRQ_STRIDE_NUM_MASK) >>
+ MLX5_MPRQ_STRIDE_NUM_SHIFT : 1;
} else {
return len;
}
@@ -843,6 +887,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
int len = 0; /* keep its value across iterations. */
while (pkts_n) {
+ uint16_t skip_cnt;
unsigned int idx = rq_ci & wqe_cnt;
volatile struct mlx5_wqe_data_seg *wqe =
&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
@@ -881,11 +926,24 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
}
if (!pkt) {
cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
- len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe);
- if (len <= 0) {
- rte_mbuf_raw_free(rep);
- if (unlikely(len == MLX5_ERROR_CQE_RET))
+ len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe, &skip_cnt, false);
+ if (unlikely(len & MLX5_ERROR_CQE_MASK)) {
+ if (len == MLX5_CRITICAL_ERROR_CQE_RET) {
+ rte_mbuf_raw_free(rep);
rq_ci = rxq->rq_ci << sges_n;
+ break;
+ }
+ rq_ci >>= sges_n;
+ rq_ci += skip_cnt;
+ rq_ci <<= sges_n;
+ idx = rq_ci & wqe_cnt;
+ wqe = &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
+ seg = (*rxq->elts)[idx];
+ cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
+ len = len & ~MLX5_ERROR_CQE_MASK;
+ }
+ if (len == 0) {
+ rte_mbuf_raw_free(rep);
break;
}
pkt = seg;
@@ -1095,6 +1153,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
uint16_t strd_cnt;
uint16_t strd_idx;
uint32_t byte_cnt;
+ uint16_t skip_cnt;
volatile struct mlx5_mini_cqe8 *mcqe = NULL;
enum mlx5_rqx_code rxq_code;
@@ -1107,14 +1166,26 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
}
cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
- ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe);
+ ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe, &skip_cnt, true);
+ if (unlikely(ret & MLX5_ERROR_CQE_MASK)) {
+ if (ret == MLX5_CRITICAL_ERROR_CQE_RET) {
+ rq_ci = rxq->rq_ci;
+ consumed_strd = rxq->consumed_strd;
+ break;
+ }
+ consumed_strd += skip_cnt;
+ while (consumed_strd >= strd_n) {
+ /* Replace WQE if the buffer is still in use. */
+ mprq_buf_replace(rxq, rq_ci & wq_mask);
+ /* Advance to the next WQE. */
+ consumed_strd -= strd_n;
+ ++rq_ci;
+ buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
+ }
+ cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
+ }
if (ret == 0)
break;
- if (unlikely(ret == MLX5_ERROR_CQE_RET)) {
- rq_ci = rxq->rq_ci;
- consumed_strd = rxq->consumed_strd;
- break;
- }
byte_cnt = ret;
len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index 4ba53ebc48..6b42e27c89 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -62,6 +62,7 @@ enum mlx5_rxq_err_state {
MLX5_RXQ_ERR_STATE_NO_ERROR = 0,
MLX5_RXQ_ERR_STATE_NEED_RESET,
MLX5_RXQ_ERR_STATE_NEED_READY,
+ MLX5_RXQ_ERR_STATE_IGNORE,
};
enum mlx5_rqx_code {
@@ -286,8 +287,8 @@ int mlx5_hrxq_modify(struct rte_eth_dev *dev, uint32_t hxrq_idx,
uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
void mlx5_rxq_initialize(struct mlx5_rxq_data *rxq);
-__rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq,
- uint8_t vec, uint16_t err_n);
+__rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec,
+ uint16_t err_n, uint16_t *skip_cnt);
void mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf);
uint16_t mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts,
uint16_t pkts_n);
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
index c6be2be763..667475a93e 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -51,6 +51,7 @@ rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
uint16_t pkts_n)
{
uint16_t n = 0;
+ uint16_t skip_cnt;
unsigned int i;
#ifdef MLX5_PMD_SOFT_COUNTERS
uint32_t err_bytes = 0;
@@ -74,7 +75,7 @@ rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
rxq->stats.ipackets -= (pkts_n - n);
rxq->stats.ibytes -= err_bytes;
#endif
- mlx5_rx_err_handle(rxq, 1, pkts_n);
+ mlx5_rx_err_handle(rxq, 1, pkts_n, &skip_cnt);
return n;
}
--
2.18.2
Thread overview:
2023-01-27  3:22 Alexander Kozyrev [this message]
2023-02-06 15:05 ` Matan Azrad
2023-02-12 13:37 ` Raslan Darawsheh