patches for DPDK stable branches
From: Kalesh A P <kalesh-anakkur.purayil@broadcom.com>
To: stable@dpdk.org
Subject: [dpdk-stable] [PATCH 19.11 4/6] net/bnxt: detect bad opaque in Rx completion
Date: Mon, 16 Aug 2021 20:20:57 +0530
Message-ID: <20210816145059.31065-5-kalesh-anakkur.purayil@broadcom.com>
In-Reply-To: <20210816145059.31065-1-kalesh-anakkur.purayil@broadcom.com>

From: Somnath Kotur <somnath.kotur@broadcom.com>

[ upstream commit 03c8f2fe111c2b4c4fddc960dc82253ac7e6c5c5 ]

There is a rare hardware bug that can cause a bad opaque value in the Rx
or TPA start completion. When this happens, the hardware may have used
the same buffer twice for two Rx packets. In addition, the driver might
also crash later when using the bad opaque as an index into the ring.

The Rx opaque value is predictable and always monotonically increasing.
The workaround is to keep track of the expected next opaque value and to
compare it with the one returned by hardware in Rx and TPA start
completions. If they do not match, log the error, discard the
completion, schedule a ring reset and move on to the next one.
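
The non-TPA check, condensed from the bnxt_rx_pkt() hunk below (a rough
sketch of the logic rather than the exact diff), looks like this:

    cons = rxcmp->opaque;
    if (unlikely(cons != rxr->rx_next_cons)) {
            /* Bad opaque from hardware: drop this completion and
             * schedule an Rx ring reset instead of indexing the
             * ring with the bad value.
             */
            bnxt_discard_rx(rxq->bp, cpr, &tmp_raw_cons, rxcmp);
            PMD_DRV_LOG(ERR, "RX cons %x != expected cons %x\n",
                        cons, rxr->rx_next_cons);
            bnxt_sched_ring_reset(rxq);
            rc = -EBUSY;
            goto next_rx;
    }
    ...
    /* Normal path: advance the expected opaque for the next packet. */
    rxr->rx_next_cons = RING_NEXT(rxr->rx_ring_struct, cons);

The TPA start path does the equivalent comparison against the opaque in
the TPA start completion.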

Fixes: 0958d8b6435d ("net/bnxt: support LRO")

Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt_hwrm.c           |  19 ++++++
 drivers/net/bnxt/bnxt_hwrm.h           |   1 +
 drivers/net/bnxt/bnxt_rxq.h            |   1 +
 drivers/net/bnxt/bnxt_rxr.c            | 102 ++++++++++++++++++++++++++++++++-
 drivers/net/bnxt/bnxt_rxr.h            |   1 +
 drivers/net/bnxt/hsi_struct_def_dpdk.h |  11 +++-
 6 files changed, 131 insertions(+), 4 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index e354df1..724fa25 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -2530,6 +2530,25 @@ int bnxt_clear_hwrm_vnic_filters(struct bnxt *bp, struct bnxt_vnic_info *vnic)
 	return rc;
 }
 
+int bnxt_hwrm_rx_ring_reset(struct bnxt *bp, int queue_index)
+{
+	int rc;
+	struct hwrm_ring_reset_input req = {.req_type = 0 };
+	struct hwrm_ring_reset_output *resp = bp->hwrm_cmd_resp_addr;
+
+	HWRM_PREP(req, RING_RESET, BNXT_USE_CHIMP_MB);
+
+	req.ring_type = HWRM_RING_RESET_INPUT_RING_TYPE_RX_RING_GRP;
+	req.ring_id = rte_cpu_to_le_16(bp->grp_info[queue_index].fw_grp_id);
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
+
+	HWRM_CHECK_RESULT();
+
+	HWRM_UNLOCK();
+
+	return rc;
+}
+
 static int
 bnxt_clear_hwrm_vnic_flows(struct bnxt *bp, struct bnxt_vnic_info *vnic)
 {
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index b8fb37d..8ee9a7b 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -229,4 +229,5 @@ int bnxt_clear_one_vnic_filter(struct bnxt *bp,
 			       struct bnxt_filter_info *filter);
 int bnxt_hwrm_poll_ver_get(struct bnxt *bp);
 int bnxt_hwrm_port_phy_qcaps(struct bnxt *bp);
+int bnxt_hwrm_rx_ring_reset(struct bnxt *bp, int queue_index);
 #endif
diff --git a/drivers/net/bnxt/bnxt_rxq.h b/drivers/net/bnxt/bnxt_rxq.h
index ae3badb..6942af9 100644
--- a/drivers/net/bnxt/bnxt_rxq.h
+++ b/drivers/net/bnxt/bnxt_rxq.h
@@ -32,6 +32,7 @@ struct bnxt_rx_queue {
 	uint8_t			rx_deferred_start; /* not in global dev start */
 	uint8_t			rx_started; /* RX queue is started */
 	uint8_t			drop_en; /* Drop when rx desc not available. */
+	uint8_t			in_reset; /* Rx ring is scheduled for reset */
 
 	struct bnxt		*bp;
 	int			index;
diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c
index 8abd391..faa13bc 100644
--- a/drivers/net/bnxt/bnxt_rxr.c
+++ b/drivers/net/bnxt/bnxt_rxr.c
@@ -10,15 +10,14 @@
 #include <rte_byteorder.h>
 #include <rte_malloc.h>
 #include <rte_memory.h>
+#include <rte_alarm.h>
 
 #include "bnxt.h"
 #include "bnxt_ring.h"
 #include "bnxt_rxr.h"
 #include "bnxt_rxq.h"
 #include "hsi_struct_def_dpdk.h"
-#ifdef RTE_LIBRTE_IEEE1588
 #include "bnxt_hwrm.h"
-#endif
 
 /*
  * RX Ring handling
@@ -122,6 +121,50 @@ struct rte_mbuf *bnxt_consume_rx_buf(struct bnxt_rx_ring_info *rxr,
 	return mbuf;
 }
 
+static void bnxt_rx_ring_reset(void *arg)
+{
+	struct bnxt *bp = arg;
+	int i, rc = 0;
+	struct bnxt_rx_queue *rxq;
+
+
+	for (i = 0; i < (int)bp->rx_nr_rings; i++) {
+		struct bnxt_rx_ring_info *rxr;
+
+		rxq = bp->rx_queues[i];
+		if (!rxq || !rxq->in_reset)
+			continue;
+
+		rxr = rxq->rx_ring;
+		/* Disable and flush TPA before resetting the RX ring */
+		if (rxr->tpa_info)
+			bnxt_hwrm_vnic_tpa_cfg(bp, rxq->vnic, false);
+		rc = bnxt_hwrm_rx_ring_reset(bp, i);
+		if (rc) {
+			PMD_DRV_LOG(ERR, "Rx ring%d reset failed\n", i);
+			continue;
+		}
+
+		bnxt_rx_queue_release_mbufs(rxq);
+		rxr->rx_prod = 0;
+		rxr->ag_prod = 0;
+		rxr->rx_next_cons = 0;
+		bnxt_init_one_rx_ring(rxq);
+		bnxt_db_write(&rxr->rx_db, rxr->rx_prod);
+		bnxt_db_write(&rxr->ag_db, rxr->ag_prod);
+		if (rxr->tpa_info)
+			bnxt_hwrm_vnic_tpa_cfg(bp, rxq->vnic, true);
+
+		rxq->in_reset = 0;
+	}
+}
+
+static void bnxt_sched_ring_reset(struct bnxt_rx_queue *rxq)
+{
+	rxq->in_reset = 1;
+	rte_eal_alarm_set(1, bnxt_rx_ring_reset, (void *)rxq->bp);
+}
+
 static void bnxt_tpa_start(struct bnxt_rx_queue *rxq,
 			   struct rx_tpa_start_cmpl *tpa_start,
 			   struct rx_tpa_start_cmpl_hi *tpa_start1)
@@ -136,6 +179,12 @@ static void bnxt_tpa_start(struct bnxt_rx_queue *rxq,
 
 	data_cons = tpa_start->opaque;
 	tpa_info = &rxr->tpa_info[agg_id];
+	if (unlikely(data_cons != rxr->rx_next_cons)) {
+		PMD_DRV_LOG(ERR, "TPA cons %x, expected cons %x\n",
+			    data_cons, rxr->rx_next_cons);
+		bnxt_sched_ring_reset(rxq);
+		return;
+	}
 
 	mbuf = bnxt_consume_rx_buf(rxr, data_cons);
 
@@ -172,6 +221,8 @@ static void bnxt_tpa_start(struct bnxt_rx_queue *rxq,
 	/* recycle next mbuf */
 	data_cons = RING_NEXT(rxr->rx_ring_struct, data_cons);
 	bnxt_reuse_rx_mbuf(rxr, bnxt_consume_rx_buf(rxr, data_cons));
+
+	rxr->rx_next_cons = RING_NEXT(rxr->rx_ring_struct, data_cons);
 }
 
 static int bnxt_agg_bufs_valid(struct bnxt_cp_ring_info *cpr,
@@ -267,6 +318,34 @@ static int bnxt_rx_pages(struct bnxt_rx_queue *rxq,
 	return 0;
 }
 
+static int bnxt_discard_rx(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
+			   uint32_t *raw_cons, void *cmp)
+{
+	struct rx_pkt_cmpl *rxcmp = cmp;
+	uint32_t tmp_raw_cons = *raw_cons;
+	uint8_t cmp_type, agg_bufs = 0;
+
+	cmp_type = CMP_TYPE(rxcmp);
+
+	if (cmp_type == CMPL_BASE_TYPE_RX_L2) {
+		agg_bufs = BNXT_RX_L2_AGG_BUFS(rxcmp);
+	} else if (cmp_type == RX_TPA_END_CMPL_TYPE_RX_TPA_END) {
+		struct rx_tpa_end_cmpl *tpa_end = cmp;
+
+		if (BNXT_CHIP_THOR(bp))
+			return 0;
+
+		agg_bufs = BNXT_TPA_END_AGG_BUFS(tpa_end);
+	}
+
+	if (agg_bufs) {
+		if (!bnxt_agg_bufs_valid(cpr, agg_bufs, tmp_raw_cons))
+			return -EBUSY;
+	}
+	*raw_cons = tmp_raw_cons;
+	return 0;
+}
+
 static inline struct rte_mbuf *bnxt_tpa_end(
 		struct bnxt_rx_queue *rxq,
 		uint32_t *raw_cp_cons,
@@ -281,6 +360,13 @@ static inline struct rte_mbuf *bnxt_tpa_end(
 	uint8_t payload_offset;
 	struct bnxt_tpa_info *tpa_info;
 
+	if (unlikely(rxq->in_reset)) {
+		PMD_DRV_LOG(ERR, "rxq->in_reset: raw_cp_cons:%d\n",
+			    *raw_cp_cons);
+		bnxt_discard_rx(rxq->bp, cpr, raw_cp_cons, tpa_end);
+		return NULL;
+	}
+
 	if (BNXT_CHIP_THOR(rxq->bp)) {
 		struct rx_tpa_v2_end_cmpl *th_tpa_end;
 		struct rx_tpa_v2_end_cmpl_hi *th_tpa_end1;
@@ -475,6 +561,14 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
 	prod = rxr->rx_prod;
 
 	cons = rxcmp->opaque;
+	if (unlikely(cons != rxr->rx_next_cons)) {
+		bnxt_discard_rx(rxq->bp, cpr, &tmp_raw_cons, rxcmp);
+		PMD_DRV_LOG(ERR, "RX cons %x != expected cons %x\n",
+			    cons, rxr->rx_next_cons);
+		bnxt_sched_ring_reset(rxq);
+		rc = -EBUSY;
+		goto next_rx;
+	}
 	mbuf = bnxt_consume_rx_buf(rxr, cons);
 	if (mbuf == NULL)
 		return -EBUSY;
@@ -588,6 +682,7 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
 		goto rx;
 	}
 	rxr->rx_prod = prod;
+	rxr->rx_next_cons = RING_NEXT(rxr->rx_ring_struct, cons);
 	/*
 	 * All MBUFs are allocated with the same size under DPDK,
 	 * no optimization for rx_copy_thresh
@@ -907,5 +1002,8 @@ int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq)
 	}
 	PMD_DRV_LOG(DEBUG, "TPA alloc Done!\n");
 
+	/* Explicitly reset this driver internal tracker on a ring init */
+	rxr->rx_next_cons = 0;
+
 	return 0;
 }
diff --git a/drivers/net/bnxt/bnxt_rxr.h b/drivers/net/bnxt/bnxt_rxr.h
index 387ab0b..f53bdde 100644
--- a/drivers/net/bnxt/bnxt_rxr.h
+++ b/drivers/net/bnxt/bnxt_rxr.h
@@ -190,6 +190,7 @@ struct bnxt_sw_rx_bd {
 struct bnxt_rx_ring_info {
 	uint16_t		rx_prod;
 	uint16_t		ag_prod;
+	uint16_t                rx_next_cons;
 	struct bnxt_db_info     rx_db;
 	struct bnxt_db_info     ag_db;
 
diff --git a/drivers/net/bnxt/hsi_struct_def_dpdk.h b/drivers/net/bnxt/hsi_struct_def_dpdk.h
index c2bae0f..ced1e04 100644
--- a/drivers/net/bnxt/hsi_struct_def_dpdk.h
+++ b/drivers/net/bnxt/hsi_struct_def_dpdk.h
@@ -24388,8 +24388,15 @@ struct hwrm_ring_reset_input {
 	#define HWRM_RING_RESET_INPUT_RING_TYPE_RX        UINT32_C(0x2)
 	/* RoCE Notification Completion Ring (ROCE_CR) */
 	#define HWRM_RING_RESET_INPUT_RING_TYPE_ROCE_CMPL UINT32_C(0x3)
-	#define HWRM_RING_RESET_INPUT_RING_TYPE_LAST \
-		HWRM_RING_RESET_INPUT_RING_TYPE_ROCE_CMPL
+	/*
+	 * Rx Ring Group.  This is to reset rx and aggregation in an atomic
+	 * operation. Completion ring associated with this ring group is
+	 * not reset.
+	 */
+	#define HWRM_RING_RESET_INPUT_RING_TYPE_RX_RING_GRP UINT32_C(0x6)
+	#define HWRM_RING_RESET_INPUT_RING_TYPE_LAST \
+		HWRM_RING_RESET_INPUT_RING_TYPE_RX_RING_GRP
+
 	uint8_t	unused_0;
 	/* Physical number of the ring. */
 	uint16_t	ring_id;
-- 
2.10.1


Thread overview: 8 messages
2021-08-16 14:50 [dpdk-stable] [PATCH 19.11 0/6] backport bnxt fixes to 19.11 Kalesh A P
2021-08-16 14:50 ` [dpdk-stable] [PATCH 19.11 1/6] net/bnxt: fix auto-negotiation on Whitney+ Kalesh A P
2021-08-16 14:50 ` [dpdk-stable] [PATCH 19.11 2/6] net/bnxt: remove unnecessary comment Kalesh A P
2021-08-16 14:50 ` [dpdk-stable] [PATCH 19.11 3/6] net/bnxt: invoke device removal event on recovery failure Kalesh A P
2021-08-16 14:50 ` Kalesh A P [this message]
2021-08-16 14:50 ` [dpdk-stable] [PATCH 19.11 5/6] net/bnxt: workaround spurious zero stats in Thor Kalesh A P
2021-08-16 14:50 ` [dpdk-stable] [PATCH 19.11 6/6] net/bnxt: clear cached statistics Kalesh A P
2021-08-17  9:40 ` [dpdk-stable] [PATCH 19.11 0/6] backport bnxt fixes to 19.11 Christian Ehrhardt
