From: Ajit Khaparde <ajit.khaparde@broadcom.com>
To: Somnath Kotur <somnath.kotur@broadcom.com>
Cc: dpdk-dev <dev@dpdk.org>, Ferruh Yigit <ferruh.yigit@intel.com>
Subject: Re: [dpdk-dev] [PATCH 1/2] net/bnxt: detect bad opaque in Rx completion
Date: Tue, 8 Jun 2021 10:46:18 -0700 [thread overview]
Message-ID: <CACZ4nhurH2TtLKLnq2Uhb1H_Ftyt_jW-GF5UPqHcO+3=bDC=Pg@mail.gmail.com> (raw)
In-Reply-To: <20210527061846.11803-1-somnath.kotur@broadcom.com>
[-- Attachment #1: Type: text/plain, Size: 10039 bytes --]
On Wed, May 26, 2021 at 11:21 PM Somnath Kotur
<somnath.kotur@broadcom.com> wrote:
>
> There is a rare hardware bug that can cause a bad opaque value in the RX
> or TPA start completion. When this happens, the hardware may have used the
> same buffer twice for 2 rx packets. In addition, the driver might also
> crash later using the bad opaque as an index into the ring.
>
> The rx opaque value is predictable and is always monotonically increasing.
> The workaround is to keep track of the expected next opaque value and
> compare it with the one returned by hardware during RX and TPA start
> completions. If they miscompare, log it, discard the completion,
> schedule a ring reset and move on to the next one.
>
> Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
> Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Updated the commit log with a Fixes tag and
applied to the dpdk-next-net-brcm for-next-net branch.
> ---
> drivers/net/bnxt/bnxt_hwrm.c | 19 +++++++
> drivers/net/bnxt/bnxt_hwrm.h | 1 +
> drivers/net/bnxt/bnxt_rxq.h | 1 +
> drivers/net/bnxt/bnxt_rxr.c | 101 ++++++++++++++++++++++++++++++++++-
> drivers/net/bnxt/bnxt_rxr.h | 1 +
> 5 files changed, 121 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
> index 931ecea77c..3d04b93d88 100644
> --- a/drivers/net/bnxt/bnxt_hwrm.c
> +++ b/drivers/net/bnxt/bnxt_hwrm.c
> @@ -2711,6 +2711,25 @@ void bnxt_free_hwrm_rx_ring(struct bnxt *bp, int queue_index)
> bp->grp_info[queue_index].cp_fw_ring_id = INVALID_HW_RING_ID;
> }
>
> +int bnxt_hwrm_rx_ring_reset(struct bnxt *bp, int queue_index)
> +{
> + int rc;
> + struct hwrm_ring_reset_input req = {.req_type = 0 };
> + struct hwrm_ring_reset_output *resp = bp->hwrm_cmd_resp_addr;
> +
> + HWRM_PREP(&req, HWRM_RING_RESET, BNXT_USE_CHIMP_MB);
> +
> + req.ring_type = HWRM_RING_RESET_INPUT_RING_TYPE_RX_RING_GRP;
> + req.ring_id = rte_cpu_to_le_16(bp->grp_info[queue_index].fw_grp_id);
> + rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
> +
> + HWRM_CHECK_RESULT();
> +
> + HWRM_UNLOCK();
> +
> + return rc;
> +}
> +
> static int
> bnxt_free_all_hwrm_rings(struct bnxt *bp)
> {
> diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
> index 90aff0c2de..a7271b7876 100644
> --- a/drivers/net/bnxt/bnxt_hwrm.h
> +++ b/drivers/net/bnxt/bnxt_hwrm.h
> @@ -301,4 +301,5 @@ int bnxt_hwrm_cfa_adv_flow_mgmt_qcaps(struct bnxt *bp);
> int bnxt_hwrm_fw_echo_reply(struct bnxt *bp, uint32_t echo_req_data1,
> uint32_t echo_req_data2);
> int bnxt_hwrm_poll_ver_get(struct bnxt *bp);
> +int bnxt_hwrm_rx_ring_reset(struct bnxt *bp, int queue_index);
> #endif
> diff --git a/drivers/net/bnxt/bnxt_rxq.h b/drivers/net/bnxt/bnxt_rxq.h
> index fd8c69fc80..42bd8e7ab7 100644
> --- a/drivers/net/bnxt/bnxt_rxq.h
> +++ b/drivers/net/bnxt/bnxt_rxq.h
> @@ -30,6 +30,7 @@ struct bnxt_rx_queue {
> uint8_t rx_deferred_start; /* not in global dev start */
> uint8_t rx_started; /* RX queue is started */
> uint8_t drop_en; /* Drop when rx desc not available. */
> + uint8_t in_reset; /* Rx ring is scheduled for reset */
>
> struct bnxt *bp;
> int index;
> diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c
> index 2ef4115ef9..63c8661669 100644
> --- a/drivers/net/bnxt/bnxt_rxr.c
> +++ b/drivers/net/bnxt/bnxt_rxr.c
> @@ -10,6 +10,7 @@
> #include <rte_byteorder.h>
> #include <rte_malloc.h>
> #include <rte_memory.h>
> +#include <rte_alarm.h>
>
> #include "bnxt.h"
> #include "bnxt_reps.h"
> @@ -17,9 +18,7 @@
> #include "bnxt_rxr.h"
> #include "bnxt_rxq.h"
> #include "hsi_struct_def_dpdk.h"
> -#ifdef RTE_LIBRTE_IEEE1588
> #include "bnxt_hwrm.h"
> -#endif
>
> #include <bnxt_tf_common.h>
> #include <ulp_mark_mgr.h>
> @@ -134,6 +133,51 @@ struct rte_mbuf *bnxt_consume_rx_buf(struct bnxt_rx_ring_info *rxr,
> return mbuf;
> }
>
> +static void bnxt_rx_ring_reset(void *arg)
> +{
> + struct bnxt *bp = arg;
> + int i, rc = 0;
> + struct bnxt_rx_queue *rxq;
> +
> +
> + for (i = 0; i < (int)bp->rx_nr_rings; i++) {
> + struct bnxt_rx_ring_info *rxr;
> +
> + rxq = bp->rx_queues[i];
> + if (!rxq || !rxq->in_reset)
> + continue;
> +
> + rxr = rxq->rx_ring;
> + /* Disable and flush TPA before resetting the RX ring */
> + if (rxr->tpa_info)
> + bnxt_hwrm_vnic_tpa_cfg(bp, rxq->vnic, false);
> + rc = bnxt_hwrm_rx_ring_reset(bp, i);
> + if (rc) {
> + PMD_DRV_LOG(ERR, "Rx ring%d reset failed\n", i);
> + continue;
> + }
> +
> + bnxt_rx_queue_release_mbufs(rxq);
> + rxr->rx_raw_prod = 0;
> + rxr->ag_raw_prod = 0;
> + rxr->rx_next_cons = 0;
> + bnxt_init_one_rx_ring(rxq);
> + bnxt_db_write(&rxr->rx_db, rxr->rx_raw_prod);
> + bnxt_db_write(&rxr->ag_db, rxr->ag_raw_prod);
> + if (rxr->tpa_info)
> + bnxt_hwrm_vnic_tpa_cfg(bp, rxq->vnic, true);
> +
> + rxq->in_reset = 0;
> + }
> +}
> +
> +
> +static void bnxt_sched_ring_reset(struct bnxt_rx_queue *rxq)
> +{
> + rxq->in_reset = 1;
> + rte_eal_alarm_set(1, bnxt_rx_ring_reset, (void *)rxq->bp);
> +}
> +
> static void bnxt_tpa_get_metadata(struct bnxt *bp,
> struct bnxt_tpa_info *tpa_info,
> struct rx_tpa_start_cmpl *tpa_start,
> @@ -195,6 +239,12 @@ static void bnxt_tpa_start(struct bnxt_rx_queue *rxq,
>
> data_cons = tpa_start->opaque;
> tpa_info = &rxr->tpa_info[agg_id];
> + if (unlikely(data_cons != rxr->rx_next_cons)) {
> + PMD_DRV_LOG(ERR, "TPA cons %x, expected cons %x\n",
> + data_cons, rxr->rx_next_cons);
> + bnxt_sched_ring_reset(rxq);
> + return;
> + }
>
> mbuf = bnxt_consume_rx_buf(rxr, data_cons);
>
> @@ -233,6 +283,9 @@ static void bnxt_tpa_start(struct bnxt_rx_queue *rxq,
> /* recycle next mbuf */
> data_cons = RING_NEXT(data_cons);
> bnxt_reuse_rx_mbuf(rxr, bnxt_consume_rx_buf(rxr, data_cons));
> +
> + rxr->rx_next_cons = RING_IDX(rxr->rx_ring_struct,
> + RING_NEXT(data_cons));
> }
>
> static int bnxt_agg_bufs_valid(struct bnxt_cp_ring_info *cpr,
> @@ -330,6 +383,34 @@ static int bnxt_rx_pages(struct bnxt_rx_queue *rxq,
> return 0;
> }
>
> +static int bnxt_discard_rx(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
> + uint32_t *raw_cons, void *cmp)
> +{
> + struct rx_pkt_cmpl *rxcmp = cmp;
> + uint32_t tmp_raw_cons = *raw_cons;
> + uint8_t cmp_type, agg_bufs = 0;
> +
> + cmp_type = CMP_TYPE(rxcmp);
> +
> + if (cmp_type == CMPL_BASE_TYPE_RX_L2) {
> + agg_bufs = BNXT_RX_L2_AGG_BUFS(rxcmp);
> + } else if (cmp_type == RX_TPA_END_CMPL_TYPE_RX_TPA_END) {
> + struct rx_tpa_end_cmpl *tpa_end = cmp;
> +
> + if (BNXT_CHIP_P5(bp))
> + return 0;
> +
> + agg_bufs = BNXT_TPA_END_AGG_BUFS(tpa_end);
> + }
> +
> + if (agg_bufs) {
> + if (!bnxt_agg_bufs_valid(cpr, agg_bufs, tmp_raw_cons))
> + return -EBUSY;
> + }
> + *raw_cons = tmp_raw_cons;
> + return 0;
> +}
> +
> static inline struct rte_mbuf *bnxt_tpa_end(
> struct bnxt_rx_queue *rxq,
> uint32_t *raw_cp_cons,
> @@ -344,6 +425,13 @@ static inline struct rte_mbuf *bnxt_tpa_end(
> uint8_t payload_offset;
> struct bnxt_tpa_info *tpa_info;
>
> + if (unlikely(rxq->in_reset)) {
> + PMD_DRV_LOG(ERR, "rxq->in_reset: raw_cp_cons:%d\n",
> + *raw_cp_cons);
> + bnxt_discard_rx(rxq->bp, cpr, raw_cp_cons, tpa_end);
> + return NULL;
> + }
> +
> if (BNXT_CHIP_P5(rxq->bp)) {
> struct rx_tpa_v2_end_cmpl *th_tpa_end;
> struct rx_tpa_v2_end_cmpl_hi *th_tpa_end1;
> @@ -839,6 +927,14 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
> raw_prod = rxr->rx_raw_prod;
>
> cons = rxcmp->opaque;
> + if (unlikely(cons != rxr->rx_next_cons)) {
> + bnxt_discard_rx(bp, cpr, &tmp_raw_cons, rxcmp);
> + PMD_DRV_LOG(ERR, "RX cons %x != expected cons %x\n",
> + cons, rxr->rx_next_cons);
> + bnxt_sched_ring_reset(rxq);
> + rc = -EBUSY;
> + goto next_rx;
> + }
> mbuf = bnxt_consume_rx_buf(rxr, cons);
> if (mbuf == NULL)
> return -EBUSY;
> @@ -911,6 +1007,7 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
> goto rx;
> }
> rxr->rx_raw_prod = raw_prod;
> + rxr->rx_next_cons = RING_IDX(rxr->rx_ring_struct, RING_NEXT(cons));
>
> if (BNXT_TRUFLOW_EN(bp) && (BNXT_VF_IS_TRUSTED(bp) || BNXT_PF(bp)) &&
> vfr_flag) {
> diff --git a/drivers/net/bnxt/bnxt_rxr.h b/drivers/net/bnxt/bnxt_rxr.h
> index b43256e03e..7b7756313e 100644
> --- a/drivers/net/bnxt/bnxt_rxr.h
> +++ b/drivers/net/bnxt/bnxt_rxr.h
> @@ -66,6 +66,7 @@ struct bnxt_rx_ring_info {
> uint16_t rx_raw_prod;
> uint16_t ag_raw_prod;
> uint16_t rx_cons; /* Needed for representor */
> + uint16_t rx_next_cons;
> struct bnxt_db_info rx_db;
> struct bnxt_db_info ag_db;
>
> --
> 2.28.0.497.g54e85e7
>
prev parent reply other threads:[~2021-06-08 17:46 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-05-27 6:18 Somnath Kotur
2021-06-08 17:46 ` Ajit Khaparde [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CACZ4nhurH2TtLKLnq2Uhb1H_Ftyt_jW-GF5UPqHcO+3=bDC=Pg@mail.gmail.com' \
--to=ajit.khaparde@broadcom.com \
--cc=dev@dpdk.org \
--cc=ferruh.yigit@intel.com \
--cc=somnath.kotur@broadcom.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).