From: Ajit Khaparde <ajit.khaparde@broadcom.com>
To: dev@dpdk.org
Cc: ferruh.yigit@intel.com,
Kalesh AP <kalesh-anakkur.purayil@broadcom.com>,
Somnath Kotur <somnath.kotur@broadcom.com>
Subject: [dpdk-dev] [PATCH v2 09/13] net/bnxt: add code for periodic FW health monitoring
Date: Fri, 30 Aug 2019 09:35:33 -0700 [thread overview]
Message-ID: <20190830163537.32704-10-ajit.khaparde@broadcom.com> (raw)
In-Reply-To: <20190830163537.32704-1-ajit.khaparde@broadcom.com>
From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Periodically poll the FW heartbeat register and FW recovery counter
registers to check the FW health. Polling frequency will be
advertised by the FW in HWRM_ERROR_RECOVERY_QCFG response.
Schedule the task upon receiving the async event from FW.
Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
---
drivers/net/bnxt/bnxt.h | 6 +++
drivers/net/bnxt/bnxt_cpr.c | 10 ++++
drivers/net/bnxt/bnxt_ethdev.c | 89 ++++++++++++++++++++++++++++++++++
3 files changed, 105 insertions(+)
diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index f9147a9a8..5579e127c 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -368,6 +368,9 @@ struct bnxt_error_recovery_info {
#define BNXT_FLAG_MASTER_FUNC (1 << 2)
#define BNXT_FLAG_RECOVERY_ENABLED (1 << 3)
uint32_t flags;
+
+ uint32_t last_heart_beat;
+ uint32_t last_reset_counter;
};
/* address space location of register */
@@ -415,6 +418,7 @@ struct bnxt {
#define BNXT_FLAG_FATAL_ERROR (1 << 16)
#define BNXT_FLAG_FW_CAP_IF_CHANGE (1 << 17)
#define BNXT_FLAG_FW_CAP_ERROR_RECOVERY (1 << 18)
+#define BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED (1 << 19)
#define BNXT_FLAG_EXT_STATS_SUPPORTED (1 << 29)
#define BNXT_FLAG_NEW_RM (1 << 30)
#define BNXT_FLAG_INIT_DONE (1U << 31)
@@ -531,6 +535,8 @@ int bnxt_rcv_msg_from_vf(struct bnxt *bp, uint16_t vf_id, void *msg);
int is_bnxt_in_error(struct bnxt *bp);
int bnxt_map_fw_health_status_regs(struct bnxt *bp);
+uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index);
+void bnxt_schedule_fw_health_check(struct bnxt *bp);
bool is_bnxt_supported(struct rte_eth_dev *dev);
bool bnxt_stratus_device(struct bnxt *bp);
diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index a70301adc..3cedb891e 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -90,6 +90,16 @@ void bnxt_handle_async_event(struct bnxt *bp,
PMD_DRV_LOG(INFO, "recovery enabled(%d), master function(%d)\n",
bnxt_is_recovery_enabled(bp),
bnxt_is_master_func(bp));
+
+ if (bp->flags & BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED)
+ return;
+
+ info->last_heart_beat =
+ bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG);
+ info->last_reset_counter =
+ bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG);
+
+ bnxt_schedule_fw_health_check(bp);
break;
default:
PMD_DRV_LOG(INFO, "handle_async_event id = 0x%x\n", event_id);
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index a0de259da..62a4a65fb 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -3607,6 +3607,94 @@ void bnxt_dev_reset_and_resume(void *arg)
PMD_DRV_LOG(ERR, "Error setting recovery alarm");
}
+uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index)
+{
+ struct bnxt_error_recovery_info *info = bp->recovery_info;
+ uint32_t reg = info->status_regs[index];
+ uint32_t type, offset, val = 0;
+
+ type = BNXT_FW_STATUS_REG_TYPE(reg);
+ offset = BNXT_FW_STATUS_REG_OFF(reg);
+
+ switch (type) {
+ case BNXT_FW_STATUS_REG_TYPE_CFG:
+ rte_pci_read_config(bp->pdev, &val, sizeof(val), offset);
+ break;
+ case BNXT_FW_STATUS_REG_TYPE_GRC:
+ offset = info->mapped_status_regs[index];
+ /* FALLTHROUGH */
+ case BNXT_FW_STATUS_REG_TYPE_BAR0:
+ val = rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 +
+ offset));
+ break;
+ }
+
+ return val;
+}
+
+/* Driver should poll FW heartbeat, reset_counter with the frequency
+ * advertised by FW in HWRM_ERROR_RECOVERY_QCFG.
+ * When the driver detects heartbeat stop or change in reset_counter,
+ * it has to trigger a reset to recover from the error condition.
+ * A “master PF” is the function who will have the privilege to
+ * initiate the chimp reset. The master PF will be elected by the
+ * firmware and will be notified through async message.
+ */
+static void bnxt_check_fw_health(void *arg)
+{
+ struct bnxt *bp = arg;
+ struct bnxt_error_recovery_info *info = bp->recovery_info;
+ uint32_t val = 0;
+
+ if (!info || !bnxt_is_recovery_enabled(bp) ||
+ is_bnxt_in_error(bp))
+ return;
+
+ val = bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG);
+ if (val == info->last_heart_beat)
+ goto reset;
+
+ info->last_heart_beat = val;
+
+ val = bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG);
+ if (val != info->last_reset_counter)
+ goto reset;
+
+ info->last_reset_counter = val;
+
+ rte_eal_alarm_set(US_PER_MS * info->driver_polling_freq,
+ bnxt_check_fw_health, (void *)bp);
+
+ return;
+reset:
+ /* Stop DMA to/from device */
+ bp->flags |= BNXT_FLAG_FATAL_ERROR;
+ bp->flags |= BNXT_FLAG_FW_RESET;
+
+ PMD_DRV_LOG(ERR, "Detected FW dead condition\n");
+}
+
+void bnxt_schedule_fw_health_check(struct bnxt *bp)
+{
+ uint32_t polling_freq = bp->recovery_info->driver_polling_freq;
+
+ if (!bnxt_is_recovery_enabled(bp))
+ return;
+
+ rte_eal_alarm_set(US_PER_MS * polling_freq,
+ bnxt_check_fw_health, (void *)bp);
+ bp->flags |= BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED;
+}
+
+static void bnxt_cancel_fw_health_check(struct bnxt *bp)
+{
+ if (!bnxt_is_recovery_enabled(bp))
+ return;
+
+ rte_eal_alarm_cancel(bnxt_check_fw_health, (void *)bp);
+ bp->flags &= ~BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED;
+}
+
static bool bnxt_vf_pciid(uint16_t id)
{
if (id == BROADCOM_DEV_ID_57304_VF ||
@@ -4269,6 +4357,7 @@ bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev)
bnxt_free_int(bp);
bnxt_free_mem(bp, reconfig_dev);
bnxt_hwrm_func_buf_unrgtr(bp);
+ bnxt_cancel_fw_health_check(bp);
bnxt_unmap_fw_health_status_regs(bp);
rc = bnxt_hwrm_func_driver_unregister(bp, 0);
bp->flags &= ~BNXT_FLAG_REGISTERED;
--
2.20.1 (Apple Git-117)
next prev parent reply other threads:[~2019-08-30 16:37 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-08-22 5:53 [dpdk-dev] [PATCH 00/13] bnxt patchset to support device error recovery Ajit Khaparde
2019-08-22 5:53 ` [dpdk-dev] [PATCH 01/13] net/bnxt: hsi version update Ajit Khaparde
2019-08-27 13:51 ` Ferruh Yigit
2019-08-30 16:35 ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ajit Khaparde
2019-08-30 16:35 ` [dpdk-dev] [PATCH v2 01/13] net/bnxt: add FW reset HWRM command Ajit Khaparde
2019-08-30 16:35 ` [dpdk-dev] [PATCH v2 02/13] net/bnxt: prevent device access when device is in reset Ajit Khaparde
2019-08-30 16:35 ` [dpdk-dev] [PATCH v2 03/13] net/bnxt: handle reset notify async event from FW Ajit Khaparde
2019-08-30 16:35 ` [dpdk-dev] [PATCH v2 04/13] net/bnxt: inform firmware about IF state changes Ajit Khaparde
2019-08-30 16:35 ` [dpdk-dev] [PATCH v2 05/13] net/bnxt: handle fatal event from FW under error conditions Ajit Khaparde
2019-08-30 16:35 ` [dpdk-dev] [PATCH v2 06/13] net/bnxt: query firmware error recovery capabilities Ajit Khaparde
2019-08-30 16:35 ` [dpdk-dev] [PATCH v2 07/13] net/bnxt: map status registers for FW health monitoring Ajit Khaparde
2019-08-30 16:35 ` [dpdk-dev] [PATCH v2 08/13] net/bnxt: advertise error recovery capability and handle async event Ajit Khaparde
2019-08-30 16:35 ` Ajit Khaparde [this message]
2019-08-30 16:35 ` [dpdk-dev] [PATCH v2 10/13] net/bnxt: add support for FW reset Ajit Khaparde
2019-08-30 16:35 ` [dpdk-dev] [PATCH v2 11/13] net/bnxt: reduce verbosity of logs Ajit Khaparde
2019-08-30 16:35 ` [dpdk-dev] [PATCH v2 12/13] net/bnxt: use BIT macro instead of bit fields Ajit Khaparde
2019-08-30 16:35 ` [dpdk-dev] [PATCH v2 13/13] net/bnxt: avoid null pointer dereference Ajit Khaparde
2019-09-30 13:29 ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ferruh Yigit
2019-10-02 1:23 ` [dpdk-dev] [PATCH v3 00/15] " Ajit Khaparde
2019-10-02 1:23 ` [dpdk-dev] [PATCH v3 01/15] net/bnxt: add FW reset HWRM command Ajit Khaparde
2019-10-02 1:23 ` [dpdk-dev] [PATCH v3 02/15] net/bnxt: prevent device access when device is in reset Ajit Khaparde
2019-10-02 1:23 ` [dpdk-dev] [PATCH v3 03/15] net/bnxt: handle reset notify async event from FW Ajit Khaparde
2019-10-02 1:23 ` [dpdk-dev] [PATCH v3 04/15] net/bnxt: inform firmware about IF state changes Ajit Khaparde
2019-10-02 1:23 ` [dpdk-dev] [PATCH v3 05/15] net/bnxt: handle fatal event from FW under error conditions Ajit Khaparde
2019-10-02 1:23 ` [dpdk-dev] [PATCH v3 06/15] net/bnxt: query firmware error recovery capabilities Ajit Khaparde
2019-10-02 1:23 ` [dpdk-dev] [PATCH v3 07/15] net/bnxt: map status registers for FW health monitoring Ajit Khaparde
2019-10-02 1:23 ` [dpdk-dev] [PATCH v3 08/15] net/bnxt: advertise error recovery capability and handle async event Ajit Khaparde
2019-10-02 1:23 ` [dpdk-dev] [PATCH v3 09/15] net/bnxt: add code for periodic FW health monitoring Ajit Khaparde
2019-10-02 1:23 ` [dpdk-dev] [PATCH v3 10/15] net/bnxt: add support for FW reset Ajit Khaparde
2019-10-02 1:23 ` [dpdk-dev] [PATCH v3 11/15] net/bnxt: add hot firmware upgrade support for Stingray Ajit Khaparde
2019-10-02 1:23 ` [dpdk-dev] [PATCH v3 12/15] net/bnxt: reduce verbosity of logs Ajit Khaparde
2019-10-02 1:23 ` [dpdk-dev] [PATCH v3 13/15] net/bnxt: avoid null pointer dereference Ajit Khaparde
2019-10-02 1:23 ` [dpdk-dev] [PATCH v3 14/15] net/bnxt: use BIT macro instead of bit fields Ajit Khaparde
2019-10-02 1:23 ` [dpdk-dev] [PATCH v3 15/15] net/bnxt: add PTP support for Thor Ajit Khaparde
2019-10-02 17:02 ` [dpdk-dev] [PATCH v3 00/15] bnxt patchset to support device error recovery Ferruh Yigit
2019-08-22 5:53 ` [dpdk-dev] [PATCH 02/13] net/bnxt: prevent device access when device is in reset Ajit Khaparde
2019-08-27 15:00 ` Ferruh Yigit
2019-08-22 5:53 ` [dpdk-dev] [PATCH 03/13] net/bnxt: handle reset notify async event from FW Ajit Khaparde
2019-08-22 5:53 ` [dpdk-dev] [PATCH 04/13] net/bnxt: inform firmware about IF state changes Ajit Khaparde
2019-08-22 5:53 ` [dpdk-dev] [PATCH 05/13] net/bnxt: handle fatal event from FW under error conditions Ajit Khaparde
2019-08-22 5:53 ` [dpdk-dev] [PATCH 06/13] net/bnxt: query firmware error recovery capabilities Ajit Khaparde
2019-08-22 5:53 ` [dpdk-dev] [PATCH 07/13] net/bnxt: map status registers for FW health monitoring Ajit Khaparde
2019-08-22 5:53 ` [dpdk-dev] [PATCH 08/13] net/bnxt: advertise error recovery capability and handle async event Ajit Khaparde
2019-08-22 5:53 ` [dpdk-dev] [PATCH 09/13] net/bnxt: add code for periodic FW health monitoring Ajit Khaparde
2019-08-22 5:53 ` [dpdk-dev] [PATCH 10/13] net/bnxt: use BIT macro instead of bit fields Ajit Khaparde
2019-08-22 5:53 ` [dpdk-dev] [PATCH 11/13] net/bnxt: reschedule the health check alarm correctly Ajit Khaparde
2019-08-22 5:53 ` [dpdk-dev] [PATCH 12/13] net/bnxt: add support for FW reset Ajit Khaparde
2019-08-22 5:54 ` [dpdk-dev] [PATCH 13/13] net/bnxt: reduce verbosity of logs Ajit Khaparde
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190830163537.32704-10-ajit.khaparde@broadcom.com \
--to=ajit.khaparde@broadcom.com \
--cc=dev@dpdk.org \
--cc=ferruh.yigit@intel.com \
--cc=kalesh-anakkur.purayil@broadcom.com \
--cc=somnath.kotur@broadcom.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).