From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id C0566A0613 for ; Fri, 30 Aug 2019 18:37:18 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 6383F1E9C8; Fri, 30 Aug 2019 18:36:08 +0200 (CEST) Received: from rnd-relay.smtp.broadcom.com (rnd-relay.smtp.broadcom.com [192.19.229.170]) by dpdk.org (Postfix) with ESMTP id 6D0AA1E999 for ; Fri, 30 Aug 2019 18:35:48 +0200 (CEST) Received: from nis-sj1-27.broadcom.com (nis-sj1-27.lvn.broadcom.net [10.75.144.136]) by rnd-relay.smtp.broadcom.com (Postfix) with ESMTP id 828EA30C06D; Fri, 30 Aug 2019 09:35:41 -0700 (PDT) DKIM-Filter: OpenDKIM Filter v2.10.3 rnd-relay.smtp.broadcom.com 828EA30C06D DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=broadcom.com; s=dkimrelay; t=1567182941; bh=gK23BJ+WJntaXysMpwAZaVDXjl5eMgufuPucpY7Nr4U=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=cjxAsWF8LX6Wvf8dncElafxVRHKExclnzhwtlP1H5QqRpXAF20pNB8GVHHbmrO+ZS NTzSjwTJmkYEPQvnI+DP1Mu1SME8qCy93QeogpUzgMDU1LTfX4/asUx9ovQTlqmEul n3jt6Krc/MfsBW6DRPJPKYLQ0Q0wTKXijSHw/B3M= Received: from localhost.localdomain (unknown [10.230.30.225]) by nis-sj1-27.broadcom.com (Postfix) with ESMTP id 159ECAC06AB; Fri, 30 Aug 2019 09:35:43 -0700 (PDT) From: Ajit Khaparde To: dev@dpdk.org Cc: ferruh.yigit@intel.com, Kalesh AP , Somnath Kotur Date: Fri, 30 Aug 2019 09:35:33 -0700 Message-Id: <20190830163537.32704-10-ajit.khaparde@broadcom.com> X-Mailer: git-send-email 2.20.1 (Apple Git-117) In-Reply-To: <20190830163537.32704-1-ajit.khaparde@broadcom.com> References: <2a851a62-f5a1-7061-edb4-412db0d335ce@intel.com> <20190830163537.32704-1-ajit.khaparde@broadcom.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Subject: [dpdk-dev] [PATCH v2 09/13] net/bnxt: add code for periodic FW health monitoring X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: Kalesh AP Periodically poll the FW heartbeat register and FW recovery counter registers to check the FW health. Polling frequency will be advertised by the FW in HWRM_ERROR_RECOVERY_QCFG response. Schedule the task upon receiving the async event from FW. Signed-off-by: Kalesh AP Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur --- drivers/net/bnxt/bnxt.h | 6 +++ drivers/net/bnxt/bnxt_cpr.c | 10 ++++ drivers/net/bnxt/bnxt_ethdev.c | 89 ++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+) diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h index f9147a9a8..5579e127c 100644 --- a/drivers/net/bnxt/bnxt.h +++ b/drivers/net/bnxt/bnxt.h @@ -368,6 +368,9 @@ struct bnxt_error_recovery_info { #define BNXT_FLAG_MASTER_FUNC (1 << 2) #define BNXT_FLAG_RECOVERY_ENABLED (1 << 3) uint32_t flags; + + uint32_t last_heart_beat; + uint32_t last_reset_counter; }; /* address space location of register */ @@ -415,6 +418,7 @@ struct bnxt { #define BNXT_FLAG_FATAL_ERROR (1 << 16) #define BNXT_FLAG_FW_CAP_IF_CHANGE (1 << 17) #define BNXT_FLAG_FW_CAP_ERROR_RECOVERY (1 << 18) +#define BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED (1 << 19) #define BNXT_FLAG_EXT_STATS_SUPPORTED (1 << 29) #define BNXT_FLAG_NEW_RM (1 << 30) #define BNXT_FLAG_INIT_DONE (1U << 31) @@ -531,6 +535,8 @@ int bnxt_rcv_msg_from_vf(struct bnxt *bp, uint16_t vf_id, void *msg); int is_bnxt_in_error(struct bnxt *bp); int bnxt_map_fw_health_status_regs(struct bnxt *bp); +uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index); +void bnxt_schedule_fw_health_check(struct bnxt *bp); bool is_bnxt_supported(struct rte_eth_dev *dev); bool bnxt_stratus_device(struct bnxt *bp); diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c index a70301adc..3cedb891e 100644 --- a/drivers/net/bnxt/bnxt_cpr.c +++ b/drivers/net/bnxt/bnxt_cpr.c @@ -90,6 +90,16 @@ void bnxt_handle_async_event(struct bnxt *bp, PMD_DRV_LOG(INFO, "recovery enabled(%d), master function(%d)\n", bnxt_is_recovery_enabled(bp), bnxt_is_master_func(bp)); + + if (bp->flags & BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED) + return; + + info->last_heart_beat = + bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG); + info->last_reset_counter = + bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG); + + bnxt_schedule_fw_health_check(bp); break; default: PMD_DRV_LOG(INFO, "handle_async_event id = 0x%x\n", event_id); diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c index a0de259da..62a4a65fb 100644 --- a/drivers/net/bnxt/bnxt_ethdev.c +++ b/drivers/net/bnxt/bnxt_ethdev.c @@ -3607,6 +3607,94 @@ void bnxt_dev_reset_and_resume(void *arg) PMD_DRV_LOG(ERR, "Error setting recovery alarm"); } +uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index) +{ + struct bnxt_error_recovery_info *info = bp->recovery_info; + uint32_t reg = info->status_regs[index]; + uint32_t type, offset, val = 0; + + type = BNXT_FW_STATUS_REG_TYPE(reg); + offset = BNXT_FW_STATUS_REG_OFF(reg); + + switch (type) { + case BNXT_FW_STATUS_REG_TYPE_CFG: + rte_pci_read_config(bp->pdev, &val, sizeof(val), offset); + break; + case BNXT_FW_STATUS_REG_TYPE_GRC: + offset = info->mapped_status_regs[index]; + /* FALLTHROUGH */ + case BNXT_FW_STATUS_REG_TYPE_BAR0: + val = rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 + + offset)); + break; + } + + return val; +} + +/* Driver should poll FW heartbeat, reset_counter with the frequency + * advertised by FW in HWRM_ERROR_RECOVERY_QCFG. + * When the driver detects heartbeat stop or change in reset_counter, + * it has to trigger a reset to recover from the error condition. + * A “master PF” is the function who will have the privilege to + * initiate the chimp reset. The master PF will be elected by the + * firmware and will be notified through async message. + */ +static void bnxt_check_fw_health(void *arg) +{ + struct bnxt *bp = arg; + struct bnxt_error_recovery_info *info = bp->recovery_info; + uint32_t val = 0; + + if (!info || !bnxt_is_recovery_enabled(bp) || + is_bnxt_in_error(bp)) + return; + + val = bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG); + if (val == info->last_heart_beat) + goto reset; + + info->last_heart_beat = val; + + val = bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG); + if (val != info->last_reset_counter) + goto reset; + + info->last_reset_counter = val; + + rte_eal_alarm_set(US_PER_MS * info->driver_polling_freq, + bnxt_check_fw_health, (void *)bp); + + return; +reset: + /* Stop DMA to/from device */ + bp->flags |= BNXT_FLAG_FATAL_ERROR; + bp->flags |= BNXT_FLAG_FW_RESET; + + PMD_DRV_LOG(ERR, "Detected FW dead condition\n"); +} + +void bnxt_schedule_fw_health_check(struct bnxt *bp) +{ + uint32_t polling_freq = bp->recovery_info->driver_polling_freq; + + if (!bnxt_is_recovery_enabled(bp)) + return; + + rte_eal_alarm_set(US_PER_MS * polling_freq, + bnxt_check_fw_health, (void *)bp); + bp->flags |= BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED; +} + +static void bnxt_cancel_fw_health_check(struct bnxt *bp) +{ + if (!bnxt_is_recovery_enabled(bp)) + return; + + rte_eal_alarm_cancel(bnxt_check_fw_health, (void *)bp); + bp->flags &= ~BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED; +} + static bool bnxt_vf_pciid(uint16_t id) { if (id == BROADCOM_DEV_ID_57304_VF || @@ -4269,6 +4357,7 @@ bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev) bnxt_free_int(bp); bnxt_free_mem(bp, reconfig_dev); bnxt_hwrm_func_buf_unrgtr(bp); + bnxt_cancel_fw_health_check(bp); bnxt_unmap_fw_health_status_regs(bp); rc = bnxt_hwrm_func_driver_unregister(bp, 0); bp->flags &= ~BNXT_FLAG_REGISTERED; -- 2.20.1 (Apple Git-117)