From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id DCB05A046B for ; Thu, 22 Aug 2019 07:55:19 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 2A87A1BF58; Thu, 22 Aug 2019 07:54:27 +0200 (CEST) Received: from rnd-relay.smtp.broadcom.com (rnd-relay.smtp.broadcom.com [192.19.229.170]) by dpdk.org (Postfix) with ESMTP id 95C501BF2E for ; Thu, 22 Aug 2019 07:54:09 +0200 (CEST) Received: from nis-sj1-27.broadcom.com (nis-sj1-27.lvn.broadcom.net [10.75.144.136]) by rnd-relay.smtp.broadcom.com (Postfix) with ESMTP id 1DDE230C20B; Wed, 21 Aug 2019 22:54:05 -0700 (PDT) DKIM-Filter: OpenDKIM Filter v2.10.3 rnd-relay.smtp.broadcom.com 1DDE230C20B DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=broadcom.com; s=dkimrelay; t=1566453245; bh=NN6sG8zvVTQ56f/GQqxjo9Iyccp0cdfrm16O9jDD7C8=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=ZLllmjCsztyawNzKCJ4xlqvYY4CiGwEdyXV1bxM1dUoyXf2+NvRG/QClU9Me1DXZt Y+UUcbZXJhhM21c2/+AOqwKuyWJQadZERQbtijjhcaqQQLp3zSaXL1VrgnRH64Yjf6 UwXQdVrC3kNPEq7hFhS00M8KmGibHdj04uvn7l5w= Received: from localhost.localdomain (unknown [10.230.30.225]) by nis-sj1-27.broadcom.com (Postfix) with ESMTP id 066CEAC06AD; Wed, 21 Aug 2019 22:54:05 -0700 (PDT) From: Ajit Khaparde To: dev@dpdk.org Cc: ferruh.yigit@intel.com, Kalesh AP , Somnath Kotur Date: Wed, 21 Aug 2019 22:53:56 -0700 Message-Id: <20190822055400.30119-10-ajit.khaparde@broadcom.com> X-Mailer: git-send-email 2.20.1 (Apple Git-117) In-Reply-To: <20190822055400.30119-1-ajit.khaparde@broadcom.com> References: <20190822055400.30119-1-ajit.khaparde@broadcom.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Subject: [dpdk-dev] [PATCH 09/13] net/bnxt: add code for periodic FW health monitoring X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: Kalesh AP Periodically poll the FW heartbeat register and FW recovery counter registers to check the FW health. Polling frequency will be advertised by the FW in HWRM_ERROR_RECOVERY_QCFG response. Schedule the task upon receiving the async event from FW. Signed-off-by: Kalesh AP Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur --- drivers/net/bnxt/bnxt.h | 5 ++ drivers/net/bnxt/bnxt_cpr.c | 7 +++ drivers/net/bnxt/bnxt_ethdev.c | 89 ++++++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+) diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h index f9147a9a8..a23c4a64c 100644 --- a/drivers/net/bnxt/bnxt.h +++ b/drivers/net/bnxt/bnxt.h @@ -368,6 +368,9 @@ struct bnxt_error_recovery_info { #define BNXT_FLAG_MASTER_FUNC (1 << 2) #define BNXT_FLAG_RECOVERY_ENABLED (1 << 3) uint32_t flags; + + uint32_t last_heart_beat; + uint32_t last_reset_counter; }; /* address space location of register */ @@ -531,6 +534,8 @@ int bnxt_rcv_msg_from_vf(struct bnxt *bp, uint16_t vf_id, void *msg); int is_bnxt_in_error(struct bnxt *bp); int bnxt_map_fw_health_status_regs(struct bnxt *bp); +uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index); +void bnxt_schedule_fw_health_check(struct bnxt *bp); bool is_bnxt_supported(struct rte_eth_dev *dev); bool bnxt_stratus_device(struct bnxt *bp); diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c index 7f5b3314e..a692fbe7c 100644 --- a/drivers/net/bnxt/bnxt_cpr.c +++ b/drivers/net/bnxt/bnxt_cpr.c @@ -88,6 +88,13 @@ void bnxt_handle_async_event(struct bnxt *bp, PMD_DRV_LOG(INFO, "recovery enabled(%d), master function(%d)\n", bnxt_is_recovery_enabled(bp), bnxt_is_master_func(bp)); + + info->last_heart_beat = + bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG); + info->last_reset_counter = + bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG); + + bnxt_schedule_fw_health_check(bp); break; default: PMD_DRV_LOG(INFO, "handle_async_event id = 0x%x\n", event_id); diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c index 52c460d2c..0317eb888 100644 --- a/drivers/net/bnxt/bnxt_ethdev.c +++ b/drivers/net/bnxt/bnxt_ethdev.c @@ -169,6 +169,7 @@ static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu); static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev); static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev); static int bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev); +static void bnxt_cancel_fw_health_check(struct bnxt *bp); int is_bnxt_in_error(struct bnxt *bp) { @@ -880,6 +881,8 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev) /* disable uio/vfio intr/eventfd mapping */ rte_intr_disable(intr_handle); + bnxt_cancel_fw_health_check(bp); + bp->flags &= ~BNXT_FLAG_INIT_DONE; if (bp->eth_dev->data->dev_started) { /* TBD: STOP HW queues DMA */ @@ -3608,6 +3611,92 @@ int bnxt_dev_reset_and_resume(struct bnxt *bp) return rc; } +uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index) +{ + struct bnxt_error_recovery_info *info = bp->recovery_info; + uint32_t reg = info->status_regs[index]; + uint32_t type, offset, val = 0; + + type = BNXT_FW_STATUS_REG_TYPE(reg); + offset = BNXT_FW_STATUS_REG_OFF(reg); + + switch (type) { + case BNXT_FW_STATUS_REG_TYPE_CFG: + rte_pci_read_config(bp->pdev, &val, sizeof(val), offset); + break; + case BNXT_FW_STATUS_REG_TYPE_GRC: + offset = info->mapped_status_regs[index]; + /* FALLTHROUGH */ + case BNXT_FW_STATUS_REG_TYPE_BAR0: + val = rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 + + offset)); + break; + } + + return val; +} + +/* Driver should poll FW heartbeat, reset_counter with the frequency + * advertised by FW in HWRM_ERROR_RECOVERY_QCFG. + * When the driver detects heartbeat stop or change in reset_counter, + * it has to trigger a reset to recover from the error condition. + * A “master PF” is the function who will have the privilege to + * initiate the chimp reset. The master PF will be elected by the + * firmware and will be notified through async message. + */ +static void bnxt_check_fw_health(void *arg) +{ + struct bnxt *bp = arg; + struct bnxt_error_recovery_info *info = bp->recovery_info; + uint32_t val = 0; + + if (!info || !bnxt_is_recovery_enabled(bp) || + is_bnxt_in_error(bp)) + return; + + val = bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG); + if (val == info->last_heart_beat) + goto reset; + + info->last_heart_beat = val; + + val = bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG); + if (val != info->last_reset_counter) + goto reset; + + info->last_reset_counter = val; + + rte_eal_alarm_set(US_PER_MS * info->driver_polling_freq, + bnxt_check_fw_health, (void *)bp); + + return; +reset: + /* Stop DMA to/from device */ + bp->flags |= BNXT_FLAG_FATAL_ERROR; + bp->flags |= BNXT_FLAG_FW_RESET; + + PMD_DRV_LOG(ERR, "Detected FW dead condition\n"); +} + +void bnxt_schedule_fw_health_check(struct bnxt *bp) +{ + uint32_t polling_freq = bp->recovery_info->driver_polling_freq; + + if (!bnxt_is_recovery_enabled(bp)) + return; + + rte_eal_alarm_set(US_PER_MS * polling_freq, + bnxt_check_fw_health, (void *)bp); +} + +static void bnxt_cancel_fw_health_check(struct bnxt *bp) +{ + if (!bnxt_is_recovery_enabled(bp)) + return; + + rte_eal_alarm_cancel(bnxt_check_fw_health, (void *)bp); +} + static bool bnxt_vf_pciid(uint16_t id) { if (id == BROADCOM_DEV_ID_57304_VF || -- 2.20.1 (Apple Git-117)