From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id A2EF0A2EDB for ; Wed, 2 Oct 2019 03:25:42 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id F3B681BF10; Wed, 2 Oct 2019 03:24:16 +0200 (CEST) Received: from rnd-relay.smtp.broadcom.com (unknown [192.19.229.170]) by dpdk.org (Postfix) with ESMTP id 33EA01BE81 for ; Wed, 2 Oct 2019 03:23:48 +0200 (CEST) Received: from mail-irv-17.broadcom.com (mail-irv-17.lvn.broadcom.net [10.75.242.48]) by rnd-relay.smtp.broadcom.com (Postfix) with ESMTP id DA9E730CC53; Tue, 1 Oct 2019 18:22:31 -0700 (PDT) DKIM-Filter: OpenDKIM Filter v2.10.3 rnd-relay.smtp.broadcom.com DA9E730CC53 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=broadcom.com; s=dkimrelay; t=1569979351; bh=4J0nXGVkbXK75fVdFsTLE7sWLy6uV/fIKKfPaEF99GU=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=gQ9ykmfWq8L0MxgfGe71v4Qmcp8pvuTCcZg5S3KpOY4G5APHS0Zw9RrH9wIt3gCJK 5YjIJjrX8hI97v75ceo3+qj6FMiBgULejixkC26cfPCtfkH3eLQ16gE/lRbgGtA1Va ba/L+DehfNcpbnrAUOIS4DsFFTVQ+27H7bKHib4M= Received: from localhost.localdomain (unknown [10.230.30.225]) by mail-irv-17.broadcom.com (Postfix) with ESMTP id 6BBEB140069; Tue, 1 Oct 2019 18:23:41 -0700 (PDT) From: Ajit Khaparde To: dev@dpdk.org Cc: ferruh.yigit@intel.com, Kalesh AP , Somnath Kotur Date: Tue, 1 Oct 2019 18:23:29 -0700 Message-Id: <20191002012335.85324-10-ajit.khaparde@broadcom.com> X-Mailer: git-send-email 2.20.1 (Apple Git-117) In-Reply-To: <20191002012335.85324-1-ajit.khaparde@broadcom.com> References: <7c08999f-13f3-5fb6-39a2-557a0884bfde@intel.com> <20191002012335.85324-1-ajit.khaparde@broadcom.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Subject: [dpdk-dev] [PATCH v3 09/15] net/bnxt: add code for periodic FW health monitoring X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: Kalesh AP Periodically poll the FW heartbeat register and FW recovery counter registers to check the FW health. Polling frequency will be advertised by the FW in HWRM_ERROR_RECOVERY_QCFG response. Schedule the task upon receiving the async event from FW. Signed-off-by: Kalesh AP Reviewed-by: Ajit Khaparde Reviewed-by: Somnath Kotur --- drivers/net/bnxt/bnxt.h | 6 +++ drivers/net/bnxt/bnxt_cpr.c | 10 ++++ drivers/net/bnxt/bnxt_ethdev.c | 97 ++++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+) diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h index ea556d0792..50b9b38565 100644 --- a/drivers/net/bnxt/bnxt.h +++ b/drivers/net/bnxt/bnxt.h @@ -368,6 +368,9 @@ struct bnxt_error_recovery_info { #define BNXT_FLAG_MASTER_FUNC (1 << 2) #define BNXT_FLAG_RECOVERY_ENABLED (1 << 3) uint32_t flags; + + uint32_t last_heart_beat; + uint32_t last_reset_counter; }; /* address space location of register */ @@ -416,6 +419,7 @@ struct bnxt { #define BNXT_FLAG_FW_CAP_IF_CHANGE (1 << 17) #define BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE (1 << 18) #define BNXT_FLAG_FW_CAP_ERROR_RECOVERY (1 << 19) +#define BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED (1 << 20) #define BNXT_FLAG_EXT_STATS_SUPPORTED (1 << 29) #define BNXT_FLAG_NEW_RM (1 << 30) #define BNXT_FLAG_INIT_DONE (1U << 31) @@ -532,6 +536,8 @@ int bnxt_rcv_msg_from_vf(struct bnxt *bp, uint16_t vf_id, void *msg); int is_bnxt_in_error(struct bnxt *bp); int bnxt_map_fw_health_status_regs(struct bnxt *bp); +uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index); +void bnxt_schedule_fw_health_check(struct bnxt *bp); bool is_bnxt_supported(struct rte_eth_dev *dev); bool bnxt_stratus_device(struct bnxt *bp); diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c index 1b046bbfac..38931799eb 100644 --- a/drivers/net/bnxt/bnxt_cpr.c +++ b/drivers/net/bnxt/bnxt_cpr.c @@ -90,6 +90,16 @@ void bnxt_handle_async_event(struct bnxt *bp, PMD_DRV_LOG(INFO, "recovery enabled(%d), master function(%d)\n", bnxt_is_recovery_enabled(bp), bnxt_is_master_func(bp)); + + if (bp->flags & BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED) + return; + + info->last_heart_beat = + bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG); + info->last_reset_counter = + bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG); + + bnxt_schedule_fw_health_check(bp); break; default: PMD_DRV_LOG(INFO, "handle_async_event id = 0x%x\n", event_id); diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c index a23ca2b53d..d28f1bd8f9 100644 --- a/drivers/net/bnxt/bnxt_ethdev.c +++ b/drivers/net/bnxt/bnxt_ethdev.c @@ -169,6 +169,7 @@ static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu); static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev); static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev); static int bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev); +static void bnxt_cancel_fw_health_check(struct bnxt *bp); int is_bnxt_in_error(struct bnxt *bp) { @@ -858,6 +859,7 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev) bp->flags |= BNXT_FLAG_INIT_DONE; eth_dev->data->dev_started = 1; bp->dev_stopped = 0; + bnxt_schedule_fw_health_check(bp); return 0; error: @@ -910,6 +912,8 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev) /* disable uio/vfio intr/eventfd mapping */ rte_intr_disable(intr_handle); + bnxt_cancel_fw_health_check(bp); + bp->flags &= ~BNXT_FLAG_INIT_DONE; if (bp->eth_dev->data->dev_started) { /* TBD: STOP HW queues DMA */ @@ -3682,6 +3686,99 @@ void bnxt_dev_reset_and_resume(void *arg) PMD_DRV_LOG(ERR, "Error setting recovery alarm"); } +uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index) +{ + struct bnxt_error_recovery_info *info = bp->recovery_info; + uint32_t reg = info->status_regs[index]; + uint32_t type, offset, val = 0; + + type = BNXT_FW_STATUS_REG_TYPE(reg); + offset = BNXT_FW_STATUS_REG_OFF(reg); + + switch (type) { + case BNXT_FW_STATUS_REG_TYPE_CFG: + rte_pci_read_config(bp->pdev, &val, sizeof(val), offset); + break; + case BNXT_FW_STATUS_REG_TYPE_GRC: + offset = info->mapped_status_regs[index]; + /* FALLTHROUGH */ + case BNXT_FW_STATUS_REG_TYPE_BAR0: + val = rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 + + offset)); + break; + } + + return val; +} + +/* Driver should poll FW heartbeat, reset_counter with the frequency + * advertised by FW in HWRM_ERROR_RECOVERY_QCFG. + * When the driver detects heartbeat stop or change in reset_counter, + * it has to trigger a reset to recover from the error condition. + * A “master PF” is the function who will have the privilege to + * initiate the chimp reset. The master PF will be elected by the + * firmware and will be notified through async message. + */ +static void bnxt_check_fw_health(void *arg) +{ + struct bnxt *bp = arg; + struct bnxt_error_recovery_info *info = bp->recovery_info; + uint32_t val = 0; + + if (!info || !bnxt_is_recovery_enabled(bp) || + is_bnxt_in_error(bp)) + return; + + val = bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG); + if (val == info->last_heart_beat) + goto reset; + + info->last_heart_beat = val; + + val = bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG); + if (val != info->last_reset_counter) + goto reset; + + info->last_reset_counter = val; + + rte_eal_alarm_set(US_PER_MS * info->driver_polling_freq, + bnxt_check_fw_health, (void *)bp); + + return; +reset: + /* Stop DMA to/from device */ + bp->flags |= BNXT_FLAG_FATAL_ERROR; + bp->flags |= BNXT_FLAG_FW_RESET; + + PMD_DRV_LOG(ERR, "Detected FW dead condition\n"); +} + +void bnxt_schedule_fw_health_check(struct bnxt *bp) +{ + uint32_t polling_freq; + + if (!bnxt_is_recovery_enabled(bp)) + return; + + if (bp->flags & BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED) + return; + + polling_freq = bp->recovery_info->driver_polling_freq; + + rte_eal_alarm_set(US_PER_MS * polling_freq, + bnxt_check_fw_health, (void *)bp); + bp->flags |= BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED; +} + +static void bnxt_cancel_fw_health_check(struct bnxt *bp) +{ + if (!bnxt_is_recovery_enabled(bp)) + return; + + rte_eal_alarm_cancel(bnxt_check_fw_health, (void *)bp); + bp->flags &= ~BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED; +} + static bool bnxt_vf_pciid(uint16_t id) { if (id == BROADCOM_DEV_ID_57304_VF || -- 2.20.1 (Apple Git-117)