From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id B0CC5A09FF; Thu, 24 Dec 2020 10:45:30 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id CB4C6CA59; Thu, 24 Dec 2020 10:44:44 +0100 (CET) Received: from relay.smtp-ext.broadcom.com (lpdvacalvio01.broadcom.com [192.19.229.182]) by dpdk.org (Postfix) with ESMTP id 921A1CA35 for ; Thu, 24 Dec 2020 10:44:42 +0100 (CET) Received: from dhcp-10-123-153-55.dhcp.broadcom.net (dhcp-10-123-153-55.dhcp.broadcom.net [10.123.153.55]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by relay.smtp-ext.broadcom.com (Postfix) with ESMTPS id CD1DF3D90E; Thu, 24 Dec 2020 01:44:39 -0800 (PST) DKIM-Filter: OpenDKIM Filter v2.11.0 relay.smtp-ext.broadcom.com CD1DF3D90E DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=broadcom.com; s=dkimrelay; t=1608803081; bh=6pz4tkNYA5M6bziJgRBJm5PBiHtNcAYrBdJ8WqvS/FY=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=LgjKQ4QhiBuYEw/Vrh8Mc/qCXjy85wbxv5A2gOKOzXthPmMs+xSoAD1uNFHbiiIEd rIK2lfm4uvTGgMyHY94/jjLQKP30T8JPGLTjuK2bJT3byTih607ZOLlxZbuBNYyHQz kuAtdWO2mTxCkfKmMSQUosUIY0tOjleaitzKHSd8= From: Somnath Kotur To: dev@dpdk.org Cc: ferruh.yigit@intel.com, Somnath Kotur Date: Thu, 24 Dec 2020 15:05:41 +0530 Message-Id: <20201224093541.13873-4-somnath.kotur@broadcom.com> X-Mailer: git-send-email 2.28.0.450.g3a238e5 In-Reply-To: <20201224093541.13873-1-somnath.kotur@broadcom.com> References: <20201224093541.13873-1-somnath.kotur@broadcom.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [dpdk-dev] [PATCH 3/3] net/bnxt: check for chip reset in dev stop/close ops X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" While the 
error recovery thread is running, an application can invoke the dev_stop or dev_close op, thus triggering a race and unwanted consequences if dev_close is invoked while the recovery is not yet completed. Fix by adding another lock to synchronize between the two threads and returning -EAGAIN if the adapter is in the middle of recovery when the dev_stop or dev_close ops are invoked. Signed-off-by: Somnath Kotur --- drivers/net/bnxt/bnxt.h | 5 ++++ drivers/net/bnxt/bnxt_cpr.c | 2 ++ drivers/net/bnxt/bnxt_ethdev.c | 49 +++++++++++++++++++++++++++++----- 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h index 8374e9fadc..7c135370f0 100644 --- a/drivers/net/bnxt/bnxt.h +++ b/drivers/net/bnxt/bnxt.h @@ -719,6 +719,11 @@ struct bnxt { * health_check_lock */ pthread_mutex_t health_check_lock; + /* synchronize between dev_stop/dev_close_op and + * error recovery thread triggered as part of + * HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY + */ + pthread_mutex_t err_recovery_lock; uint16_t max_req_len; uint16_t max_resp_len; uint16_t hwrm_max_ext_req_len; diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c index ee96ae81bf..6e172a9eea 100644 --- a/drivers/net/bnxt/bnxt_cpr.c +++ b/drivers/net/bnxt/bnxt_cpr.c @@ -133,6 +133,7 @@ void bnxt_handle_async_event(struct bnxt *bp, return; } + pthread_mutex_lock(&bp->err_recovery_lock); event_data = rte_le_to_cpu_32(async_cmp->event_data1); /* timestamp_lo/hi values are in units of 100ms */ bp->fw_reset_max_msecs = async_cmp->timestamp_hi ? 
@@ -152,6 +153,7 @@ void bnxt_handle_async_event(struct bnxt *bp, } bp->flags |= BNXT_FLAG_FW_RESET; + pthread_mutex_unlock(&bp->err_recovery_lock); rte_eal_alarm_set(US_PER_MS, bnxt_dev_reset_and_resume, (void *)bp); break; diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c index b6a9db1b66..a6794a417d 100644 --- a/drivers/net/bnxt/bnxt_ethdev.c +++ b/drivers/net/bnxt/bnxt_ethdev.c @@ -1276,8 +1276,7 @@ static void bnxt_free_switch_domain(struct bnxt *bp) } } -/* Unload the driver, release resources */ -static int bnxt_dev_stop_op(struct rte_eth_dev *eth_dev) +static int bnxt_dev_stop(struct rte_eth_dev *eth_dev) { struct bnxt *bp = eth_dev->data->dev_private; struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); @@ -1345,6 +1344,23 @@ static int bnxt_dev_stop_op(struct rte_eth_dev *eth_dev) return 0; } +/* Unload the driver, release resources */ +static int bnxt_dev_stop_op(struct rte_eth_dev *eth_dev) +{ + struct bnxt *bp = eth_dev->data->dev_private; + + pthread_mutex_lock(&bp->err_recovery_lock); + if (bp->flags & BNXT_FLAG_FW_RESET) { + PMD_DRV_LOG(ERR, + "Adapter recovering from error..Please retry\n"); + pthread_mutex_unlock(&bp->err_recovery_lock); + return -EAGAIN; + } + pthread_mutex_unlock(&bp->err_recovery_lock); + + return bnxt_dev_stop(eth_dev); +} + static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev) { struct bnxt *bp = eth_dev->data->dev_private; @@ -1411,7 +1427,7 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev) return 0; error: - bnxt_dev_stop_op(eth_dev); + bnxt_dev_stop(eth_dev); return rc; } @@ -1421,6 +1437,7 @@ bnxt_uninit_locks(struct bnxt *bp) pthread_mutex_destroy(&bp->flow_lock); pthread_mutex_destroy(&bp->def_cp_lock); pthread_mutex_destroy(&bp->health_check_lock); + pthread_mutex_destroy(&bp->err_recovery_lock); if (bp->rep_info) { pthread_mutex_destroy(&bp->rep_info->vfr_lock); pthread_mutex_destroy(&bp->rep_info->vfr_start_lock); @@ -1435,13 +1452,22 @@ static int bnxt_dev_close_op(struct rte_eth_dev *eth_dev) if 
(rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; + pthread_mutex_lock(&bp->err_recovery_lock); + if (bp->flags & BNXT_FLAG_FW_RESET) { + PMD_DRV_LOG(ERR, + "Adapter recovering from error...Please retry\n"); + pthread_mutex_unlock(&bp->err_recovery_lock); + return -EAGAIN; + } + pthread_mutex_unlock(&bp->err_recovery_lock); + /* cancel the recovery handler before remove dev */ rte_eal_alarm_cancel(bnxt_dev_reset_and_resume, (void *)bp); rte_eal_alarm_cancel(bnxt_dev_recover, (void *)bp); bnxt_cancel_fc_thread(bp); if (eth_dev->data->dev_started) - ret = bnxt_dev_stop_op(eth_dev); + ret = bnxt_dev_stop(eth_dev); bnxt_free_switch_domain(bp); @@ -3655,7 +3681,7 @@ static void bnxt_dev_cleanup(struct bnxt *bp) bp->eth_dev->data->dev_link.link_status = 0; bp->link_info->link_up = 0; if (bp->eth_dev->data->dev_started) - bnxt_dev_stop_op(bp->eth_dev); + bnxt_dev_stop(bp->eth_dev); bnxt_uninit_resources(bp, true); } @@ -3756,6 +3782,7 @@ static void bnxt_dev_recover(void *arg) int timeout = bp->fw_reset_max_msecs; int rc = 0; + pthread_mutex_lock(&bp->err_recovery_lock); /* Clear Error flag so that device re-init should happen */ bp->flags &= ~BNXT_FLAG_FATAL_ERROR; @@ -3792,12 +3819,15 @@ static void bnxt_dev_recover(void *arg) goto err_start; PMD_DRV_LOG(INFO, "Recovered from FW reset\n"); + pthread_mutex_unlock(&bp->err_recovery_lock); + return; err_start: - bnxt_dev_stop_op(bp->eth_dev); + bnxt_dev_stop(bp->eth_dev); err: bp->flags |= BNXT_FLAG_FATAL_ERROR; bnxt_uninit_resources(bp, false); + pthread_mutex_unlock(&bp->err_recovery_lock); PMD_DRV_LOG(ERR, "Failed to recover from FW reset\n"); } @@ -4733,8 +4763,15 @@ bnxt_init_locks(struct bnxt *bp) } err = pthread_mutex_init(&bp->health_check_lock, NULL); - if (err) + if (err) { PMD_DRV_LOG(ERR, "Unable to initialize health_check_lock\n"); + return err; + } + + err = pthread_mutex_init(&bp->err_recovery_lock, NULL); + if (err) + PMD_DRV_LOG(ERR, "Unable to initialize err_recovery_lock\n"); + return err; } -- 2.28.0.497.g54e85e7