From: Somnath Kotur <somnath.kotur@broadcom.com>
To: dev@dpdk.org
Cc: ferruh.yigit@intel.com, Somnath Kotur <somnath.kotur@broadcom.com>
Subject: [dpdk-dev] [PATCH 3/3] net/bnxt: check for chip reset in dev stop/close ops
Date: Thu, 24 Dec 2020 15:05:41 +0530 [thread overview]
Message-ID: <20201224093541.13873-4-somnath.kotur@broadcom.com> (raw)
In-Reply-To: <20201224093541.13873-1-somnath.kotur@broadcom.com>
While the error recovery thread is running, an application
can invoke dev_stop or dev_close_op thus triggering a race
and unwanted consequences if dev_close is invoked while the
recovery is not yet completed.
Fix by adding another lock to synchronize between the two threads and
return -EAGAIN if the adapter is in the middle of recovery when the
dev_stop or dev_close ops are invoked.
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
---
drivers/net/bnxt/bnxt.h | 5 ++++
drivers/net/bnxt/bnxt_cpr.c | 2 ++
drivers/net/bnxt/bnxt_ethdev.c | 49 +++++++++++++++++++++++++++++-----
3 files changed, 49 insertions(+), 7 deletions(-)
diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 8374e9fadc..7c135370f0 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -719,6 +719,11 @@ struct bnxt {
* health_check_lock
*/
pthread_mutex_t health_check_lock;
+ /* synchronize between dev_stop/dev_close_op and
+ * error recovery thread triggered as part of
+ * HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY
+ */
+ pthread_mutex_t err_recovery_lock;
uint16_t max_req_len;
uint16_t max_resp_len;
uint16_t hwrm_max_ext_req_len;
diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index ee96ae81bf..6e172a9eea 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -133,6 +133,7 @@ void bnxt_handle_async_event(struct bnxt *bp,
return;
}
+ pthread_mutex_lock(&bp->err_recovery_lock);
event_data = rte_le_to_cpu_32(async_cmp->event_data1);
/* timestamp_lo/hi values are in units of 100ms */
bp->fw_reset_max_msecs = async_cmp->timestamp_hi ?
@@ -152,6 +153,7 @@ void bnxt_handle_async_event(struct bnxt *bp,
}
bp->flags |= BNXT_FLAG_FW_RESET;
+ pthread_mutex_unlock(&bp->err_recovery_lock);
rte_eal_alarm_set(US_PER_MS, bnxt_dev_reset_and_resume,
(void *)bp);
break;
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index b6a9db1b66..a6794a417d 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -1276,8 +1276,7 @@ static void bnxt_free_switch_domain(struct bnxt *bp)
}
}
-/* Unload the driver, release resources */
-static int bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
+static int bnxt_dev_stop(struct rte_eth_dev *eth_dev)
{
struct bnxt *bp = eth_dev->data->dev_private;
struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
@@ -1345,6 +1344,22 @@ static int bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
return 0;
}
+/* Unload the driver, release resources; -EAGAIN if FW reset in progress */
+static int bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
+{
+ struct bnxt *bp = eth_dev->data->dev_private;
+
+ pthread_mutex_lock(&bp->err_recovery_lock);
+ if (bp->flags & BNXT_FLAG_FW_RESET) {
+ PMD_DRV_LOG(ERR, "Adapter recovering from error..Please retry\n");
+ /* Release before bailing out so later lockers are not blocked */
+ pthread_mutex_unlock(&bp->err_recovery_lock);
+ return -EAGAIN;
+ }
+ pthread_mutex_unlock(&bp->err_recovery_lock);
+ return bnxt_dev_stop(eth_dev);
+}
+
static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
{
struct bnxt *bp = eth_dev->data->dev_private;
@@ -1411,7 +1426,7 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
return 0;
error:
- bnxt_dev_stop_op(eth_dev);
+ bnxt_dev_stop(eth_dev);
return rc;
}
@@ -1421,6 +1436,7 @@ bnxt_uninit_locks(struct bnxt *bp)
pthread_mutex_destroy(&bp->flow_lock);
pthread_mutex_destroy(&bp->def_cp_lock);
pthread_mutex_destroy(&bp->health_check_lock);
+ pthread_mutex_destroy(&bp->err_recovery_lock);
if (bp->rep_info) {
pthread_mutex_destroy(&bp->rep_info->vfr_lock);
pthread_mutex_destroy(&bp->rep_info->vfr_start_lock);
@@ -1435,13 +1451,21 @@ static int bnxt_dev_close_op(struct rte_eth_dev *eth_dev)
if (rte_eal_process_type() != RTE_PROC_PRIMARY)
return 0;
+ /* Refuse to close while FW reset recovery is pending; caller retries */
+ pthread_mutex_lock(&bp->err_recovery_lock);
+ if (bp->flags & BNXT_FLAG_FW_RESET) {
+ PMD_DRV_LOG(ERR, "Adapter recovering from error...Please retry\n");
+ pthread_mutex_unlock(&bp->err_recovery_lock);
+ return -EAGAIN;
+ }
+ pthread_mutex_unlock(&bp->err_recovery_lock);
/* cancel the recovery handler before remove dev */
rte_eal_alarm_cancel(bnxt_dev_reset_and_resume, (void *)bp);
rte_eal_alarm_cancel(bnxt_dev_recover, (void *)bp);
bnxt_cancel_fc_thread(bp);
if (eth_dev->data->dev_started)
- ret = bnxt_dev_stop_op(eth_dev);
+ ret = bnxt_dev_stop(eth_dev);
bnxt_free_switch_domain(bp);
@@ -3655,7 +3679,7 @@ static void bnxt_dev_cleanup(struct bnxt *bp)
bp->eth_dev->data->dev_link.link_status = 0;
bp->link_info->link_up = 0;
if (bp->eth_dev->data->dev_started)
- bnxt_dev_stop_op(bp->eth_dev);
+ bnxt_dev_stop(bp->eth_dev);
bnxt_uninit_resources(bp, true);
}
@@ -3756,6 +3780,7 @@ static void bnxt_dev_recover(void *arg)
int timeout = bp->fw_reset_max_msecs;
int rc = 0;
+ pthread_mutex_lock(&bp->err_recovery_lock);
/* Clear Error flag so that device re-init should happen */
bp->flags &= ~BNXT_FLAG_FATAL_ERROR;
@@ -3792,12 +3817,15 @@ static void bnxt_dev_recover(void *arg)
goto err_start;
PMD_DRV_LOG(INFO, "Recovered from FW reset\n");
+ pthread_mutex_unlock(&bp->err_recovery_lock);
+
return;
err_start:
- bnxt_dev_stop_op(bp->eth_dev);
+ bnxt_dev_stop(bp->eth_dev);
err:
bp->flags |= BNXT_FLAG_FATAL_ERROR;
bnxt_uninit_resources(bp, false);
+ pthread_mutex_unlock(&bp->err_recovery_lock);
PMD_DRV_LOG(ERR, "Failed to recover from FW reset\n");
}
@@ -4733,8 +4761,15 @@ bnxt_init_locks(struct bnxt *bp)
}
err = pthread_mutex_init(&bp->health_check_lock, NULL);
- if (err)
+ if (err) {
PMD_DRV_LOG(ERR, "Unable to initialize health_check_lock\n");
+ return err;
+ }
+
+ err = pthread_mutex_init(&bp->err_recovery_lock, NULL);
+ if (err)
+ PMD_DRV_LOG(ERR, "Unable to initialize err_recovery_lock\n");
+
return err;
}
--
2.28.0.497.g54e85e7
next prev parent reply other threads:[~2020-12-24 9:45 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-12-24 9:35 [dpdk-dev] [PATCH v2 0/3] bnxt patches Somnath Kotur
2020-12-24 9:35 ` [dpdk-dev] [PATCH 1/3] net/bnxt: fix to init/destroy locks only once Somnath Kotur
2020-12-24 9:35 ` [dpdk-dev] [PATCH 2/3] net/bnxt: fix error path handling of dev start op Somnath Kotur
2020-12-24 9:35 ` Somnath Kotur [this message]
-- strict thread matches above, loose matches on Subject: below --
2020-12-24 6:31 [dpdk-dev] [PATCH 0/3] bnxt patches Somnath Kotur
2020-12-24 6:31 ` [dpdk-dev] [PATCH 3/3] net/bnxt: check for chip reset in dev stop/close ops Somnath Kotur
2020-12-24 9:37 ` [dpdk-dev] [PATCH v2 0/3] bnxt patches Somnath Kotur
2020-12-24 9:37 ` [dpdk-dev] [PATCH 3/3] net/bnxt: check for chip reset in dev stop/close ops Somnath Kotur
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201224093541.13873-4-somnath.kotur@broadcom.com \
--to=somnath.kotur@broadcom.com \
--cc=dev@dpdk.org \
--cc=ferruh.yigit@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).