DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH 00/13] bnxt patchset to support device error recovery
@ 2019-08-22  5:53 Ajit Khaparde
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 01/13] net/bnxt: hsi version update Ajit Khaparde
                   ` (12 more replies)
  0 siblings, 13 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-22  5:53 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

This patchset adds support to monitor the health of the firmware and the
underlying device and recover to an operational state in case of error.
We can also detect if a FW upgrade is in progress and quiesce all
access to the device and recover once FW indicates everything is ready.

Patchset against dpdk-next-net. Please apply.

Kalesh AP (13):
  net/bnxt: hsi version update
  net/bnxt: prevent device access when device is in reset
  net/bnxt: handle reset notify async event from FW
  net/bnxt: inform firmware about IF state changes
  net/bnxt: handle fatal event from FW under error conditions
  net/bnxt: query firmware error recovery capabilities
  net/bnxt: map status registers for FW health monitoring
  net/bnxt: advertise error recovery capability and handle async event
  net/bnxt: add code for periodic FW health monitoring
  net/bnxt: use BIT macro instead of bit fields
  net/bnxt: reschedule the health check alarm correctly
  net/bnxt: add support for FW reset
  net/bnxt: reduce verbosity of logs

 drivers/net/bnxt/bnxt.h                | 130 +++-
 drivers/net/bnxt/bnxt_cpr.c            |  78 +++
 drivers/net/bnxt/bnxt_cpr.h            |  18 +
 drivers/net/bnxt/bnxt_ethdev.c         | 817 ++++++++++++++++++++-----
 drivers/net/bnxt/bnxt_hwrm.c           | 200 +++++-
 drivers/net/bnxt/bnxt_hwrm.h           |   7 +
 drivers/net/bnxt/bnxt_ring.c           |  39 +-
 drivers/net/bnxt/bnxt_ring.h           |   1 +
 drivers/net/bnxt/bnxt_rxq.c            |  25 +
 drivers/net/bnxt/bnxt_rxr.c            |  17 +
 drivers/net/bnxt/bnxt_rxr.h            |   2 +
 drivers/net/bnxt/bnxt_stats.c          |  34 +-
 drivers/net/bnxt/bnxt_txq.c            |   7 +
 drivers/net/bnxt/bnxt_txr.c            |  27 +
 drivers/net/bnxt/bnxt_txr.h            |   2 +
 drivers/net/bnxt/bnxt_util.h           |   4 +
 drivers/net/bnxt/bnxt_vnic.c           |   7 +-
 drivers/net/bnxt/hsi_struct_def_dpdk.h | 137 +++++
 18 files changed, 1339 insertions(+), 213 deletions(-)

-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH 01/13] net/bnxt: hsi version update
  2019-08-22  5:53 [dpdk-dev] [PATCH 00/13] bnxt patchset to support device error recovery Ajit Khaparde
@ 2019-08-22  5:53 ` Ajit Khaparde
  2019-08-27 13:51   ` Ferruh Yigit
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 02/13] net/bnxt: prevent device access when device is in reset Ajit Khaparde
                   ` (11 subsequent siblings)
  12 siblings, 1 reply; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-22  5:53 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/hsi_struct_def_dpdk.h | 137 +++++++++++++++++++++++++
 1 file changed, 137 insertions(+)

diff --git a/drivers/net/bnxt/hsi_struct_def_dpdk.h b/drivers/net/bnxt/hsi_struct_def_dpdk.h
index 6c98c1d6d..009571725 100644
--- a/drivers/net/bnxt/hsi_struct_def_dpdk.h
+++ b/drivers/net/bnxt/hsi_struct_def_dpdk.h
@@ -33621,4 +33621,141 @@ struct hwrm_nvm_validate_option_cmd_err {
 	uint8_t	unused_0[7];
 } __attribute__((packed));
 
+/*****************
+ * hwrm_fw_reset *
+ ******************/
+
+
+/* hwrm_fw_reset_input (size:192b/24B) */
+struct hwrm_fw_reset_input {
+	/* The HWRM command request type. */
+	uint16_t        req_type;
+	/*
+	 * The completion ring to send the completion event on. This should
+	 * be the NQ ID returned from the `nq_alloc` HWRM command.
+	 */
+	uint16_t        cmpl_ring;
+	/*
+	 * The sequence ID is used by the driver for tracking multiple
+	 * commands. This ID is treated as opaque data by the firmware and
+	 * the value is returned in the `hwrm_resp_hdr` upon completion.
+	 */
+	uint16_t        seq_id;
+	/*
+	 * The target ID of the command:
+	 * * 0x0-0xFFF8 - The function ID
+	 * * 0xFFF8-0xFFFE - Reserved for internal processors
+	 * * 0xFFFF - HWRM
+	 */
+	uint16_t        target_id;
+	/*
+	 * A physical address pointer pointing to a host buffer that the
+	 * command's response data will be written. This can be either a host
+	 * physical address (HPA) or a guest physical address (GPA) and must
+	 * point to a physically contiguous block of memory.
+	 */
+	uint64_t        resp_addr;
+	/* Type of embedded processor. */
+	uint8_t embedded_proc_type;
+	/* Boot Processor */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_BOOT \
+		UINT32_C(0x0)
+	/* Management Processor */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_MGMT \
+		UINT32_C(0x1)
+	/* Network control processor */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_NETCTRL \
+		UINT32_C(0x2)
+	/* RoCE control processor */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_ROCE \
+		UINT32_C(0x3)
+	/*
+	 * Host (in multi-host environment): This is only valid if requester is IPC.
+	 * Reinit host hardware resources and PCIe.
+	 */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_HOST \
+		UINT32_C(0x4)
+	/* AP processor complex (in multi-host environment). Use host_idx to control which core is reset */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_AP \
+		UINT32_C(0x5)
+	/* Reset all blocks of the chip (including all processors) */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_CHIP \
+		UINT32_C(0x6)
+	/*
+	 * Host (in multi-host environment): This is only valid if requester is IPC.
+	 * Reinit host hardware resources.
+	 */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_HOST_RESOURCE_REINIT \
+		UINT32_C(0x7)
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_LAST \
+		HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_HOST_RESOURCE_REINIT
+	/* Type of self reset. */
+	uint8_t selfrst_status;
+	/* No Self Reset */
+	#define HWRM_FW_RESET_INPUT_SELFRST_STATUS_SELFRSTNONE \
+		UINT32_C(0x0)
+	/* Self Reset as soon as possible to do so safely */
+	#define HWRM_FW_RESET_INPUT_SELFRST_STATUS_SELFRSTASAP \
+		UINT32_C(0x1)
+	/* Self Reset on PCIe Reset */
+	#define HWRM_FW_RESET_INPUT_SELFRST_STATUS_SELFRSTPCIERST \
+		UINT32_C(0x2)
+	/* Self Reset immediately after notification to all clients. */
+	#define HWRM_FW_RESET_INPUT_SELFRST_STATUS_SELFRSTIMMEDIATE \
+		UINT32_C(0x3)
+	#define HWRM_FW_RESET_INPUT_SELFRST_STATUS_LAST \
+		HWRM_FW_RESET_INPUT_SELFRST_STATUS_SELFRSTIMMEDIATE
+	/*
+	 * Indicate which host is being reset. 0 means first host.
+	 * Only valid when embedded_proc_type is host in multihost
+	 * environment
+	 */
+	uint8_t host_idx;
+	uint8_t flags;
+	/*
+	 * When this bit is '1', then the core firmware initiates
+	 * the reset only after graceful shut down of all registered instances.
+	 * If not, the device will continue with the existing firmware.
+	 */
+	#define HWRM_FW_RESET_INPUT_FLAGS_RESET_GRACEFUL     UINT32_C(0x1)
+	uint8_t unused_0[4];
+} __attribute__((packed));
+
+/* hwrm_fw_reset_output (size:128b/16B) */
+struct hwrm_fw_reset_output {
+	/* The specific error status for the command. */
+	uint16_t        error_code;
+	/* The HWRM command request type. */
+	uint16_t        req_type;
+	/* The sequence ID from the original command. */
+	uint16_t        seq_id;
+	/* The length of the response data in number of bytes. */
+	uint16_t        resp_len;
+	/* Type of self reset. */
+	uint8_t selfrst_status;
+	/* No Self Reset */
+	#define HWRM_FW_RESET_OUTPUT_SELFRST_STATUS_SELFRSTNONE \
+		UINT32_C(0x0)
+	/* Self Reset as soon as possible to do so safely */
+	#define HWRM_FW_RESET_OUTPUT_SELFRST_STATUS_SELFRSTASAP \
+		UINT32_C(0x1)
+	/* Self Reset on PCIe Reset */
+	#define HWRM_FW_RESET_OUTPUT_SELFRST_STATUS_SELFRSTPCIERST \
+		UINT32_C(0x2)
+	/* Self Reset immediately after notification to all clients. */
+	#define HWRM_FW_RESET_OUTPUT_SELFRST_STATUS_SELFRSTIMMEDIATE \
+		UINT32_C(0x3)
+	#define HWRM_FW_RESET_OUTPUT_SELFRST_STATUS_LAST \
+		HWRM_FW_RESET_OUTPUT_SELFRST_STATUS_SELFRSTIMMEDIATE
+	uint8_t unused_0[6];
+	/*
+	 * This field is used in Output records to indicate that the output
+	 * is completely written to RAM.  This field should be read as '1'
+	 * to indicate that the output has been completely written.
+	 * When writing a command completion or response to an internal processor,
+	 * the order of writes has to be such that this field is written last.
+	 */
+	uint8_t valid;
+} __attribute__((packed));
+
 #endif /* _HSI_STRUCT_DEF_DPDK_H_ */
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH 02/13] net/bnxt: prevent device access when device is in reset
  2019-08-22  5:53 [dpdk-dev] [PATCH 00/13] bnxt patchset to support device error recovery Ajit Khaparde
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 01/13] net/bnxt: hsi version update Ajit Khaparde
@ 2019-08-22  5:53 ` Ajit Khaparde
  2019-08-27 15:00   ` Ferruh Yigit
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 03/13] net/bnxt: handle reset notify async event from FW Ajit Khaparde
                   ` (10 subsequent siblings)
  12 siblings, 1 reply; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-22  5:53 UTC (permalink / raw)
  To: dev
  Cc: ferruh.yigit, Kalesh AP, Santoshkumar Karanappa Rastapur, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

Refactor the init and uninit functions so that the driver can fail
the eth_dev_ops callbacks and block access to the Tx and Rx queues
when the device is in reset or in an error state.

Transmit and receive queues are freed during reset cleanup and
reallocated during recovery. So we block all data path handling
in this state. The eth_dev dev_started field is updated depending
on the status of the device.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Santoshkumar Karanappa Rastapur <santosh.rastapur@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        |   1 +
 drivers/net/bnxt/bnxt_ethdev.c | 455 ++++++++++++++++++++++-----------
 drivers/net/bnxt/bnxt_hwrm.c   |   2 -
 drivers/net/bnxt/bnxt_ring.c   |  32 +++
 drivers/net/bnxt/bnxt_ring.h   |   1 +
 drivers/net/bnxt/bnxt_rxq.c    |  25 ++
 drivers/net/bnxt/bnxt_rxr.c    |  17 ++
 drivers/net/bnxt/bnxt_rxr.h    |   2 +
 drivers/net/bnxt/bnxt_stats.c  |  34 ++-
 drivers/net/bnxt/bnxt_txq.c    |   7 +
 drivers/net/bnxt/bnxt_txr.c    |  27 ++
 drivers/net/bnxt/bnxt_txr.h    |   2 +
 12 files changed, 452 insertions(+), 153 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 0c9f994ea..49418cac9 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -465,6 +465,7 @@ struct bnxt {
 
 int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete);
 int bnxt_rcv_msg_from_vf(struct bnxt *bp, uint16_t vf_id, void *msg);
+int is_bnxt_in_error(struct bnxt *bp);
 
 bool is_bnxt_supported(struct rte_eth_dev *dev);
 bool bnxt_stratus_device(struct bnxt *bp);
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 6685ee7d9..33ff4a5a7 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -167,6 +167,16 @@ static void bnxt_print_link_info(struct rte_eth_dev *eth_dev);
 static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu);
 static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev);
 
+int is_bnxt_in_error(struct bnxt *bp)
+{
+	if (bp->flags & BNXT_FLAG_FATAL_ERROR)
+		return -EIO;
+	if (bp->flags & BNXT_FLAG_FW_RESET)
+		return -EBUSY;
+
+	return 0;
+}
+
 /***********************/
 
 /*
@@ -207,6 +217,10 @@ static int bnxt_alloc_mem(struct bnxt *bp)
 {
 	int rc;
 
+	rc = bnxt_alloc_ring_grps(bp);
+	if (rc)
+		goto alloc_mem_err;
+
 	rc = bnxt_alloc_async_ring_struct(bp);
 	if (rc)
 		goto alloc_mem_err;
@@ -501,6 +515,9 @@ static void bnxt_dev_info_get_op(struct rte_eth_dev *eth_dev,
 	uint16_t max_vnics, i, j, vpool, vrxq;
 	unsigned int max_rx_rings;
 
+	if (is_bnxt_in_error(bp))
+		return;
+
 	/* MAC Specifics */
 	dev_info->max_mac_addrs = bp->max_l2_ctx;
 	dev_info->max_hash_mac_addrs = 0;
@@ -602,6 +619,10 @@ static int bnxt_dev_configure_op(struct rte_eth_dev *eth_dev)
 	bp->tx_nr_rings = eth_dev->data->nb_tx_queues;
 	bp->rx_nr_rings = eth_dev->data->nb_rx_queues;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (BNXT_VF(bp) && (bp->flags & BNXT_FLAG_NEW_RM)) {
 		rc = bnxt_hwrm_check_vf_rings(bp);
 		if (rc) {
@@ -791,8 +812,10 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
 
 	eth_dev->rx_pkt_burst = bnxt_receive_function(eth_dev);
 	eth_dev->tx_pkt_burst = bnxt_transmit_function(eth_dev);
+
 	bnxt_enable_int(bp);
 	bp->flags |= BNXT_FLAG_INIT_DONE;
+	eth_dev->data->dev_started = 1;
 	bp->dev_stopped = 0;
 	return 0;
 
@@ -835,6 +858,11 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 
+	eth_dev->data->dev_started = 0;
+	/* Prevent crashes when queues are still in use */
+	eth_dev->rx_pkt_burst = &bnxt_dummy_recv_pkts;
+	eth_dev->tx_pkt_burst = &bnxt_dummy_xmit_pkts;
+
 	bnxt_disable_int(bp);
 
 	/* disable uio/vfio intr/eventfd mapping */
@@ -889,6 +917,9 @@ static void bnxt_mac_addr_remove_op(struct rte_eth_dev *eth_dev,
 	struct bnxt_filter_info *filter, *temp_filter;
 	uint32_t i;
 
+	if (is_bnxt_in_error(bp))
+		return;
+
 	/*
 	 * Loop through all VNICs from the specified filter flow pools to
 	 * remove the corresponding MAC addr filter
@@ -924,6 +955,10 @@ static int bnxt_mac_addr_add_op(struct rte_eth_dev *eth_dev,
 	struct bnxt_filter_info *filter;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (BNXT_VF(bp) & !BNXT_VF_IS_TRUSTED(bp)) {
 		PMD_DRV_LOG(ERR, "Cannot add MAC address to a VF interface\n");
 		return -ENOTSUP;
@@ -969,6 +1004,10 @@ int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete)
 	struct rte_eth_link new;
 	unsigned int cnt = BNXT_LINK_WAIT_CNT;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	memset(&new, 0, sizeof(new));
 	do {
 		/* Retrieve link info from hardware */
@@ -1009,6 +1048,9 @@ static void bnxt_promiscuous_enable_op(struct rte_eth_dev *eth_dev)
 	struct bnxt *bp = eth_dev->data->dev_private;
 	struct bnxt_vnic_info *vnic;
 
+	if (is_bnxt_in_error(bp))
+		return;
+
 	if (bp->vnic_info == NULL)
 		return;
 
@@ -1023,6 +1065,9 @@ static void bnxt_promiscuous_disable_op(struct rte_eth_dev *eth_dev)
 	struct bnxt *bp = eth_dev->data->dev_private;
 	struct bnxt_vnic_info *vnic;
 
+	if (is_bnxt_in_error(bp))
+		return;
+
 	if (bp->vnic_info == NULL)
 		return;
 
@@ -1037,6 +1082,9 @@ static void bnxt_allmulticast_enable_op(struct rte_eth_dev *eth_dev)
 	struct bnxt *bp = eth_dev->data->dev_private;
 	struct bnxt_vnic_info *vnic;
 
+	if (is_bnxt_in_error(bp))
+		return;
+
 	if (bp->vnic_info == NULL)
 		return;
 
@@ -1051,6 +1099,9 @@ static void bnxt_allmulticast_disable_op(struct rte_eth_dev *eth_dev)
 	struct bnxt *bp = eth_dev->data->dev_private;
 	struct bnxt_vnic_info *vnic;
 
+	if (is_bnxt_in_error(bp))
+		return;
+
 	if (bp->vnic_info == NULL)
 		return;
 
@@ -1100,7 +1151,11 @@ static int bnxt_reta_update_op(struct rte_eth_dev *eth_dev,
 	struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
 	uint16_t tbl_size = bnxt_rss_hash_tbl_size(bp);
 	uint16_t idx, sft;
-	int i;
+	int i, rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	if (!vnic->rss_table)
 		return -EINVAL;
@@ -1156,6 +1211,11 @@ static int bnxt_reta_query_op(struct rte_eth_dev *eth_dev,
 	struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
 	uint16_t tbl_size = bnxt_rss_hash_tbl_size(bp);
 	uint16_t idx, sft, i;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	/* Retrieve from the default VNIC */
 	if (!vnic)
@@ -1202,6 +1262,11 @@ static int bnxt_rss_hash_update_op(struct rte_eth_dev *eth_dev,
 	struct bnxt_vnic_info *vnic;
 	uint16_t hash_type = 0;
 	unsigned int i;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	/*
 	 * If RSS enablement were different than dev_configure,
@@ -1255,9 +1320,13 @@ static int bnxt_rss_hash_conf_get_op(struct rte_eth_dev *eth_dev,
 {
 	struct bnxt *bp = eth_dev->data->dev_private;
 	struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
-	int len;
+	int len, rc;
 	uint32_t hash_types;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	/* RSS configuration is the same for all VNICs */
 	if (vnic && vnic->rss_hash_key) {
 		if (rss_conf->rss_key) {
@@ -1315,6 +1384,10 @@ static int bnxt_flow_ctrl_get_op(struct rte_eth_dev *dev,
 	struct rte_eth_link link_info;
 	int rc;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	rc = bnxt_get_hwrm_link_config(bp, &link_info);
 	if (rc)
 		return rc;
@@ -1344,6 +1417,11 @@ static int bnxt_flow_ctrl_set_op(struct rte_eth_dev *dev,
 			       struct rte_eth_fc_conf *fc_conf)
 {
 	struct bnxt *bp = dev->data->dev_private;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	if (!BNXT_SINGLE_PF(bp) || BNXT_VF(bp)) {
 		PMD_DRV_LOG(ERR, "Flow Control Settings cannot be modified\n");
@@ -1403,6 +1481,10 @@ bnxt_udp_tunnel_port_add_op(struct rte_eth_dev *eth_dev,
 	uint16_t tunnel_type = 0;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	switch (udp_tunnel->prot_type) {
 	case RTE_TUNNEL_TYPE_VXLAN:
 		if (bp->vxlan_port_cnt) {
@@ -1452,6 +1534,10 @@ bnxt_udp_tunnel_port_del_op(struct rte_eth_dev *eth_dev,
 	uint16_t port = 0;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	switch (udp_tunnel->prot_type) {
 	case RTE_TUNNEL_TYPE_VXLAN:
 		if (!bp->vxlan_port_cnt) {
@@ -1605,6 +1691,11 @@ static int bnxt_vlan_filter_set_op(struct rte_eth_dev *eth_dev,
 		uint16_t vlan_id, int on)
 {
 	struct bnxt *bp = eth_dev->data->dev_private;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	/* These operations apply to ALL existing MAC/VLAN filters */
 	if (on)
@@ -1619,6 +1710,11 @@ bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask)
 	struct bnxt *bp = dev->data->dev_private;
 	uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads;
 	unsigned int i;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	if (mask & ETH_VLAN_FILTER_MASK) {
 		if (!(rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER)) {
@@ -1660,6 +1756,10 @@ bnxt_set_default_mac_addr_op(struct rte_eth_dev *dev,
 	struct bnxt_filter_info *filter;
 	int rc;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (BNXT_VF(bp) && !BNXT_VF_IS_TRUSTED(bp))
 		return -EPERM;
 
@@ -1699,6 +1799,11 @@ bnxt_dev_set_mc_addr_list_op(struct rte_eth_dev *eth_dev,
 	char *mc_addr_list = (char *)mc_addr_set;
 	struct bnxt_vnic_info *vnic;
 	uint32_t off = 0, i = 0;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	vnic = &bp->vnic_info[0];
 
@@ -1784,6 +1889,10 @@ static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu)
 	uint32_t rc = 0;
 	uint32_t i;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	new_pkt_size = new_mtu + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN +
 		       VLAN_TAG_SIZE * BNXT_NUM_VLANS;
 
@@ -1857,6 +1966,10 @@ bnxt_vlan_pvid_set_op(struct rte_eth_dev *dev, uint16_t pvid, int on)
 	uint16_t vlan = bp->vlan;
 	int rc;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (!BNXT_SINGLE_PF(bp) || BNXT_VF(bp)) {
 		PMD_DRV_LOG(ERR,
 			"PVID cannot be modified for this function\n");
@@ -1874,6 +1987,11 @@ static int
 bnxt_dev_led_on_op(struct rte_eth_dev *dev)
 {
 	struct bnxt *bp = dev->data->dev_private;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	return bnxt_hwrm_port_led_cfg(bp, true);
 }
@@ -1882,6 +2000,11 @@ static int
 bnxt_dev_led_off_op(struct rte_eth_dev *dev)
 {
 	struct bnxt *bp = dev->data->dev_private;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	return bnxt_hwrm_port_led_cfg(bp, false);
 }
@@ -1889,6 +2012,7 @@ bnxt_dev_led_off_op(struct rte_eth_dev *dev)
 static uint32_t
 bnxt_rx_queue_count_op(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
+	struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
 	uint32_t desc = 0, raw_cons = 0, cons;
 	struct bnxt_cp_ring_info *cpr;
 	struct bnxt_rx_queue *rxq;
@@ -1896,6 +2020,11 @@ bnxt_rx_queue_count_op(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 	uint16_t cmp_type;
 	uint8_t cmp = 1;
 	bool valid;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	rxq = dev->data->rx_queues[rx_queue_id];
 	cpr = rxq->cp_ring;
@@ -1940,10 +2069,15 @@ bnxt_rx_descriptor_status_op(void *rx_queue, uint16_t offset)
 	struct bnxt_sw_rx_bd *rx_buf;
 	struct rx_pkt_cmpl *rxcmp;
 	uint32_t cons, cp_cons;
+	int rc;
 
 	if (!rxq)
 		return -EINVAL;
 
+	rc = is_bnxt_in_error(rxq->bp);
+	if (rc)
+		return rc;
+
 	cpr = rxq->cp_ring;
 	rxr = rxq->rx_ring;
 
@@ -1978,10 +2112,15 @@ bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
 	struct bnxt_sw_tx_bd *tx_buf;
 	struct tx_pkt_cmpl *txcmp;
 	uint32_t cons, cp_cons;
+	int rc;
 
 	if (!txq)
 		return -EINVAL;
 
+	rc = is_bnxt_in_error(txq->bp);
+	if (rc)
+		return rc;
+
 	cpr = txq->cp_ring;
 	txr = txq->tx_ring;
 
@@ -2811,6 +2950,10 @@ bnxt_filter_ctrl_op(struct rte_eth_dev *dev __rte_unused,
 {
 	int ret = 0;
 
+	ret = is_bnxt_in_error(dev->data->dev_private);
+	if (ret)
+		return ret;
+
 	switch (filter_type) {
 	case RTE_ETH_FILTER_TUNNEL:
 		PMD_DRV_LOG(ERR,
@@ -3126,6 +3269,10 @@ bnxt_get_eeprom_length_op(struct rte_eth_dev *dev)
 	uint32_t dir_entries;
 	uint32_t entry_length;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	PMD_DRV_LOG(INFO, "%04x:%02x:%02x:%02x\n",
 		bp->pdev->addr.domain, bp->pdev->addr.bus,
 		bp->pdev->addr.devid, bp->pdev->addr.function);
@@ -3144,6 +3291,11 @@ bnxt_get_eeprom_op(struct rte_eth_dev *dev,
 	struct bnxt *bp = dev->data->dev_private;
 	uint32_t index;
 	uint32_t offset;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	PMD_DRV_LOG(INFO, "%04x:%02x:%02x:%02x in_eeprom->offset = %d "
 		"len = %d\n", bp->pdev->addr.domain,
@@ -3215,6 +3367,11 @@ bnxt_set_eeprom_op(struct rte_eth_dev *dev,
 	struct bnxt *bp = dev->data->dev_private;
 	uint8_t index, dir_op;
 	uint16_t type, ext, ordinal, attr;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	PMD_DRV_LOG(INFO, "%04x:%02x:%02x:%02x in_eeprom->offset = %d "
 		"len = %d\n", bp->pdev->addr.domain,
@@ -3768,19 +3925,139 @@ static int bnxt_setup_mac_addr(struct rte_eth_dev *eth_dev)
 	return rc;
 }
 
+static void bnxt_config_vf_req_fwd(struct bnxt *bp)
+{
+	if (!BNXT_PF(bp))
+		return;
+
 #define ALLOW_FUNC(x)	\
 	{ \
 		uint32_t arg = (x); \
 		bp->pf.vf_req_fwd[((arg) >> 5)] &= \
 		~rte_cpu_to_le_32(1 << ((arg) & 0x1f)); \
 	}
+
+	/* Forward all requests if firmware is new enough */
+	if (((bp->fw_ver >= ((20 << 24) | (6 << 16) | (100 << 8))) &&
+	     (bp->fw_ver < ((20 << 24) | (7 << 16)))) ||
+	    ((bp->fw_ver >= ((20 << 24) | (8 << 16))))) {
+		memset(bp->pf.vf_req_fwd, 0xff, sizeof(bp->pf.vf_req_fwd));
+	} else {
+		PMD_DRV_LOG(WARNING,
+			    "Firmware too old for VF mailbox functionality\n");
+		memset(bp->pf.vf_req_fwd, 0, sizeof(bp->pf.vf_req_fwd));
+	}
+
+	/*
+	 * The following are used for driver cleanup. If we disallow these,
+	 * VF drivers can't clean up cleanly.
+	 */
+	ALLOW_FUNC(HWRM_FUNC_DRV_UNRGTR);
+	ALLOW_FUNC(HWRM_VNIC_FREE);
+	ALLOW_FUNC(HWRM_RING_FREE);
+	ALLOW_FUNC(HWRM_RING_GRP_FREE);
+	ALLOW_FUNC(HWRM_VNIC_RSS_COS_LB_CTX_FREE);
+	ALLOW_FUNC(HWRM_CFA_L2_FILTER_FREE);
+	ALLOW_FUNC(HWRM_STAT_CTX_FREE);
+	ALLOW_FUNC(HWRM_PORT_PHY_QCFG);
+	ALLOW_FUNC(HWRM_VNIC_TPA_CFG);
+}
+
+static int bnxt_init_fw(struct bnxt *bp)
+{
+	uint16_t mtu;
+	int rc = 0;
+
+	rc = bnxt_hwrm_ver_get(bp);
+	if (rc)
+		return rc;
+
+	rc = bnxt_hwrm_func_reset(bp);
+	if (rc)
+		return -EIO;
+
+	rc = bnxt_hwrm_queue_qportcfg(bp);
+	if (rc)
+		return rc;
+
+	/* Get the MAX capabilities for this function */
+	rc = bnxt_hwrm_func_qcaps(bp);
+	if (rc)
+		return rc;
+
+	rc = bnxt_hwrm_func_qcfg(bp, &mtu);
+	if (rc)
+		return rc;
+
+	if (mtu >= RTE_ETHER_MIN_MTU && mtu <= BNXT_MAX_MTU &&
+	    mtu != bp->eth_dev->data->mtu)
+		bp->eth_dev->data->mtu = mtu;
+
+	bnxt_hwrm_port_led_qcaps(bp);
+
+	return 0;
+}
+
+static int bnxt_init_resources(struct bnxt *bp)
+{
+	int rc;
+
+	rc = bnxt_init_fw(bp);
+	if (rc)
+		return rc;
+
+	rc = bnxt_setup_mac_addr(bp->eth_dev);
+	if (rc)
+		return rc;
+
+	bnxt_config_vf_req_fwd(bp);
+
+	rc = bnxt_hwrm_func_driver_register(bp);
+	if (rc) {
+		PMD_DRV_LOG(ERR, "Failed to register driver");
+		return -EBUSY;
+	}
+
+	if (BNXT_PF(bp)) {
+		if (bp->pdev->max_vfs) {
+			rc = bnxt_hwrm_allocate_vfs(bp, bp->pdev->max_vfs);
+			if (rc) {
+				PMD_DRV_LOG(ERR, "Failed to allocate VFs\n");
+				return rc;
+			}
+		} else {
+			rc = bnxt_hwrm_allocate_pf_only(bp);
+			if (rc) {
+				PMD_DRV_LOG(ERR,
+					    "Failed to allocate PF resources");
+				return rc;
+			}
+		}
+	}
+
+	rc = bnxt_alloc_mem(bp);
+	if (rc)
+		return rc;
+
+	rc = bnxt_setup_int(bp);
+	if (rc)
+		return rc;
+
+	bnxt_init_nic(bp);
+
+	rc = bnxt_request_int(bp);
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
 static int
 bnxt_dev_init(struct rte_eth_dev *eth_dev)
 {
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 	static int version_printed;
 	struct bnxt *bp;
-	uint16_t mtu;
 	int rc;
 
 	if (version_printed++ == 0)
@@ -3822,166 +4099,50 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
 	rc = bnxt_init_board(eth_dev);
 	if (rc) {
 		PMD_DRV_LOG(ERR,
-			"Board initialization failed rc: %x\n", rc);
-		goto error;
+			    "Failed to initialize board rc: %x\n", rc);
+		return rc;
 	}
 
 	rc = bnxt_alloc_hwrm_resources(bp);
 	if (rc) {
 		PMD_DRV_LOG(ERR,
-			"hwrm resource allocation failure rc: %x\n", rc);
+			    "Failed to allocate hwrm resource rc: %x\n", rc);
 		goto error_free;
 	}
-	rc = bnxt_hwrm_ver_get(bp);
+	rc = bnxt_init_resources(bp);
 	if (rc)
 		goto error_free;
 
-	rc = bnxt_hwrm_func_reset(bp);
-	if (rc) {
-		PMD_DRV_LOG(ERR, "hwrm chip reset failure rc: %x\n", rc);
-		rc = -EIO;
-		goto error_free;
-	}
-
-	rc = bnxt_hwrm_queue_qportcfg(bp);
-	if (rc) {
-		PMD_DRV_LOG(ERR, "hwrm queue qportcfg failed\n");
-		goto error_free;
-	}
-	/* Get the MAX capabilities for this function */
-	rc = bnxt_hwrm_func_qcaps(bp);
-	if (rc) {
-		PMD_DRV_LOG(ERR, "hwrm query capability failure rc: %x\n", rc);
-		goto error_free;
-	}
-
 	rc = bnxt_alloc_stats_mem(bp);
 	if (rc)
 		goto error_free;
 
-	if (bp->max_tx_rings == 0) {
-		PMD_DRV_LOG(ERR, "No TX rings available!\n");
-		rc = -EBUSY;
-		goto error_free;
-	}
-
-	rc = bnxt_setup_mac_addr(eth_dev);
-	if (rc)
-		goto error_free;
-
-	/* THOR does not support ring groups.
-	 * But we will use the array to save RSS context IDs.
-	 */
-	if (BNXT_CHIP_THOR(bp)) {
-		bp->max_ring_grps = BNXT_MAX_RSS_CTXTS_THOR;
-	} else if (bp->max_ring_grps < bp->rx_cp_nr_rings) {
-		/* 1 ring is for default completion ring */
-		PMD_DRV_LOG(ERR, "Insufficient resource: Ring Group\n");
-		rc = -ENOSPC;
-		goto error_free;
-	}
-
-	if (BNXT_HAS_RING_GRPS(bp)) {
-		bp->grp_info = rte_zmalloc("bnxt_grp_info",
-					sizeof(*bp->grp_info) *
-						bp->max_ring_grps, 0);
-		if (!bp->grp_info) {
-			PMD_DRV_LOG(ERR,
-				"Failed to alloc %zu bytes for grp info tbl.\n",
-				sizeof(*bp->grp_info) * bp->max_ring_grps);
-			rc = -ENOMEM;
-			goto error_free;
-		}
-	}
-
-	/* Forward all requests if firmware is new enough */
-	if (((bp->fw_ver >= ((20 << 24) | (6 << 16) | (100 << 8))) &&
-	    (bp->fw_ver < ((20 << 24) | (7 << 16)))) ||
-	    ((bp->fw_ver >= ((20 << 24) | (8 << 16))))) {
-		memset(bp->pf.vf_req_fwd, 0xff, sizeof(bp->pf.vf_req_fwd));
-	} else {
-		PMD_DRV_LOG(WARNING,
-			"Firmware too old for VF mailbox functionality\n");
-		memset(bp->pf.vf_req_fwd, 0, sizeof(bp->pf.vf_req_fwd));
-	}
-
-	/*
-	 * The following are used for driver cleanup.  If we disallow these,
-	 * VF drivers can't clean up cleanly.
-	 */
-	ALLOW_FUNC(HWRM_FUNC_DRV_UNRGTR);
-	ALLOW_FUNC(HWRM_VNIC_FREE);
-	ALLOW_FUNC(HWRM_RING_FREE);
-	ALLOW_FUNC(HWRM_RING_GRP_FREE);
-	ALLOW_FUNC(HWRM_VNIC_RSS_COS_LB_CTX_FREE);
-	ALLOW_FUNC(HWRM_CFA_L2_FILTER_FREE);
-	ALLOW_FUNC(HWRM_STAT_CTX_FREE);
-	ALLOW_FUNC(HWRM_PORT_PHY_QCFG);
-	ALLOW_FUNC(HWRM_VNIC_TPA_CFG);
-	rc = bnxt_hwrm_func_driver_register(bp);
-	if (rc) {
-		PMD_DRV_LOG(ERR,
-			"Failed to register driver");
-		rc = -EBUSY;
-		goto error_free;
-	}
-
 	PMD_DRV_LOG(INFO,
-		DRV_MODULE_NAME " found at mem %" PRIx64 ", node addr %pM\n",
-		pci_dev->mem_resource[0].phys_addr,
-		pci_dev->mem_resource[0].addr);
-
-	rc = bnxt_hwrm_func_qcfg(bp, &mtu);
-	if (rc) {
-		PMD_DRV_LOG(ERR, "hwrm func qcfg failed\n");
-		goto error_free;
-	}
-
-	if (mtu >= RTE_ETHER_MIN_MTU && mtu <= BNXT_MAX_MTU &&
-	    mtu != eth_dev->data->mtu)
-		eth_dev->data->mtu = mtu;
-
-	if (BNXT_PF(bp)) {
-		//if (bp->pf.active_vfs) {
-			// TODO: Deallocate VF resources?
-		//}
-		if (bp->pdev->max_vfs) {
-			rc = bnxt_hwrm_allocate_vfs(bp, bp->pdev->max_vfs);
-			if (rc) {
-				PMD_DRV_LOG(ERR, "Failed to allocate VFs\n");
-				goto error_free;
-			}
-		} else {
-			rc = bnxt_hwrm_allocate_pf_only(bp);
-			if (rc) {
-				PMD_DRV_LOG(ERR,
-					"Failed to allocate PF resources\n");
-				goto error_free;
-			}
-		}
-	}
-
-	bnxt_hwrm_port_led_qcaps(bp);
-
-	rc = bnxt_setup_int(bp);
-	if (rc)
-		goto error_free;
-
-	rc = bnxt_alloc_mem(bp);
-	if (rc)
-		goto error_free;
-
-	bnxt_init_nic(bp);
-
-	rc = bnxt_request_int(bp);
-	if (rc)
-		goto error_free;
+		    DRV_MODULE_NAME "found at mem %" PRIX64 ", node addr %pM\n",
+		    pci_dev->mem_resource[0].phys_addr,
+		    pci_dev->mem_resource[0].addr);
 
 	return 0;
 
 error_free:
 	bnxt_dev_uninit(eth_dev);
-error:
+	return rc;
+}
+
+static int
+bnxt_uninit_resources(struct bnxt *bp)
+{
+	int rc;
+
+	bnxt_disable_int(bp);
+	bnxt_free_int(bp);
+	bnxt_free_mem(bp);
+	bnxt_hwrm_func_buf_unrgtr(bp);
+	rc = bnxt_hwrm_func_driver_unregister(bp, 0);
+	bp->flags &= ~BNXT_FLAG_REGISTERED;
+	bnxt_free_ctx_mem(bp);
+	bnxt_free_hwrm_resources(bp);
+
 	return rc;
 }
 
@@ -3995,18 +4156,13 @@ bnxt_dev_uninit(struct rte_eth_dev *eth_dev)
 		return -EPERM;
 
 	PMD_DRV_LOG(DEBUG, "Calling Device uninit\n");
-	bnxt_disable_int(bp);
-	bnxt_free_int(bp);
-	bnxt_free_mem(bp);
 
-	bnxt_hwrm_func_buf_unrgtr(bp);
+	rc = bnxt_uninit_resources(bp);
 
 	if (bp->grp_info != NULL) {
 		rte_free(bp->grp_info);
 		bp->grp_info = NULL;
 	}
-	rc = bnxt_hwrm_func_driver_unregister(bp, 0);
-	bnxt_free_hwrm_resources(bp);
 
 	if (bp->tx_mem_zone) {
 		rte_memzone_free((const struct rte_memzone *)bp->tx_mem_zone);
@@ -4022,7 +4178,6 @@ bnxt_dev_uninit(struct rte_eth_dev *eth_dev)
 		bnxt_dev_close_op(eth_dev);
 	if (bp->pf.vf_info)
 		rte_free(bp->pf.vf_info);
-	bnxt_free_ctx_mem(bp);
 	eth_dev->dev_ops = NULL;
 	eth_dev->rx_pkt_burst = NULL;
 	eth_dev->tx_pkt_burst = NULL;
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 9883fb506..24a5a0914 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -964,8 +964,6 @@ int bnxt_hwrm_func_driver_unregister(struct bnxt *bp, uint32_t flags)
 	HWRM_CHECK_RESULT();
 	HWRM_UNLOCK();
 
-	bp->flags &= ~BNXT_FLAG_REGISTERED;
-
 	return rc;
 }
 
diff --git a/drivers/net/bnxt/bnxt_ring.c b/drivers/net/bnxt/bnxt_ring.c
index be15b4bd1..f19865c83 100644
--- a/drivers/net/bnxt/bnxt_ring.c
+++ b/drivers/net/bnxt/bnxt_ring.c
@@ -50,6 +50,38 @@ int bnxt_init_ring_grps(struct bnxt *bp)
 	return 0;
 }
 
+int bnxt_alloc_ring_grps(struct bnxt *bp)
+{
+	/* Bail out when no TX ring is available to this function. */
+	if (!bp->max_tx_rings) {
+		PMD_DRV_LOG(ERR, "No TX rings available!\n");
+		return -EBUSY;
+	}
+
+	/* THOR has no ring groups; the table holds RSS context IDs instead. */
+	if (BNXT_CHIP_THOR(bp)) {
+		bp->max_ring_grps = BNXT_MAX_RSS_CTXTS_THOR;
+	} else if (bp->rx_cp_nr_rings > bp->max_ring_grps) {
+		/* 1 ring is for default completion ring */
+		PMD_DRV_LOG(ERR, "Insufficient resource: Ring Group\n");
+		return -ENOSPC;
+	}
+
+	/* Allocate the table only when BNXT_HAS_RING_GRPS() holds. */
+	if (!BNXT_HAS_RING_GRPS(bp))
+		return 0;
+
+	bp->grp_info = rte_zmalloc("bnxt_grp_info",
+				   bp->max_ring_grps * sizeof(*bp->grp_info),
+				   0);
+	if (bp->grp_info == NULL) {
+		PMD_DRV_LOG(ERR, "Failed to alloc grp info tbl.\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 /*
  * Allocates a completion ring with vmem and stats optionally also allocating
  * a TX and/or RX ring.  Passing NULL as tx_ring_info and/or rx_ring_info
diff --git a/drivers/net/bnxt/bnxt_ring.h b/drivers/net/bnxt/bnxt_ring.h
index 04c7b04b8..a31d59ea3 100644
--- a/drivers/net/bnxt/bnxt_ring.h
+++ b/drivers/net/bnxt/bnxt_ring.h
@@ -67,6 +67,7 @@ struct bnxt_rx_ring_info;
 struct bnxt_cp_ring_info;
 void bnxt_free_ring(struct bnxt_ring *ring);
 int bnxt_init_ring_grps(struct bnxt *bp);
+int bnxt_alloc_ring_grps(struct bnxt *bp);
 int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
 			    struct bnxt_tx_queue *txq,
 			    struct bnxt_rx_queue *rxq,
diff --git a/drivers/net/bnxt/bnxt_rxq.c b/drivers/net/bnxt/bnxt_rxq.c
index 1d95f1139..d5fc5268d 100644
--- a/drivers/net/bnxt/bnxt_rxq.c
+++ b/drivers/net/bnxt/bnxt_rxq.c
@@ -263,6 +263,9 @@ void bnxt_rx_queue_release_op(void *rx_queue)
 	struct bnxt_rx_queue *rxq = (struct bnxt_rx_queue *)rx_queue;
 
 	if (rxq) {
+		if (is_bnxt_in_error(rxq->bp))
+			return;
+
 		bnxt_rx_queue_release_mbufs(rxq);
 
 		/* Free RX ring hardware descriptors */
@@ -294,6 +297,10 @@ int bnxt_rx_queue_setup_op(struct rte_eth_dev *eth_dev,
 	int rc = 0;
 	uint8_t queue_state;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (queue_idx >= bp->max_rx_rings) {
 		PMD_DRV_LOG(ERR,
 			"Cannot create Rx ring %d. Only %d rings available\n",
@@ -363,10 +370,15 @@ int bnxt_rx_queue_setup_op(struct rte_eth_dev *eth_dev,
 int
 bnxt_rx_queue_intr_enable_op(struct rte_eth_dev *eth_dev, uint16_t queue_id)
 {
+	struct bnxt *bp = eth_dev->data->dev_private;
 	struct bnxt_rx_queue *rxq;
 	struct bnxt_cp_ring_info *cpr;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (eth_dev->data->rx_queues) {
 		rxq = eth_dev->data->rx_queues[queue_id];
 		if (!rxq) {
@@ -382,10 +394,15 @@ bnxt_rx_queue_intr_enable_op(struct rte_eth_dev *eth_dev, uint16_t queue_id)
 int
 bnxt_rx_queue_intr_disable_op(struct rte_eth_dev *eth_dev, uint16_t queue_id)
 {
+	struct bnxt *bp = eth_dev->data->dev_private;
 	struct bnxt_rx_queue *rxq;
 	struct bnxt_cp_ring_info *cpr;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (eth_dev->data->rx_queues) {
 		rxq = eth_dev->data->rx_queues[queue_id];
 		if (!rxq) {
@@ -406,6 +423,10 @@ int bnxt_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 	struct bnxt_vnic_info *vnic = NULL;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (rxq == NULL) {
 		PMD_DRV_LOG(ERR, "Invalid Rx queue %d\n", rx_queue_id);
 		return -EINVAL;
@@ -458,6 +479,10 @@ int bnxt_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 	struct bnxt_rx_queue *rxq = NULL;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	/* For the stingray platform and other platforms needing tighter
 	 * control of resource utilization, Rx CQ 0 also works as
 	 * Default CQ for async notifications
diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c
index 185a0e376..12313dd53 100644
--- a/drivers/net/bnxt/bnxt_rxr.c
+++ b/drivers/net/bnxt/bnxt_rxr.c
@@ -539,6 +539,9 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	int rc = 0;
 	bool evt = false;
 
+	if (unlikely(is_bnxt_in_error(rxq->bp)))
+		return 0;
+
 	/* If Rx Q was stopped return. RxQ0 cannot be stopped. */
 	if (unlikely(((rxq->rx_deferred_start ||
 		       !rte_spinlock_trylock(&rxq->lock)) &&
@@ -625,6 +628,20 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return nb_rx_pkts;
 }
 
+/*
+ * Dummy DPDK callback for RX: ignores its arguments and always returns 0.
+ *
+ * This function is used to temporarily replace the real callback during
+ * unsafe control operations on the queue, or in case of error.
+ */
+uint16_t
+bnxt_dummy_recv_pkts(void *rx_queue __rte_unused,
+		     struct rte_mbuf **rx_pkts __rte_unused,
+		     uint16_t nb_pkts __rte_unused)
+{
+	return 0;
+}
+
 void bnxt_free_rx_rings(struct bnxt *bp)
 {
 	int i;
diff --git a/drivers/net/bnxt/bnxt_rxr.h b/drivers/net/bnxt/bnxt_rxr.h
index 6a80c37c8..493b75406 100644
--- a/drivers/net/bnxt/bnxt_rxr.h
+++ b/drivers/net/bnxt/bnxt_rxr.h
@@ -185,6 +185,8 @@ struct bnxt_rx_ring_info {
 
 uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			       uint16_t nb_pkts);
+uint16_t bnxt_dummy_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+			      uint16_t nb_pkts);
 void bnxt_free_rx_rings(struct bnxt *bp);
 int bnxt_init_rx_ring_struct(struct bnxt_rx_queue *rxq, unsigned int socket_id);
 int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq);
diff --git a/drivers/net/bnxt/bnxt_stats.c b/drivers/net/bnxt/bnxt_stats.c
index 69ac2dd91..79f23746c 100644
--- a/drivers/net/bnxt/bnxt_stats.c
+++ b/drivers/net/bnxt/bnxt_stats.c
@@ -353,6 +353,10 @@ int bnxt_stats_get_op(struct rte_eth_dev *eth_dev,
 	struct bnxt *bp = eth_dev->data->dev_private;
 	unsigned int num_q_stats;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	memset(bnxt_stats, 0, sizeof(*bnxt_stats));
 	if (!(bp->flags & BNXT_FLAG_INIT_DONE)) {
 		PMD_DRV_LOG(ERR, "Device Initialization not complete!\n");
@@ -397,6 +401,9 @@ void bnxt_stats_reset_op(struct rte_eth_dev *eth_dev)
 	struct bnxt *bp = eth_dev->data->dev_private;
 	unsigned int i;
 
+	if (is_bnxt_in_error(bp))
+		return;
+
 	if (!(bp->flags & BNXT_FLAG_INIT_DONE)) {
 		PMD_DRV_LOG(ERR, "Device Initialization not complete!\n");
 		return;
@@ -414,13 +421,17 @@ int bnxt_dev_xstats_get_op(struct rte_eth_dev *eth_dev,
 			   struct rte_eth_xstat *xstats, unsigned int n)
 {
 	struct bnxt *bp = eth_dev->data->dev_private;
-
 	unsigned int count, i;
 	uint64_t tx_drop_pkts;
 	unsigned int rx_port_stats_ext_cnt;
 	unsigned int tx_port_stats_ext_cnt;
 	unsigned int stat_size = sizeof(uint64_t);
 	unsigned int stat_count;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	memset(xstats, 0, sizeof(*xstats));
 
@@ -499,7 +510,13 @@ int bnxt_dev_xstats_get_names_op(__rte_unused struct rte_eth_dev *eth_dev,
 				RTE_DIM(bnxt_tx_stats_strings) + 1 +
 				RTE_DIM(bnxt_rx_ext_stats_strings) +
 				RTE_DIM(bnxt_tx_ext_stats_strings);
+	struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
 	unsigned int i, count;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	if (xstats_names != NULL) {
 		count = 0;
@@ -547,6 +564,9 @@ void bnxt_dev_xstats_reset_op(struct rte_eth_dev *eth_dev)
 {
 	struct bnxt *bp = eth_dev->data->dev_private;
 
+	if (is_bnxt_in_error(bp))
+		return;
+
 	if (bp->flags & BNXT_FLAG_PORT_STATS && BNXT_SINGLE_PF(bp))
 		bnxt_hwrm_port_clr_stats(bp);
 
@@ -566,9 +586,15 @@ int bnxt_dev_xstats_get_by_id_op(struct rte_eth_dev *dev, const uint64_t *ids,
 				RTE_DIM(bnxt_tx_stats_strings) + 1 +
 				RTE_DIM(bnxt_rx_ext_stats_strings) +
 				RTE_DIM(bnxt_tx_ext_stats_strings);
+	struct bnxt *bp = dev->data->dev_private;
 	struct rte_eth_xstat xstats[stat_cnt];
 	uint64_t values_copy[stat_cnt];
 	uint16_t i;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	if (!ids)
 		return bnxt_dev_xstats_get_op(dev, xstats, stat_cnt);
@@ -594,7 +620,13 @@ int bnxt_dev_xstats_get_names_by_id_op(struct rte_eth_dev *dev,
 				RTE_DIM(bnxt_rx_ext_stats_strings) +
 				RTE_DIM(bnxt_tx_ext_stats_strings);
 	struct rte_eth_xstat_name xstats_names_copy[stat_cnt];
+	struct bnxt *bp = dev->data->dev_private;
 	uint16_t i;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	if (!ids)
 		return bnxt_dev_xstats_get_names_op(dev, xstats_names,
diff --git a/drivers/net/bnxt/bnxt_txq.c b/drivers/net/bnxt/bnxt_txq.c
index 43b3496c1..090132479 100644
--- a/drivers/net/bnxt/bnxt_txq.c
+++ b/drivers/net/bnxt/bnxt_txq.c
@@ -58,6 +58,9 @@ void bnxt_tx_queue_release_op(void *tx_queue)
 	struct bnxt_tx_queue *txq = (struct bnxt_tx_queue *)tx_queue;
 
 	if (txq) {
+		if (is_bnxt_in_error(txq->bp))
+			return;
+
 		/* Free TX ring hardware descriptors */
 		bnxt_tx_queue_release_mbufs(txq);
 		bnxt_free_ring(txq->tx_ring->tx_ring_struct);
@@ -84,6 +87,10 @@ int bnxt_tx_queue_setup_op(struct rte_eth_dev *eth_dev,
 	struct bnxt_tx_queue *txq;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (queue_idx >= bp->max_tx_rings) {
 		PMD_DRV_LOG(ERR,
 			"Cannot create Tx ring %d. Only %d rings available\n",
diff --git a/drivers/net/bnxt/bnxt_txr.c b/drivers/net/bnxt/bnxt_txr.c
index c71e6f189..35e7166be 100644
--- a/drivers/net/bnxt/bnxt_txr.c
+++ b/drivers/net/bnxt/bnxt_txr.c
@@ -148,6 +148,9 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 		TX_BD_LONG_FLAGS_LHINT_LT2K
 	};
 
+	if (unlikely(is_bnxt_in_error(txq->bp)))
+		return -EIO;
+
 	if (tx_pkt->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM |
 				PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM |
 				PKT_TX_VLAN_PKT | PKT_TX_OUTER_IP_CKSUM |
@@ -485,10 +488,29 @@ uint16_t bnxt_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	return nb_tx_pkts;
 }
 
+/*
+ * Dummy DPDK callback for TX: ignores its arguments and always returns 0.
+ *
+ * This function is used to temporarily replace the real callback during
+ * unsafe control operations on the queue, or in case of error.
+ */
+uint16_t
+bnxt_dummy_xmit_pkts(void *tx_queue __rte_unused,
+		     struct rte_mbuf **tx_pkts __rte_unused,
+		     uint16_t nb_pkts __rte_unused)
+{
+	return 0;
+}
+
 int bnxt_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 {
 	struct bnxt *bp = dev->data->dev_private;
 	struct bnxt_tx_queue *txq = bp->tx_queues[tx_queue_id];
+	int rc = 0;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
 	txq->tx_deferred_start = false;
@@ -501,6 +523,11 @@ int bnxt_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 {
 	struct bnxt *bp = dev->data->dev_private;
 	struct bnxt_tx_queue *txq = bp->tx_queues[tx_queue_id];
+	int rc = 0;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	/* Handle TX completions */
 	bnxt_handle_tx_cp(txq);
diff --git a/drivers/net/bnxt/bnxt_txr.h b/drivers/net/bnxt/bnxt_txr.h
index 08fd2e014..e7f43f9d1 100644
--- a/drivers/net/bnxt/bnxt_txr.h
+++ b/drivers/net/bnxt/bnxt_txr.h
@@ -57,6 +57,8 @@ int bnxt_init_one_tx_ring(struct bnxt_tx_queue *txq);
 int bnxt_init_tx_ring_struct(struct bnxt_tx_queue *txq, unsigned int socket_id);
 uint16_t bnxt_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			       uint16_t nb_pkts);
+uint16_t bnxt_dummy_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+			      uint16_t nb_pkts);
 #ifdef RTE_ARCH_X86
 uint16_t bnxt_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 			    uint16_t nb_pkts);
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH 03/13] net/bnxt: handle reset notify async event from FW
  2019-08-22  5:53 [dpdk-dev] [PATCH 00/13] bnxt patchset to support device error recovery Ajit Khaparde
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 01/13] net/bnxt: hsi version update Ajit Khaparde
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 02/13] net/bnxt: prevent device access when device is in reset Ajit Khaparde
@ 2019-08-22  5:53 ` Ajit Khaparde
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 04/13] net/bnxt: inform firmware about IF state changes Ajit Khaparde
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-22  5:53 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

When a FW upgrade is initiated, the current instance
of FW issues a HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY
async notification to the driver. On receiving this notification,
the PMD shall quiesce itself and poll on the HWRM_VER_GET FW
command at regular intervals.

Once the VER_GET command succeeds, the driver should go through
the rediscovery process and re-initialize the device.

Also register with FW for the reset notify async event.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        |  15 +++++
 drivers/net/bnxt/bnxt_cpr.c    |  14 +++++
 drivers/net/bnxt/bnxt_cpr.h    |   1 +
 drivers/net/bnxt/bnxt_ethdev.c | 110 ++++++++++++++++++++++++++++-----
 drivers/net/bnxt/bnxt_hwrm.c   |  39 +++++++++---
 drivers/net/bnxt/bnxt_hwrm.h   |   2 +
 6 files changed, 158 insertions(+), 23 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 49418cac9..8797b032e 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -333,6 +333,16 @@ struct bnxt_ctx_mem_info {
 	struct bnxt_ctx_pg_info *tqm_mem[BNXT_MAX_TC_Q];
 };
 
+/* Maximum Firmware Reset bail out value in milliseconds */
+#define BNXT_MAX_FW_RESET_TIMEOUT	6000
+/* Minimum time required for the firmware readiness in milliseconds */
+#define BNXT_MIN_FW_READY_TIMEOUT	2000
+/* Frequency for the firmware readiness check in milliseconds */
+#define BNXT_FW_READY_WAIT_INTERVAL	100
+
+#define US_PER_MS			1000
+#define NS_PER_US			1000
+
 #define BNXT_HWRM_SHORT_REQ_LEN		sizeof(struct hwrm_short_input)
 struct bnxt {
 	void				*bar0;
@@ -358,6 +368,8 @@ struct bnxt {
 #define BNXT_FLAG_DFLT_VNIC_SET	(1 << 12)
 #define BNXT_FLAG_THOR_CHIP	(1 << 13)
 #define BNXT_FLAG_STINGRAY	(1 << 14)
+#define BNXT_FLAG_FW_RESET	(1 << 15)
+#define BNXT_FLAG_FATAL_ERROR	(1 << 16)
 #define BNXT_FLAG_EXT_STATS_SUPPORTED	(1 << 29)
 #define BNXT_FLAG_NEW_RM	(1 << 30)
 #define BNXT_FLAG_INIT_DONE	(1U << 31)
@@ -461,6 +473,9 @@ struct bnxt {
 	struct bnxt_ptp_cfg     *ptp_cfg;
 	uint16_t		vf_resv_strategy;
 	struct bnxt_ctx_mem_info        *ctx;
+
+	uint16_t		fw_reset_min_msecs;
+	uint16_t		fw_reset_max_msecs;
 };
 
 int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete);
diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index 655bcf1a8..cefb5db2a 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -40,6 +40,20 @@ void bnxt_handle_async_event(struct bnxt *bp,
 	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED:
 		PMD_DRV_LOG(INFO, "Port conn async event\n");
 		break;
+	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY:
+		/* timestamp_lo/hi values are in units of 100ms */
+		bp->fw_reset_max_msecs = async_cmp->timestamp_hi ?
+			rte_le_to_cpu_16(async_cmp->timestamp_hi) * 100 :
+			BNXT_MAX_FW_RESET_TIMEOUT;
+		bp->fw_reset_min_msecs = async_cmp->timestamp_lo ?
+			async_cmp->timestamp_lo * 100 :
+			BNXT_MIN_FW_READY_TIMEOUT;
+		PMD_DRV_LOG(INFO,
+			    "Firmware non-fatal reset event received\n");
+
+		bp->flags |= BNXT_FLAG_FW_RESET;
+		bnxt_dev_reset_and_resume(bp);
+		break;
 	default:
 		PMD_DRV_LOG(INFO, "handle_async_event id = 0x%x\n", event_id);
 		break;
diff --git a/drivers/net/bnxt/bnxt_cpr.h b/drivers/net/bnxt/bnxt_cpr.h
index 8c6a34b61..4f86e3f60 100644
--- a/drivers/net/bnxt/bnxt_cpr.h
+++ b/drivers/net/bnxt/bnxt_cpr.h
@@ -106,5 +106,6 @@ struct bnxt;
 void bnxt_handle_async_event(struct bnxt *bp, struct cmpl_base *cmp);
 void bnxt_handle_fwd_req(struct bnxt *bp, struct cmpl_base *cmp);
 int bnxt_event_hwrm_resp_handler(struct bnxt *bp, struct cmpl_base *cmp);
+int bnxt_dev_reset_and_resume(struct bnxt *bp);
 
 #endif
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 33ff4a5a7..1aef227f2 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -11,6 +11,7 @@
 #include <rte_ethdev_pci.h>
 #include <rte_malloc.h>
 #include <rte_cycles.h>
+#include <rte_alarm.h>
 
 #include "bnxt.h"
 #include "bnxt_cpr.h"
@@ -166,6 +167,8 @@ static int bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask);
 static void bnxt_print_link_info(struct rte_eth_dev *eth_dev);
 static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu);
 static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev);
+static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev);
+static int bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev);
 
 int is_bnxt_in_error(struct bnxt *bp)
 {
@@ -201,19 +204,25 @@ static uint16_t  bnxt_rss_hash_tbl_size(const struct bnxt *bp)
 	return bnxt_rss_ctxts(bp) * BNXT_RSS_ENTRIES_PER_CTX_THOR;
 }
 
-static void bnxt_free_mem(struct bnxt *bp)
+static void bnxt_free_mem(struct bnxt *bp, bool reconfig)
 {
 	bnxt_free_filter_mem(bp);
 	bnxt_free_vnic_attributes(bp);
 	bnxt_free_vnic_mem(bp);
 
-	bnxt_free_stats(bp);
-	bnxt_free_tx_rings(bp);
-	bnxt_free_rx_rings(bp);
+	/* tx/rx rings are configured as part of *_queue_setup callbacks.
+	 * If the number of rings change across fw update,
+	 * we don't have much choice except to warn the user.
+	 */
+	if (!reconfig) {
+		bnxt_free_stats(bp);
+		bnxt_free_tx_rings(bp);
+		bnxt_free_rx_rings(bp);
+	}
 	bnxt_free_async_cp_ring(bp);
 }
 
-static int bnxt_alloc_mem(struct bnxt *bp)
+static int bnxt_alloc_mem(struct bnxt *bp, bool reconfig)
 {
 	int rc;
 
@@ -244,7 +253,7 @@ static int bnxt_alloc_mem(struct bnxt *bp)
 	return 0;
 
 alloc_mem_err:
-	bnxt_free_mem(bp);
+	bnxt_free_mem(bp, reconfig);
 	return rc;
 }
 
@@ -3483,6 +3492,98 @@ static const struct eth_dev_ops bnxt_dev_ops = {
 	.timesync_read_tx_timestamp = bnxt_timesync_read_tx_timestamp,
 };
 
+/*
+ * Quiesce the port ahead of a firmware reset: reconfigure the link through
+ * bnxt_set_hwrm_link_config(bp, false), mark it down, stop the port if it is
+ * still running and release the resources in reconfig mode.
+ */
+static void bnxt_dev_cleanup(struct bnxt *bp)
+{
+	bnxt_set_hwrm_link_config(bp, false);
+	bp->link_info.link_up = 0;
+	if (bp->dev_stopped == 0)
+		bnxt_dev_stop_op(bp->eth_dev);
+
+	bnxt_uninit_resources(bp, true);
+}
+
+/*
+ * Alarm callback that brings the port back after a firmware reset.
+ *
+ * Polls HWRM_VER_GET every BNXT_FW_READY_WAIT_INTERVAL ms, within a wait
+ * budget of bp->fw_reset_max_msecs, until firmware answers. It then
+ * re-initializes the resources in reconfig mode, clears BNXT_FLAG_FW_RESET
+ * and restarts the port. On any failure the device is flagged with
+ * BNXT_FLAG_FATAL_ERROR and its resources are released.
+ *
+ * arg: the struct bnxt of the device being recovered.
+ */
+static void bnxt_dev_recover(void *arg)
+{
+	struct bnxt *bp = arg;
+	int timeout = bp->fw_reset_max_msecs;
+	int rc = 0;
+
+	/* Stop on "timeout > 0", not "timeout != 0": fw_reset_max_msecs is a
+	 * 16-bit value computed from FW data and need not be a multiple of
+	 * the polling interval, so the countdown can step over zero.
+	 */
+	do {
+		rc = bnxt_hwrm_ver_get(bp);
+		if (rc == 0)
+			break;
+		rte_delay_ms(BNXT_FW_READY_WAIT_INTERVAL);
+		timeout -= BNXT_FW_READY_WAIT_INTERVAL;
+	} while (rc && timeout > 0);
+
+	if (rc) {
+		PMD_DRV_LOG(ERR, "FW is not Ready after reset\n");
+		goto err;
+	}
+
+	rc = bnxt_init_resources(bp, true);
+	if (rc) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to initialize resources after reset\n");
+		goto err;
+	}
+	/* clear reset flag as the device is initialized now */
+	bp->flags &= ~BNXT_FLAG_FW_RESET;
+
+	rc = bnxt_dev_start_op(bp->eth_dev);
+	if (rc) {
+		PMD_DRV_LOG(ERR, "Failed to start port after reset\n");
+		goto err;
+	}
+
+	PMD_DRV_LOG(INFO, "Recovered from FW reset\n");
+	return;
+err:
+	bp->flags |= BNXT_FLAG_FATAL_ERROR;
+	bnxt_uninit_resources(bp, false);
+	PMD_DRV_LOG(ERR, "Failed to recover from FW reset\n");
+}
+
+/*
+ * Tear the port down for a firmware reset and schedule bnxt_dev_recover()
+ * to run after bp->fw_reset_min_msecs milliseconds.
+ *
+ * Returns the result of rte_eal_alarm_set(): 0 on success, negative on error.
+ */
+int bnxt_dev_reset_and_resume(struct bnxt *bp)
+{
+	int rc;
+
+	bnxt_dev_cleanup(bp);
+
+	rc = rte_eal_alarm_set(US_PER_MS * bp->fw_reset_min_msecs,
+			       bnxt_dev_recover, (void *)bp);
+	if (rc)
+		PMD_DRV_LOG(ERR, "Error setting recovery alarm\n");
+
+	return rc;
+}
+
 static bool bnxt_vf_pciid(uint16_t id)
 {
 	if (id == BROADCOM_DEV_ID_57304_VF ||
@@ -3998,7 +4073,7 @@ static int bnxt_init_fw(struct bnxt *bp)
 	return 0;
 }
 
-static int bnxt_init_resources(struct bnxt *bp)
+static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev)
 {
 	int rc;
 
@@ -4006,9 +4081,11 @@ static int bnxt_init_resources(struct bnxt *bp)
 	if (rc)
 		return rc;
 
-	rc = bnxt_setup_mac_addr(bp->eth_dev);
-	if (rc)
-		return rc;
+	if (!reconfig_dev) {
+		rc = bnxt_setup_mac_addr(bp->eth_dev);
+		if (rc)
+			return rc;
+	}
 
 	bnxt_config_vf_req_fwd(bp);
 
@@ -4035,7 +4112,7 @@ static int bnxt_init_resources(struct bnxt *bp)
 		}
 	}
 
-	rc = bnxt_alloc_mem(bp);
+	rc = bnxt_alloc_mem(bp, reconfig_dev);
 	if (rc)
 		return rc;
 
@@ -4109,7 +4186,7 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
 			    "Failed to allocate hwrm resource rc: %x\n", rc);
 		goto error_free;
 	}
-	rc = bnxt_init_resources(bp);
+	rc = bnxt_init_resources(bp, false);
 	if (rc)
 		goto error_free;
 
@@ -4130,18 +4207,19 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
 }
 
 static int
-bnxt_uninit_resources(struct bnxt *bp)
+bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev)
 {
 	int rc;
 
 	bnxt_disable_int(bp);
 	bnxt_free_int(bp);
-	bnxt_free_mem(bp);
+	bnxt_free_mem(bp, reconfig_dev);
 	bnxt_hwrm_func_buf_unrgtr(bp);
 	rc = bnxt_hwrm_func_driver_unregister(bp, 0);
 	bp->flags &= ~BNXT_FLAG_REGISTERED;
 	bnxt_free_ctx_mem(bp);
-	bnxt_free_hwrm_resources(bp);
+	if (!reconfig_dev)
+		bnxt_free_hwrm_resources(bp);
 
 	return rc;
 }
@@ -4157,7 +4235,7 @@ bnxt_dev_uninit(struct rte_eth_dev *eth_dev)
 
 	PMD_DRV_LOG(DEBUG, "Calling Device uninit\n");
 
-	rc = bnxt_uninit_resources(bp);
+	rc = bnxt_uninit_resources(bp, false);
 
 	if (bp->grp_info != NULL) {
 		rte_free(bp->grp_info);
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 24a5a0914..b27dbe87e 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -26,7 +26,7 @@
 
 #include <rte_io.h>
 
-#define HWRM_CMD_TIMEOUT		6000000
+#define HWRM_SHORT_CMD_TIMEOUT		50000
 #define HWRM_SPEC_CODE_1_8_3		0x10803
 #define HWRM_VERSION_1_9_1		0x10901
 #define HWRM_VERSION_1_9_2		0x10903
@@ -97,6 +97,14 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg,
 		GRCPF_REG_KONG_CHANNEL_OFFSET : GRCPF_REG_CHIMP_CHANNEL_OFFSET;
 	uint16_t mb_trigger_offset = use_kong_mb ?
 		GRCPF_REG_KONG_COMM_TRIGGER : GRCPF_REG_CHIMP_COMM_TRIGGER;
+	uint32_t timeout;
+
+	/* Do not send HWRM commands to firmware in error state */
+	if (bp->flags & BNXT_FLAG_FATAL_ERROR)
+		return 0;
+
+	/* For VER_GET command, set timeout as 50ms */
+	timeout = HWRM_SHORT_CMD_TIMEOUT;
 
 	if (bp->flags & BNXT_FLAG_SHORT_CMD ||
 	    msg_len > bp->max_req_len) {
@@ -139,7 +147,7 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg,
 	rte_write32(1, bar);
 
 	/* Poll for the valid bit */
-	for (i = 0; i < HWRM_CMD_TIMEOUT; i++) {
+	for (i = 0; i < timeout; i++) {
 		/* Sanity check on the resp->resp_len */
 		rte_rmb();
 		if (resp->resp_len && resp->resp_len <= bp->max_resp_len) {
@@ -151,7 +159,12 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg,
 		rte_delay_us(1);
 	}
 
-	if (i >= HWRM_CMD_TIMEOUT) {
+	if (i >= timeout) {
+		/* Suppress VER_GET timeout messages during reset recovery */
+		if (bp->flags & BNXT_FLAG_FW_RESET &&
+		    rte_cpu_to_le_16(req->req_type) == HWRM_VER_GET)
+			return -ETIMEDOUT;
+
 		PMD_DRV_LOG(ERR, "Error(timeout) sending msg 0x%04x\n",
 			    req->req_type);
 		return -ETIMEDOUT;
@@ -657,12 +670,15 @@ int bnxt_hwrm_func_reset(struct bnxt *bp)
 int bnxt_hwrm_func_driver_register(struct bnxt *bp)
 {
 	int rc;
+	uint32_t flags = 0;
 	struct hwrm_func_drv_rgtr_input req = {.req_type = 0 };
 	struct hwrm_func_drv_rgtr_output *resp = bp->hwrm_cmd_resp_addr;
 
 	if (bp->flags & BNXT_FLAG_REGISTERED)
 		return 0;
 
+	flags = HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_HOT_RESET_SUPPORT;
+
 	HWRM_PREP(req, FUNC_DRV_RGTR, BNXT_USE_CHIMP_MB);
 	req.enables = rte_cpu_to_le_32(HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_VER |
 			HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_ASYNC_EVENT_FWD);
@@ -683,14 +699,16 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp)
 		 * this HWRM sniffer list in FW because DPDK PF driver does
 		 * not support this.
 		 */
-		req.flags =
-		rte_cpu_to_le_32(HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_FWD_NONE_MODE);
+		flags |= HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_FWD_NONE_MODE;
 	}
 
+	req.flags = rte_cpu_to_le_32(flags);
+
 	req.async_event_fwd[0] |=
 		rte_cpu_to_le_32(ASYNC_CMPL_EVENT_ID_LINK_STATUS_CHANGE |
 				 ASYNC_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED |
-				 ASYNC_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE);
+				 ASYNC_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE |
+				 ASYNC_CMPL_EVENT_ID_RESET_NOTIFY);
 	req.async_event_fwd[1] |=
 		rte_cpu_to_le_32(ASYNC_CMPL_EVENT_ID_PF_DRVR_UNLOAD |
 				 ASYNC_CMPL_EVENT_ID_VF_CFG_CHANGE);
@@ -837,7 +855,10 @@ int bnxt_hwrm_ver_get(struct bnxt *bp)
 
 	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
-	HWRM_CHECK_RESULT();
+	if (bp->flags & BNXT_FLAG_FW_RESET)
+		HWRM_CHECK_RESULT_SILENT();
+	else
+		HWRM_CHECK_RESULT();
 
 	PMD_DRV_LOG(INFO, "%d.%d.%d:%d.%d.%d\n",
 		resp->hwrm_intf_maj_8b, resp->hwrm_intf_min_8b,
@@ -2685,6 +2706,10 @@ int bnxt_hwrm_func_qcfg(struct bnxt *bp, uint16_t *mtu)
 	if (BNXT_VF(bp) && (flags & HWRM_FUNC_QCFG_OUTPUT_FLAGS_TRUSTED_VF)) {
 		bp->flags |= BNXT_FLAG_TRUSTED_VF_EN;
 		PMD_DRV_LOG(INFO, "Trusted VF cap enabled\n");
+	} else if (BNXT_VF(bp) &&
+		   !(flags & HWRM_FUNC_QCFG_OUTPUT_FLAGS_TRUSTED_VF)) {
+		bp->flags &= ~BNXT_FLAG_TRUSTED_VF_EN;
+		PMD_DRV_LOG(INFO, "Trusted VF cap disabled\n");
 	}
 
 	if (mtu)
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index c882fc2a1..a03620532 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -21,6 +21,8 @@ struct bnxt_cp_ring_info;
 	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED)
 #define ASYNC_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE	\
 	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE)
+#define ASYNC_CMPL_EVENT_ID_RESET_NOTIFY \
+	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY)
 #define ASYNC_CMPL_EVENT_ID_PF_DRVR_UNLOAD	\
 	(1 << (HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD - 32))
 #define ASYNC_CMPL_EVENT_ID_VF_CFG_CHANGE	\
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH 04/13] net/bnxt: inform firmware about IF state changes
  2019-08-22  5:53 [dpdk-dev] [PATCH 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                   ` (2 preceding siblings ...)
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 03/13] net/bnxt: handle reset notify async event from FW Ajit Khaparde
@ 2019-08-22  5:53 ` Ajit Khaparde
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 05/13] net/bnxt: handle fatal event from FW under error conditions Ajit Khaparde
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-22  5:53 UTC (permalink / raw)
  To: dev
  Cc: ferruh.yigit, Kalesh AP, Santoshkumar Karanappa Rastapur, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

Use latest firmware API to inform firmware about IF state changes.
Firmware has the option to clean up resources during IF down and
to require the driver to reserve resources again during IF up.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Santoshkumar Karanappa Rastapur <santosh.rastapur@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        |  1 +
 drivers/net/bnxt/bnxt_ethdev.c |  4 ++++
 drivers/net/bnxt/bnxt_hwrm.c   | 35 ++++++++++++++++++++++++++++++++++
 drivers/net/bnxt/bnxt_hwrm.h   |  1 +
 4 files changed, 41 insertions(+)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 8797b032e..394a2a941 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -370,6 +370,7 @@ struct bnxt {
 #define BNXT_FLAG_STINGRAY	(1 << 14)
 #define BNXT_FLAG_FW_RESET	(1 << 15)
 #define BNXT_FLAG_FATAL_ERROR	(1 << 16)
+#define BNXT_FLAG_FW_CAP_IF_CHANGE	(1 << 17)
 #define BNXT_FLAG_EXT_STATS_SUPPORTED	(1 << 29)
 #define BNXT_FLAG_NEW_RM	(1 << 30)
 #define BNXT_FLAG_INIT_DONE	(1U << 31)
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 1aef227f2..f7b2ef179 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -803,6 +803,8 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
 			bp->rx_cp_nr_rings, RTE_ETHDEV_QUEUE_STAT_CNTRS);
 	}
 
+	bnxt_hwrm_if_change(bp, 1);
+
 	rc = bnxt_init_chip(bp);
 	if (rc)
 		goto error;
@@ -829,6 +831,7 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
 	return 0;
 
 error:
+	bnxt_hwrm_if_change(bp, 0);
 	bnxt_shutdown_nic(bp);
 	bnxt_free_tx_mbufs(bp);
 	bnxt_free_rx_mbufs(bp);
@@ -895,6 +898,7 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
 	bnxt_free_tx_mbufs(bp);
 	bnxt_free_rx_mbufs(bp);
 	bnxt_shutdown_nic(bp);
+	bnxt_hwrm_if_change(bp, 0);
 	bp->dev_stopped = 1;
 }
 
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index b27dbe87e..17c7b5e9e 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -716,6 +716,11 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp)
 	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
 	HWRM_CHECK_RESULT();
+
+	flags = rte_le_to_cpu_32(resp->flags);
+	if (flags & HWRM_FUNC_DRV_RGTR_OUTPUT_FLAGS_IF_CHANGE_SUPPORTED)
+		bp->flags |= BNXT_FLAG_FW_CAP_IF_CHANGE;
+
 	HWRM_UNLOCK();
 
 	bp->flags |= BNXT_FLAG_REGISTERED;
@@ -4649,3 +4654,33 @@ int bnxt_hwrm_set_mac(struct bnxt *bp)
 
 	return rc;
 }
+
+int bnxt_hwrm_if_change(struct bnxt *bp, bool state)
+{
+	struct hwrm_func_drv_if_change_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_func_drv_if_change_input req = {0};
+	int rc;
+
+	if (!(bp->flags & BNXT_FLAG_FW_CAP_IF_CHANGE))
+		return 0;	/* FW did not advertise IF_CHANGE support */
+
+	/* Skip the "down" notification while a FW reset is in progress:
+	 * if FUNC_DRV_IF_CHANGE (down) is issued before FUNC_DRV_UNRGTR,
+	 * FW resets before FUNC_DRV_UNRGTR.
+	 */
+	if (!state && (bp->flags & BNXT_FLAG_FW_RESET))
+		return 0;
+
+	HWRM_PREP(req, FUNC_DRV_IF_CHANGE, BNXT_USE_CHIMP_MB);
+
+	if (state)	/* UP flag marks "up"; nothing is set here for "down" */
+		req.flags =
+		rte_cpu_to_le_32(HWRM_FUNC_DRV_IF_CHANGE_INPUT_FLAGS_UP);
+
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
+
+	HWRM_CHECK_RESULT();
+	HWRM_UNLOCK();
+
+	return rc;
+}
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index a03620532..2f57e950b 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -201,4 +201,5 @@ int bnxt_hwrm_tunnel_redirect_query(struct bnxt *bp, uint32_t *type);
 int bnxt_hwrm_tunnel_redirect_info(struct bnxt *bp, uint8_t tun_type,
 				   uint16_t *dst_fid);
 int bnxt_hwrm_set_mac(struct bnxt *bp);
+int bnxt_hwrm_if_change(struct bnxt *bp, bool state);
 #endif
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH 05/13] net/bnxt: handle fatal event from FW under error conditions
  2019-08-22  5:53 [dpdk-dev] [PATCH 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                   ` (3 preceding siblings ...)
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 04/13] net/bnxt: inform firmware about IF state changes Ajit Khaparde
@ 2019-08-22  5:53 ` Ajit Khaparde
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 06/13] net/bnxt: query firmware error recovery capabilities Ajit Khaparde
                   ` (7 subsequent siblings)
  12 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-22  5:53 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

When firmware hits an unrecoverable error condition, it initiates
recovery by sending an async event EVENT_CMPL_EVENT_ID_RESET_NOTIFY
with data1 set to RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL
to all host drivers and then resets the chip.

The recovery procedure is the same sequence as the one for hot FW upgrade.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt_cpr.c    | 13 +++++++++++--
 drivers/net/bnxt/bnxt_cpr.h    |  5 +++++
 drivers/net/bnxt/bnxt_ethdev.c |  3 +++
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index cefb5db2a..6e0b1d67e 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -20,6 +20,7 @@ void bnxt_handle_async_event(struct bnxt *bp,
 	struct hwrm_async_event_cmpl *async_cmp =
 				(struct hwrm_async_event_cmpl *)cmp;
 	uint16_t event_id = rte_le_to_cpu_16(async_cmp->event_id);
+	uint32_t event_data;
 
 	/* TODO: HWRM async events are not defined yet */
 	/* Needs to handle: link events, error events, etc. */
@@ -41,6 +42,7 @@ void bnxt_handle_async_event(struct bnxt *bp,
 		PMD_DRV_LOG(INFO, "Port conn async event\n");
 		break;
 	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY:
+		event_data = rte_le_to_cpu_32(async_cmp->event_data1);
 		/* timestamp_lo/hi values are in units of 100ms */
 		bp->fw_reset_max_msecs = async_cmp->timestamp_hi ?
 			rte_le_to_cpu_16(async_cmp->timestamp_hi) * 100 :
@@ -48,8 +50,15 @@ void bnxt_handle_async_event(struct bnxt *bp,
 		bp->fw_reset_min_msecs = async_cmp->timestamp_lo ?
 			async_cmp->timestamp_lo * 100 :
 			BNXT_MIN_FW_READY_TIMEOUT;
-		PMD_DRV_LOG(INFO,
-			    "Firmware non-fatal reset event received\n");
+		if ((event_data & EVENT_DATA1_REASON_CODE_MASK) ==
+		    EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL) {
+			PMD_DRV_LOG(INFO,
+				    "Firmware fatal reset event received\n");
+			bp->flags |= BNXT_FLAG_FATAL_ERROR;
+		} else {
+			PMD_DRV_LOG(INFO,
+				    "Firmware non-fatal reset event received\n");
+		}
 
 		bp->flags |= BNXT_FLAG_FW_RESET;
 		bnxt_dev_reset_and_resume(bp);
diff --git a/drivers/net/bnxt/bnxt_cpr.h b/drivers/net/bnxt/bnxt_cpr.h
index 4f86e3f60..4e63fd12f 100644
--- a/drivers/net/bnxt/bnxt_cpr.h
+++ b/drivers/net/bnxt/bnxt_cpr.h
@@ -108,4 +108,9 @@ void bnxt_handle_fwd_req(struct bnxt *bp, struct cmpl_base *cmp);
 int bnxt_event_hwrm_resp_handler(struct bnxt *bp, struct cmpl_base *cmp);
 int bnxt_dev_reset_and_resume(struct bnxt *bp);
 
+#define EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL     \
+	HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL
+#define EVENT_DATA1_REASON_CODE_MASK                   \
+	HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MASK
+
 #endif
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index f7b2ef179..a0b9e8f9e 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -3512,6 +3512,9 @@ static void bnxt_dev_recover(void *arg)
 	int timeout = bp->fw_reset_max_msecs;
 	int rc = 0;
 
+	/* Clear Error flag so that device re-init should happen */
+	bp->flags &= ~BNXT_FLAG_FATAL_ERROR;
+
 	do {
 		rc = bnxt_hwrm_ver_get(bp);
 		if (rc == 0)
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH 06/13] net/bnxt: query firmware error recovery capabilities
  2019-08-22  5:53 [dpdk-dev] [PATCH 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                   ` (4 preceding siblings ...)
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 05/13] net/bnxt: handle fatal event from FW under error conditions Ajit Khaparde
@ 2019-08-22  5:53 ` Ajit Khaparde
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 07/13] net/bnxt: map status registers for FW health monitoring Ajit Khaparde
                   ` (6 subsequent siblings)
  12 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-22  5:53 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

In the driver-initiated error recovery process, the driver has to know
the register offsets and values needed to initiate a FW reset. The HWRM
command HWRM_ERROR_RECOVERY_QCFG is used to obtain all the registers and
values required to initiate a FW reset. The command response includes
the FW heartbeat register, the health status register, the error counter
register, and the register offsets and values used to reset the chip if
the firmware crashes and becomes unresponsive.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        | 27 +++++++++++
 drivers/net/bnxt/bnxt_ethdev.c | 10 ++++
 drivers/net/bnxt/bnxt_hwrm.c   | 89 ++++++++++++++++++++++++++++++++++
 drivers/net/bnxt/bnxt_hwrm.h   |  1 +
 4 files changed, 127 insertions(+)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 394a2a941..19bd13a7f 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -343,6 +343,29 @@ struct bnxt_ctx_mem_info {
 #define US_PER_MS			1000
 #define NS_PER_US			1000
 
+struct bnxt_error_recovery_info {
+	/* All units in milliseconds */
+	uint32_t	driver_polling_freq;
+	uint32_t	master_func_wait_period;
+	uint32_t	normal_func_wait_period;
+	uint32_t	master_func_wait_period_after_reset;
+	uint32_t	max_bailout_time_after_reset;
+#define BNXT_FW_STATUS_REG		0
+#define BNXT_FW_HEARTBEAT_CNT_REG	1
+#define BNXT_FW_RECOVERY_CNT_REG	2
+#define BNXT_FW_RESET_INPROG_REG	3
+	uint32_t	status_regs[4];
+	uint32_t	reset_inprogress_reg_mask;
+#define BNXT_NUM_RESET_REG	16
+	uint8_t		reg_array_cnt;
+	uint32_t	reset_reg[BNXT_NUM_RESET_REG];
+	uint32_t	reset_reg_val[BNXT_NUM_RESET_REG];
+	uint8_t		delay_after_reset[BNXT_NUM_RESET_REG];
+#define BNXT_FLAG_ERROR_RECOVERY_HOST	(1 << 0)
+#define BNXT_FLAG_ERROR_RECOVERY_CO_CPU	(1 << 1)
+	uint32_t	flags;
+};
+
 #define BNXT_HWRM_SHORT_REQ_LEN		sizeof(struct hwrm_short_input)
 struct bnxt {
 	void				*bar0;
@@ -371,6 +394,7 @@ struct bnxt {
 #define BNXT_FLAG_FW_RESET	(1 << 15)
 #define BNXT_FLAG_FATAL_ERROR	(1 << 16)
 #define BNXT_FLAG_FW_CAP_IF_CHANGE	(1 << 17)
+#define BNXT_FLAG_FW_CAP_ERROR_RECOVERY	(1 << 18)
 #define BNXT_FLAG_EXT_STATS_SUPPORTED	(1 << 29)
 #define BNXT_FLAG_NEW_RM	(1 << 30)
 #define BNXT_FLAG_INIT_DONE	(1U << 31)
@@ -477,6 +501,9 @@ struct bnxt {
 
 	uint16_t		fw_reset_min_msecs;
 	uint16_t		fw_reset_max_msecs;
+
+	/* Struct to hold adapter error recovery related info */
+	struct bnxt_error_recovery_info *recovery_info;
 };
 
 int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete);
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index a0b9e8f9e..18046c00a 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -4071,6 +4071,11 @@ static int bnxt_init_fw(struct bnxt *bp)
 	if (rc)
 		return rc;
 
+	/* Get the adapter error recovery support info */
+	rc = bnxt_hwrm_error_recovery_qcfg(bp);
+	if (rc)
+		bp->flags &= ~BNXT_FLAG_FW_CAP_ERROR_RECOVERY;
+
 	if (mtu >= RTE_ETHER_MIN_MTU && mtu <= BNXT_MAX_MTU &&
 	    mtu != bp->eth_dev->data->mtu)
 		bp->eth_dev->data->mtu = mtu;
@@ -4228,6 +4233,11 @@ bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev)
 	if (!reconfig_dev)
 		bnxt_free_hwrm_resources(bp);
 
+	if (bp->recovery_info != NULL) {
+		rte_free(bp->recovery_info);
+		bp->recovery_info = NULL;
+	}
+
 	return rc;
 }
 
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 17c7b5e9e..e2c993936 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -626,6 +626,13 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
 	if (flags & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT_STATS_SUPPORTED)
 		bp->flags |= BNXT_FLAG_EXT_STATS_SUPPORTED;
 
+	if (flags & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_ERROR_RECOVERY_CAPABLE) {
+		bp->flags |= BNXT_FLAG_FW_CAP_ERROR_RECOVERY;
+		PMD_DRV_LOG(DEBUG, "Adapter Error recovery SUPPORTED\n");
+	} else {
+		bp->flags &= ~BNXT_FLAG_FW_CAP_ERROR_RECOVERY;
+	}
+
 	HWRM_UNLOCK();
 
 	return rc;
@@ -4684,3 +4691,85 @@ int bnxt_hwrm_if_change(struct bnxt *bp, bool state)
 
 	return rc;
 }
+
+int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp)
+{
+	struct hwrm_error_recovery_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+	struct bnxt_error_recovery_info *info;
+	struct hwrm_error_recovery_qcfg_input req = {0};
+	uint32_t flags = 0;
+	unsigned int i;
+	int rc;
+
+	/* Older FW does not have error recovery support */
+	if (!(bp->flags & BNXT_FLAG_FW_CAP_ERROR_RECOVERY))
+		return 0;
+
+	info = rte_zmalloc("bnxt_hwrm_error_recovery_qcfg",
+			   sizeof(*info), 0);
+	bp->recovery_info = info;
+	if (info == NULL)
+		return -ENOMEM;
+
+	HWRM_PREP(req, ERROR_RECOVERY_QCFG, BNXT_USE_CHIMP_MB);
+
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
+
+	HWRM_CHECK_RESULT();
+
+	flags = rte_le_to_cpu_32(resp->flags);
+	if (flags & HWRM_ERROR_RECOVERY_QCFG_OUTPUT_FLAGS_HOST)
+		info->flags |= BNXT_FLAG_ERROR_RECOVERY_HOST;
+	else if (flags & HWRM_ERROR_RECOVERY_QCFG_OUTPUT_FLAGS_CO_CPU)
+		info->flags |= BNXT_FLAG_ERROR_RECOVERY_CO_CPU;
+
+	if ((info->flags & BNXT_FLAG_ERROR_RECOVERY_CO_CPU) &&
+	    !(bp->flags & BNXT_FLAG_KONG_MB_EN)) {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	/* FW returned values are in units of 100msec */
+	info->driver_polling_freq =
+		rte_le_to_cpu_32(resp->driver_polling_freq) * 100;
+	info->master_func_wait_period =
+		rte_le_to_cpu_32(resp->master_func_wait_period) * 100;
+	info->normal_func_wait_period =
+		rte_le_to_cpu_32(resp->normal_func_wait_period) * 100;
+	info->master_func_wait_period_after_reset =
+		rte_le_to_cpu_32(resp->master_func_wait_period_after_reset) * 100;
+	info->max_bailout_time_after_reset =
+		rte_le_to_cpu_32(resp->max_bailout_time_after_reset) * 100;
+	info->status_regs[BNXT_FW_STATUS_REG] =
+		rte_le_to_cpu_32(resp->fw_health_status_reg);
+	info->status_regs[BNXT_FW_HEARTBEAT_CNT_REG] =
+		rte_le_to_cpu_32(resp->fw_heartbeat_reg);
+	info->status_regs[BNXT_FW_RECOVERY_CNT_REG] =
+		rte_le_to_cpu_32(resp->fw_reset_cnt_reg);
+	info->status_regs[BNXT_FW_RESET_INPROG_REG] =
+		rte_le_to_cpu_32(resp->reset_inprogress_reg);
+	info->reg_array_cnt =
+		rte_le_to_cpu_32(resp->reg_array_cnt);
+
+	if (info->reg_array_cnt >= BNXT_NUM_RESET_REG) {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	for (i = 0; i < info->reg_array_cnt; i++) {
+		info->reset_reg[i] =
+			rte_le_to_cpu_32(resp->reset_reg[i]);
+		info->reset_reg_val[i] =
+			rte_le_to_cpu_32(resp->reset_reg_val[i]);
+		info->delay_after_reset[i] =
+			resp->delay_after_reset[i];
+	}
+err:
+	HWRM_UNLOCK();
+
+	if (rc) {
+		rte_free(bp->recovery_info);
+		bp->recovery_info = NULL;
+	}
+	return rc;
+}
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index 2f57e950b..c332c129d 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -202,4 +202,5 @@ int bnxt_hwrm_tunnel_redirect_info(struct bnxt *bp, uint8_t tun_type,
 				   uint16_t *dst_fid);
 int bnxt_hwrm_set_mac(struct bnxt *bp);
 int bnxt_hwrm_if_change(struct bnxt *bp, bool state);
+int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp);
 #endif
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH 07/13] net/bnxt: map status registers for FW health monitoring
  2019-08-22  5:53 [dpdk-dev] [PATCH 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                   ` (5 preceding siblings ...)
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 06/13] net/bnxt: query firmware error recovery capabilities Ajit Khaparde
@ 2019-08-22  5:53 ` Ajit Khaparde
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 08/13] net/bnxt: advertise error recovery capability and handle async event Ajit Khaparde
                   ` (5 subsequent siblings)
  12 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-22  5:53 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

The HWRM_ERROR_RECOVERY_QCFG command returns the offsets of the FW status
registers used for periodic firmware health monitoring. Map them to GRC
window 2.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        | 22 ++++++++++++++++-
 drivers/net/bnxt/bnxt_ethdev.c | 44 ++++++++++++++++++++++++++++++++++
 drivers/net/bnxt/bnxt_hwrm.c   |  4 ++++
 3 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 19bd13a7f..1da09569d 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -354,7 +354,9 @@ struct bnxt_error_recovery_info {
 #define BNXT_FW_HEARTBEAT_CNT_REG	1
 #define BNXT_FW_RECOVERY_CNT_REG	2
 #define BNXT_FW_RESET_INPROG_REG	3
-	uint32_t	status_regs[4];
+#define BNXT_FW_STATUS_REG_CNT		4
+	uint32_t	status_regs[BNXT_FW_STATUS_REG_CNT];
+	uint32_t	mapped_status_regs[BNXT_FW_STATUS_REG_CNT];
 	uint32_t	reset_inprogress_reg_mask;
 #define BNXT_NUM_RESET_REG	16
 	uint8_t		reg_array_cnt;
@@ -366,6 +368,22 @@ struct bnxt_error_recovery_info {
 	uint32_t	flags;
 };
 
+/* address space location of register */
+#define BNXT_FW_STATUS_REG_TYPE_MASK	3
+/* register is located in PCIe config space */
+#define BNXT_FW_STATUS_REG_TYPE_CFG	0
+/* register is located in GRC address space */
+#define BNXT_FW_STATUS_REG_TYPE_GRC	1
+/* register is located in BAR0  */
+#define BNXT_FW_STATUS_REG_TYPE_BAR0	2
+/* register is located in BAR1  */
+#define BNXT_FW_STATUS_REG_TYPE_BAR1	3
+
+#define BNXT_FW_STATUS_REG_TYPE(reg)	((reg) & BNXT_FW_STATUS_REG_TYPE_MASK)
+#define BNXT_FW_STATUS_REG_OFF(reg)	((reg) & ~BNXT_FW_STATUS_REG_TYPE_MASK)
+
+#define BNXT_GRCP_WINDOW_2_BASE		0x2000
+
 #define BNXT_HWRM_SHORT_REQ_LEN		sizeof(struct hwrm_short_input)
 struct bnxt {
 	void				*bar0;
@@ -510,6 +528,8 @@ int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete);
 int bnxt_rcv_msg_from_vf(struct bnxt *bp, uint16_t vf_id, void *msg);
 int is_bnxt_in_error(struct bnxt *bp);
 
+int bnxt_map_fw_health_status_regs(struct bnxt *bp);
+
 bool is_bnxt_supported(struct rte_eth_dev *dev);
 bool bnxt_stratus_device(struct bnxt *bp);
 extern const struct rte_flow_ops bnxt_flow_ops;
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 18046c00a..52c460d2c 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -3496,6 +3496,49 @@ static const struct eth_dev_ops bnxt_dev_ops = {
 	.timesync_read_tx_timestamp = bnxt_timesync_read_tx_timestamp,
 };
 
+int bnxt_map_fw_health_status_regs(struct bnxt *bp)
+{
+	struct bnxt_error_recovery_info *info = bp->recovery_info;
+	uint32_t reg_base = 0xffffffff;
+	int i;
+
+	/* Only pre-map the monitoring GRC registers using window 2 */
+	for (i = 0; i < BNXT_FW_STATUS_REG_CNT; i++) {
+		uint32_t reg = info->status_regs[i];
+
+		if (BNXT_FW_STATUS_REG_TYPE(reg) != BNXT_FW_STATUS_REG_TYPE_GRC)
+			continue;
+
+		if (reg_base == 0xffffffff)
+			reg_base = reg & 0xfffff000;
+		if ((reg & 0xfffff000) != reg_base)
+			return -ERANGE;
+
+		/* Use mask 0xffc as the Lower 2 bits indicates
+		 * address space location
+		 */
+		info->mapped_status_regs[i] = BNXT_GRCP_WINDOW_2_BASE +
+						(reg & 0xffc);
+	}
+
+	if (reg_base == 0xffffffff)
+		return 0;
+
+	rte_write32(reg_base, (uint8_t *)bp->bar0 +
+		    BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4);
+
+	return 0;
+}
+
+static void bnxt_unmap_fw_health_status_regs(struct bnxt *bp)
+{
+	if (!(bp->flags & BNXT_FLAG_FW_CAP_ERROR_RECOVERY))
+		return;
+
+	rte_write32(0, (uint8_t *)bp->bar0 +
+		    BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4);
+}
+
 static void bnxt_dev_cleanup(struct bnxt *bp)
 {
 	bnxt_set_hwrm_link_config(bp, false);
@@ -4227,6 +4270,7 @@ bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev)
 	bnxt_free_int(bp);
 	bnxt_free_mem(bp, reconfig_dev);
 	bnxt_hwrm_func_buf_unrgtr(bp);
+	bnxt_unmap_fw_health_status_regs(bp);
 	rc = bnxt_hwrm_func_driver_unregister(bp, 0);
 	bp->flags &= ~BNXT_FLAG_REGISTERED;
 	bnxt_free_ctx_mem(bp);
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index e2c993936..2d9c43c98 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -4767,6 +4767,10 @@ int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp)
 err:
 	HWRM_UNLOCK();
 
+	/* Map the FW status registers */
+	if (!rc)
+		rc = bnxt_map_fw_health_status_regs(bp);
+
 	if (rc) {
 		rte_free(bp->recovery_info);
 		bp->recovery_info = NULL;
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH 08/13] net/bnxt: advertise error recovery capability and handle async event
  2019-08-22  5:53 [dpdk-dev] [PATCH 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                   ` (6 preceding siblings ...)
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 07/13] net/bnxt: map status registers for FW health monitoring Ajit Khaparde
@ 2019-08-22  5:53 ` Ajit Khaparde
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 09/13] net/bnxt: add code for periodic FW health monitoring Ajit Khaparde
                   ` (4 subsequent siblings)
  12 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-22  5:53 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

1. Advertise the HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_ERROR_RECOVERY_SUPPORT
   flag in the FUNC_DRV_RGTR command.
2. Request the async event ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY
   in the FUNC_DRV_RGTR command.
3. Handle the async event EVENT_ID_ERROR_RECOVERY from FW.

Firmware will use error recovery only if all driver instances support
the error recovery process.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h      |  2 ++
 drivers/net/bnxt/bnxt_cpr.c  | 45 ++++++++++++++++++++++++++++++++++++
 drivers/net/bnxt/bnxt_cpr.h  | 12 ++++++++++
 drivers/net/bnxt/bnxt_hwrm.c |  5 ++++
 drivers/net/bnxt/bnxt_hwrm.h |  2 ++
 5 files changed, 66 insertions(+)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 1da09569d..f9147a9a8 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -365,6 +365,8 @@ struct bnxt_error_recovery_info {
 	uint8_t		delay_after_reset[BNXT_NUM_RESET_REG];
 #define BNXT_FLAG_ERROR_RECOVERY_HOST	(1 << 0)
 #define BNXT_FLAG_ERROR_RECOVERY_CO_CPU	(1 << 1)
+#define BNXT_FLAG_MASTER_FUNC		(1 << 2)
+#define BNXT_FLAG_RECOVERY_ENABLED	(1 << 3)
 	uint32_t	flags;
 };
 
diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index 6e0b1d67e..7f5b3314e 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -20,6 +20,7 @@ void bnxt_handle_async_event(struct bnxt *bp,
 	struct hwrm_async_event_cmpl *async_cmp =
 				(struct hwrm_async_event_cmpl *)cmp;
 	uint16_t event_id = rte_le_to_cpu_16(async_cmp->event_id);
+	struct bnxt_error_recovery_info *info;
 	uint32_t event_data;
 
 	/* TODO: HWRM async events are not defined yet */
@@ -63,6 +64,31 @@ void bnxt_handle_async_event(struct bnxt *bp,
 		bp->flags |= BNXT_FLAG_FW_RESET;
 		bnxt_dev_reset_and_resume(bp);
 		break;
+	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY:
+		info = bp->recovery_info;
+
+		if (!info)
+			return;
+
+		PMD_DRV_LOG(INFO, "Error recovery async event received\n");
+
+		event_data = rte_le_to_cpu_32(async_cmp->event_data1) &
+				EVENT_DATA1_FLAGS_MASK;
+
+		if (event_data & EVENT_DATA1_FLAGS_MASTER_FUNC)
+			info->flags |= BNXT_FLAG_MASTER_FUNC;
+		else
+			info->flags &= ~BNXT_FLAG_MASTER_FUNC;
+
+		if (event_data & EVENT_DATA1_FLAGS_RECOVERY_ENABLED)
+			info->flags |= BNXT_FLAG_RECOVERY_ENABLED;
+		else
+			info->flags &= ~BNXT_FLAG_RECOVERY_ENABLED;
+
+		PMD_DRV_LOG(INFO, "recovery enabled(%d), master function(%d)\n",
+			    bnxt_is_recovery_enabled(bp),
+			    bnxt_is_master_func(bp));
+		break;
 	default:
 		PMD_DRV_LOG(INFO, "handle_async_event id = 0x%x\n", event_id);
 		break;
@@ -184,3 +210,22 @@ int bnxt_event_hwrm_resp_handler(struct bnxt *bp, struct cmpl_base *cmp)
 
 	return evt;
 }
+
+bool bnxt_is_master_func(struct bnxt *bp)
+{
+	if (bp->recovery_info->flags & BNXT_FLAG_MASTER_FUNC)
+		return true;
+
+	return false;
+}
+
+bool bnxt_is_recovery_enabled(struct bnxt *bp)
+{
+	struct bnxt_error_recovery_info *info;
+
+	info = bp->recovery_info;
+	if (info && (info->flags & BNXT_FLAG_RECOVERY_ENABLED))
+		return true;
+
+	return false;
+}
diff --git a/drivers/net/bnxt/bnxt_cpr.h b/drivers/net/bnxt/bnxt_cpr.h
index 4e63fd12f..22fba5b40 100644
--- a/drivers/net/bnxt/bnxt_cpr.h
+++ b/drivers/net/bnxt/bnxt_cpr.h
@@ -113,4 +113,16 @@ int bnxt_dev_reset_and_resume(struct bnxt *bp);
 #define EVENT_DATA1_REASON_CODE_MASK                   \
 	HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MASK
 
+#define EVENT_DATA1_FLAGS_MASK                         \
+	HWRM_ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_MASK
+
+#define EVENT_DATA1_FLAGS_MASTER_FUNC                  \
+	HWRM_ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_MASTER_FUNC
+
+#define EVENT_DATA1_FLAGS_RECOVERY_ENABLED             \
+	HWRM_ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_RECOVERY_ENABLED
+
+bool bnxt_is_recovery_enabled(struct bnxt *bp);
+bool bnxt_is_master_func(struct bnxt *bp);
+
 #endif
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 2d9c43c98..350e867bf 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -685,6 +685,8 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp)
 		return 0;
 
 	flags = HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_HOT_RESET_SUPPORT;
+	if (bp->flags & BNXT_FLAG_FW_CAP_ERROR_RECOVERY)
+		flags |= HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_ERROR_RECOVERY_SUPPORT;
 
 	HWRM_PREP(req, FUNC_DRV_RGTR, BNXT_USE_CHIMP_MB);
 	req.enables = rte_cpu_to_le_32(HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_VER |
@@ -716,6 +718,9 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp)
 				 ASYNC_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED |
 				 ASYNC_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE |
 				 ASYNC_CMPL_EVENT_ID_RESET_NOTIFY);
+	if (bp->flags & BNXT_FLAG_FW_CAP_ERROR_RECOVERY)
+		req.async_event_fwd[0] |=
+			rte_cpu_to_le_32(ASYNC_CMPL_EVENT_ID_ERROR_RECOVERY);
 	req.async_event_fwd[1] |=
 		rte_cpu_to_le_32(ASYNC_CMPL_EVENT_ID_PF_DRVR_UNLOAD |
 				 ASYNC_CMPL_EVENT_ID_VF_CFG_CHANGE);
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index c332c129d..44e335507 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -23,6 +23,8 @@ struct bnxt_cp_ring_info;
 	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE)
 #define ASYNC_CMPL_EVENT_ID_RESET_NOTIFY \
 	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY)
+#define ASYNC_CMPL_EVENT_ID_ERROR_RECOVERY \
+	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY)
 #define ASYNC_CMPL_EVENT_ID_PF_DRVR_UNLOAD	\
 	(1 << (HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD - 32))
 #define ASYNC_CMPL_EVENT_ID_VF_CFG_CHANGE	\
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH 09/13] net/bnxt: add code for periodic FW health monitoring
  2019-08-22  5:53 [dpdk-dev] [PATCH 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                   ` (7 preceding siblings ...)
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 08/13] net/bnxt: advertise error recovery capability and handle async event Ajit Khaparde
@ 2019-08-22  5:53 ` Ajit Khaparde
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 10/13] net/bnxt: use BIT macro instead of bit fields Ajit Khaparde
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-22  5:53 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

Periodically poll the FW heartbeat and FW recovery counter registers
to check the FW health. The polling frequency is advertised by the FW
in the HWRM_ERROR_RECOVERY_QCFG response. The polling task is scheduled
upon receiving the error recovery async event from FW.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        |  5 ++
 drivers/net/bnxt/bnxt_cpr.c    |  7 +++
 drivers/net/bnxt/bnxt_ethdev.c | 89 ++++++++++++++++++++++++++++++++++
 3 files changed, 101 insertions(+)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index f9147a9a8..a23c4a64c 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -368,6 +368,9 @@ struct bnxt_error_recovery_info {
 #define BNXT_FLAG_MASTER_FUNC		(1 << 2)
 #define BNXT_FLAG_RECOVERY_ENABLED	(1 << 3)
 	uint32_t	flags;
+
+	uint32_t        last_heart_beat;
+	uint32_t        last_reset_counter;
 };
 
 /* address space location of register */
@@ -531,6 +534,8 @@ int bnxt_rcv_msg_from_vf(struct bnxt *bp, uint16_t vf_id, void *msg);
 int is_bnxt_in_error(struct bnxt *bp);
 
 int bnxt_map_fw_health_status_regs(struct bnxt *bp);
+uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index);
+void bnxt_schedule_fw_health_check(struct bnxt *bp);
 
 bool is_bnxt_supported(struct rte_eth_dev *dev);
 bool bnxt_stratus_device(struct bnxt *bp);
diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index 7f5b3314e..a692fbe7c 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -88,6 +88,13 @@ void bnxt_handle_async_event(struct bnxt *bp,
 		PMD_DRV_LOG(INFO, "recovery enabled(%d), master function(%d)\n",
 			    bnxt_is_recovery_enabled(bp),
 			    bnxt_is_master_func(bp));
+
+		info->last_heart_beat =
+			bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG);
+		info->last_reset_counter =
+			bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG);
+
+		bnxt_schedule_fw_health_check(bp);
 		break;
 	default:
 		PMD_DRV_LOG(INFO, "handle_async_event id = 0x%x\n", event_id);
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 52c460d2c..0317eb888 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -169,6 +169,7 @@ static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu);
 static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev);
 static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev);
 static int bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev);
+static void bnxt_cancel_fw_health_check(struct bnxt *bp);
 
 int is_bnxt_in_error(struct bnxt *bp)
 {
@@ -880,6 +881,8 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
 	/* disable uio/vfio intr/eventfd mapping */
 	rte_intr_disable(intr_handle);
 
+	bnxt_cancel_fw_health_check(bp);
+
 	bp->flags &= ~BNXT_FLAG_INIT_DONE;
 	if (bp->eth_dev->data->dev_started) {
 		/* TBD: STOP HW queues DMA */
@@ -3608,6 +3611,92 @@ int bnxt_dev_reset_and_resume(struct bnxt *bp)
 	return rc;
 }
 
+uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index)
+{
+	struct bnxt_error_recovery_info *info = bp->recovery_info;
+	uint32_t reg = info->status_regs[index];
+	uint32_t type, offset, val = 0;
+
+	type = BNXT_FW_STATUS_REG_TYPE(reg);
+	offset = BNXT_FW_STATUS_REG_OFF(reg);
+
+	switch (type) {
+	case BNXT_FW_STATUS_REG_TYPE_CFG:
+		rte_pci_read_config(bp->pdev, &val, sizeof(val), offset);
+		break;
+	case BNXT_FW_STATUS_REG_TYPE_GRC:
+		offset = info->mapped_status_regs[index];
+		/* FALLTHROUGH */
+	case BNXT_FW_STATUS_REG_TYPE_BAR0:
+		val = rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 +
+				       offset));
+		break;
+	}
+
+	return val;
+}
+
+/* Driver should poll FW heartbeat, reset_counter with the frequency
+ * advertised by FW in HWRM_ERROR_RECOVERY_QCFG.
+ * When the driver detects heartbeat stop or change in reset_counter,
+ * it has to trigger a reset to recover from the error condition.
+ * A “master PF” is the function who will have the privilege to
+ * initiate the chimp reset. The master PF will be elected by the
+ * firmware and will be notified through async message.
+ */
+static void bnxt_check_fw_health(void *arg)
+{
+	struct bnxt *bp = arg;
+	struct bnxt_error_recovery_info *info = bp->recovery_info;
+	uint32_t val = 0;
+
+	if (!info || !bnxt_is_recovery_enabled(bp) ||
+	    is_bnxt_in_error(bp))
+		return;
+
+	val = bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG);
+	if (val == info->last_heart_beat)
+		goto reset;
+
+	info->last_heart_beat = val;
+
+	val = bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG);
+	if (val != info->last_reset_counter)
+		goto reset;
+
+	info->last_reset_counter = val;
+
+	rte_eal_alarm_set(US_PER_MS * info->driver_polling_freq,
+			  bnxt_check_fw_health, (void *)bp);
+
+	return;
+reset:
+	/* Stop DMA to/from device */
+	bp->flags |= BNXT_FLAG_FATAL_ERROR;
+	bp->flags |= BNXT_FLAG_FW_RESET;
+
+	PMD_DRV_LOG(ERR, "Detected FW dead condition\n");
+}
+
+void bnxt_schedule_fw_health_check(struct bnxt *bp)
+{
+	uint32_t polling_freq = bp->recovery_info->driver_polling_freq;
+
+	if (!bnxt_is_recovery_enabled(bp))
+		return;
+
+	rte_eal_alarm_set(US_PER_MS * polling_freq,
+			  bnxt_check_fw_health, (void *)bp);
+}
+
+static void bnxt_cancel_fw_health_check(struct bnxt *bp)
+{
+	if (!bnxt_is_recovery_enabled(bp))
+		return;
+
+	rte_eal_alarm_cancel(bnxt_check_fw_health, (void *)bp);
+}
+
 static bool bnxt_vf_pciid(uint16_t id)
 {
 	if (id == BROADCOM_DEV_ID_57304_VF ||
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH 10/13] net/bnxt: use BIT macro instead of bit fields
  2019-08-22  5:53 [dpdk-dev] [PATCH 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                   ` (8 preceding siblings ...)
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 09/13] net/bnxt: add code for periodic FW health monitoring Ajit Khaparde
@ 2019-08-22  5:53 ` Ajit Khaparde
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 11/13] net/bnxt: reschedule the health check alarm correctly Ajit Khaparde
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-22  5:53 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

Use the BIT() macro instead of open-coded (1 << n) shift expressions
to define flag bits.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h      | 73 ++++++++++++++++++------------------
 drivers/net/bnxt/bnxt_util.h |  4 ++
 2 files changed, 41 insertions(+), 36 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index a23c4a64c..93aac15b4 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -19,6 +19,7 @@
 #include <rte_time.h>
 
 #include "bnxt_cpr.h"
+#include "bnxt_util.h"
 
 #define BNXT_MAX_MTU		9574
 #define VLAN_TAG_SIZE		4
@@ -198,16 +199,16 @@ struct bnxt_ptp_cfg {
 	struct bnxt		*bp;
 #define BNXT_MAX_TX_TS	1
 	uint16_t			rxctl;
-#define BNXT_PTP_MSG_SYNC			(1 << 0)
-#define BNXT_PTP_MSG_DELAY_REQ			(1 << 1)
-#define BNXT_PTP_MSG_PDELAY_REQ			(1 << 2)
-#define BNXT_PTP_MSG_PDELAY_RESP		(1 << 3)
-#define BNXT_PTP_MSG_FOLLOW_UP			(1 << 8)
-#define BNXT_PTP_MSG_DELAY_RESP			(1 << 9)
-#define BNXT_PTP_MSG_PDELAY_RESP_FOLLOW_UP	(1 << 10)
-#define BNXT_PTP_MSG_ANNOUNCE			(1 << 11)
-#define BNXT_PTP_MSG_SIGNALING			(1 << 12)
-#define BNXT_PTP_MSG_MANAGEMENT			(1 << 13)
+#define BNXT_PTP_MSG_SYNC			BIT(0)
+#define BNXT_PTP_MSG_DELAY_REQ			BIT(1)
+#define BNXT_PTP_MSG_PDELAY_REQ			BIT(2)
+#define BNXT_PTP_MSG_PDELAY_RESP		BIT(3)
+#define BNXT_PTP_MSG_FOLLOW_UP			BIT(8)
+#define BNXT_PTP_MSG_DELAY_RESP			BIT(9)
+#define BNXT_PTP_MSG_PDELAY_RESP_FOLLOW_UP	BIT(10)
+#define BNXT_PTP_MSG_ANNOUNCE			BIT(11)
+#define BNXT_PTP_MSG_SIGNALING			BIT(12)
+#define BNXT_PTP_MSG_MANAGEMENT			BIT(13)
 #define BNXT_PTP_MSG_EVENTS		(BNXT_PTP_MSG_SYNC |		\
 					 BNXT_PTP_MSG_DELAY_REQ |	\
 					 BNXT_PTP_MSG_PDELAY_REQ |	\
@@ -363,10 +364,10 @@ struct bnxt_error_recovery_info {
 	uint32_t	reset_reg[BNXT_NUM_RESET_REG];
 	uint32_t	reset_reg_val[BNXT_NUM_RESET_REG];
 	uint8_t		delay_after_reset[BNXT_NUM_RESET_REG];
-#define BNXT_FLAG_ERROR_RECOVERY_HOST	(1 << 0)
-#define BNXT_FLAG_ERROR_RECOVERY_CO_CPU	(1 << 1)
-#define BNXT_FLAG_MASTER_FUNC		(1 << 2)
-#define BNXT_FLAG_RECOVERY_ENABLED	(1 << 3)
+#define BNXT_FLAG_ERROR_RECOVERY_HOST	BIT(0)
+#define BNXT_FLAG_ERROR_RECOVERY_CO_CPU	BIT(1)
+#define BNXT_FLAG_MASTER_FUNC		BIT(2)
+#define BNXT_FLAG_RECOVERY_ENABLED	BIT(3)
 	uint32_t	flags;
 
 	uint32_t        last_heart_beat;
@@ -399,28 +400,28 @@ struct bnxt {
 	void				*doorbell_base;
 
 	uint32_t		flags;
-#define BNXT_FLAG_REGISTERED	(1 << 0)
-#define BNXT_FLAG_VF		(1 << 1)
-#define BNXT_FLAG_PORT_STATS	(1 << 2)
-#define BNXT_FLAG_JUMBO		(1 << 3)
-#define BNXT_FLAG_SHORT_CMD	(1 << 4)
-#define BNXT_FLAG_UPDATE_HASH	(1 << 5)
-#define BNXT_FLAG_PTP_SUPPORTED	(1 << 6)
-#define BNXT_FLAG_MULTI_HOST    (1 << 7)
-#define BNXT_FLAG_EXT_RX_PORT_STATS	(1 << 8)
-#define BNXT_FLAG_EXT_TX_PORT_STATS	(1 << 9)
-#define BNXT_FLAG_KONG_MB_EN	(1 << 10)
-#define BNXT_FLAG_TRUSTED_VF_EN	(1 << 11)
-#define BNXT_FLAG_DFLT_VNIC_SET	(1 << 12)
-#define BNXT_FLAG_THOR_CHIP	(1 << 13)
-#define BNXT_FLAG_STINGRAY	(1 << 14)
-#define BNXT_FLAG_FW_RESET	(1 << 15)
-#define BNXT_FLAG_FATAL_ERROR	(1 << 16)
-#define BNXT_FLAG_FW_CAP_IF_CHANGE	(1 << 17)
-#define BNXT_FLAG_FW_CAP_ERROR_RECOVERY	(1 << 18)
-#define BNXT_FLAG_EXT_STATS_SUPPORTED	(1 << 29)
-#define BNXT_FLAG_NEW_RM	(1 << 30)
-#define BNXT_FLAG_INIT_DONE	(1U << 31)
+#define BNXT_FLAG_REGISTERED		BIT(0)
+#define BNXT_FLAG_VF			BIT(1)
+#define BNXT_FLAG_PORT_STATS		BIT(2)
+#define BNXT_FLAG_JUMBO			BIT(3)
+#define BNXT_FLAG_SHORT_CMD		BIT(4)
+#define BNXT_FLAG_UPDATE_HASH		BIT(5)
+#define BNXT_FLAG_PTP_SUPPORTED		BIT(6)
+#define BNXT_FLAG_MULTI_HOST		BIT(7)
+#define BNXT_FLAG_EXT_RX_PORT_STATS	BIT(8)
+#define BNXT_FLAG_EXT_TX_PORT_STATS	BIT(9)
+#define BNXT_FLAG_KONG_MB_EN		BIT(10)
+#define BNXT_FLAG_TRUSTED_VF_EN		BIT(11)
+#define BNXT_FLAG_DFLT_VNIC_SET		BIT(12)
+#define BNXT_FLAG_THOR_CHIP		BIT(13)
+#define BNXT_FLAG_STINGRAY		BIT(14)
+#define BNXT_FLAG_FW_RESET		BIT(15)
+#define BNXT_FLAG_FATAL_ERROR		BIT(16)
+#define BNXT_FLAG_FW_CAP_IF_CHANGE	BIT(17)
+#define BNXT_FLAG_FW_CAP_ERROR_RECOVERY	BIT(18)
+#define BNXT_FLAG_EXT_STATS_SUPPORTED	BIT(19)
+#define BNXT_FLAG_NEW_RM		BIT(20)
+#define BNXT_FLAG_INIT_DONE		BIT(21)
 #define BNXT_PF(bp)		(!((bp)->flags & BNXT_FLAG_VF))
 #define BNXT_VF(bp)		((bp)->flags & BNXT_FLAG_VF)
 #define BNXT_NPAR(bp)		((bp)->port_partition_type)
diff --git a/drivers/net/bnxt/bnxt_util.h b/drivers/net/bnxt/bnxt_util.h
index 9f1868a78..a15b3a1a9 100644
--- a/drivers/net/bnxt/bnxt_util.h
+++ b/drivers/net/bnxt/bnxt_util.h
@@ -6,6 +6,10 @@
 #ifndef _BNXT_UTIL_H_
 #define _BNXT_UTIL_H_
 
+#ifndef BIT
+#define BIT(n)	(1UL << (n))
+#endif /* BIT */
+
 int bnxt_check_zero_bytes(const uint8_t *bytes, int len);
 void bnxt_eth_hw_addr_random(uint8_t *mac_addr);
 
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH 11/13] net/bnxt: reschedule the health check alarm correctly
  2019-08-22  5:53 [dpdk-dev] [PATCH 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                   ` (9 preceding siblings ...)
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 10/13] net/bnxt: use BIT macro instead of bit fields Ajit Khaparde
@ 2019-08-22  5:53 ` Ajit Khaparde
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 12/13] net/bnxt: add support for FW reset Ajit Khaparde
  2019-08-22  5:54 ` [dpdk-dev] [PATCH 13/13] net/bnxt: reduce verbosity of logs Ajit Khaparde
  12 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-22  5:53 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Santoshkumar Karanappa Rastapur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

When the driver receives the error recovery notify event from FW
for the first time, it has to read the heartbeat count register and
recovery count register and schedule the FW health check task to
periodically monitor the FW health.

FW may send this event again at a later time when the state of the master
function changes. There is no need to schedule the health check task again
in that case.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Santoshkumar Karanappa Rastapur <santosh.rastapur@broadcom.com>
Signed-off-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        | 1 +
 drivers/net/bnxt/bnxt_cpr.c    | 3 +++
 drivers/net/bnxt/bnxt_ethdev.c | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 93aac15b4..edaef7897 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -422,6 +422,7 @@ struct bnxt {
 #define BNXT_FLAG_EXT_STATS_SUPPORTED	BIT(19)
 #define BNXT_FLAG_NEW_RM		BIT(20)
 #define BNXT_FLAG_INIT_DONE		BIT(21)
+#define BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED	BIT(22)
 #define BNXT_PF(bp)		(!((bp)->flags & BNXT_FLAG_VF))
 #define BNXT_VF(bp)		((bp)->flags & BNXT_FLAG_VF)
 #define BNXT_NPAR(bp)		((bp)->port_partition_type)
diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index a692fbe7c..50f93bd21 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -89,6 +89,9 @@ void bnxt_handle_async_event(struct bnxt *bp,
 			    bnxt_is_recovery_enabled(bp),
 			    bnxt_is_master_func(bp));
 
+		if (bp->flags & BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED)
+			return;
+
 		info->last_heart_beat =
 			bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG);
 		info->last_reset_counter =
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 0317eb888..e7b0b44c4 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -3687,6 +3687,7 @@ void bnxt_schedule_fw_health_check(struct bnxt *bp)
 
 	rte_eal_alarm_set(US_PER_MS * polling_freq,
 			  bnxt_check_fw_health, (void *)bp);
+	bp->flags |= BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED;
 }
 
 static void bnxt_cancel_fw_health_check(struct bnxt *bp)
@@ -3695,6 +3696,7 @@ static void bnxt_cancel_fw_health_check(struct bnxt *bp)
 		return;
 
 	rte_eal_alarm_cancel(bnxt_check_fw_health, (void *)bp);
+	bp->flags &= ~BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED;
 }
 
 static bool bnxt_vf_pciid(uint16_t id)
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH 12/13] net/bnxt: add support for FW reset
  2019-08-22  5:53 [dpdk-dev] [PATCH 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                   ` (10 preceding siblings ...)
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 11/13] net/bnxt: reschedule the health check alarm correctly Ajit Khaparde
@ 2019-08-22  5:53 ` Ajit Khaparde
  2019-08-22  5:54 ` [dpdk-dev] [PATCH 13/13] net/bnxt: reduce verbosity of logs Ajit Khaparde
  12 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-22  5:53 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

Add code to perform FW_RESET. When the driver detects an error in FW,
it has to initiate the recovery by resetting the cores. FW advertises
the method to do a core reset, along with the reset register offsets and
values to perform the reset, in response to the
HWRM_ERROR_RECOVERY_QCFG command.

There are 2 ways to recover from the error.
1. Master function issues core resets to recover from the error.
2. Master function detects the chimp dead condition and notifies the Kong
   processor about it through the FW_RESET HWRM command.
   The Kong processor then sends a RESET_NOTIFY async event with
   REASON_CODE_FW_EXCEPTION_FATAL to all the PFs/VFs, indicating that
   chimp is dead and that it is going to reset the chimp.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        |   1 +
 drivers/net/bnxt/bnxt_ethdev.c | 103 ++++++++++++++++++++++++++++++++-
 drivers/net/bnxt/bnxt_hwrm.c   |  26 +++++++++
 drivers/net/bnxt/bnxt_hwrm.h   |   1 +
 4 files changed, 130 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index edaef7897..9ea84ec2f 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -389,6 +389,7 @@ struct bnxt_error_recovery_info {
 #define BNXT_FW_STATUS_REG_OFF(reg)	((reg) & ~BNXT_FW_STATUS_REG_TYPE_MASK)
 
 #define BNXT_GRCP_WINDOW_2_BASE		0x2000
+#define BNXT_GRCP_WINDOW_3_BASE		0x3000
 
 #define BNXT_HWRM_SHORT_REQ_LEN		sizeof(struct hwrm_short_input)
 struct bnxt {
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index e7b0b44c4..095395dae 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -3499,6 +3499,19 @@ static const struct eth_dev_ops bnxt_dev_ops = {
 	.timesync_read_tx_timestamp = bnxt_timesync_read_tx_timestamp,
 };
 
+static uint32_t bnxt_map_reset_regs(struct bnxt *bp, uint32_t reg)
+{
+	uint32_t offset;
+
+	/* Only pre-map the reset GRC registers using window 3 */
+	rte_write32(reg & 0xfffff000, (uint8_t *)bp->bar0 +
+		    BNXT_GRCPF_REG_WINDOW_BASE_OUT + 8);
+
+	offset = BNXT_GRCP_WINDOW_3_BASE + (reg & 0xffc);
+
+	return offset;
+}
+
 int bnxt_map_fw_health_status_regs(struct bnxt *bp)
 {
 	struct bnxt_error_recovery_info *info = bp->recovery_info;
@@ -3542,6 +3555,34 @@ static void bnxt_unmap_fw_health_status_regs(struct bnxt *bp)
 		    BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4);
 }
 
+static void bnxt_write_fw_reset_reg(struct bnxt *bp, uint32_t index)
+{
+	struct bnxt_error_recovery_info *info = bp->recovery_info;
+	uint32_t delay = info->delay_after_reset[index];
+	uint32_t val = info->reset_reg_val[index];
+	uint32_t reg = info->reset_reg[index];
+	uint32_t type, offset;
+
+	type = BNXT_FW_STATUS_REG_TYPE(reg);
+	offset = BNXT_FW_STATUS_REG_OFF(reg);
+
+	switch (type) {
+	case BNXT_FW_STATUS_REG_TYPE_CFG:
+		rte_pci_write_config(bp->pdev, &val, sizeof(val), offset);
+		break;
+	case BNXT_FW_STATUS_REG_TYPE_GRC:
+		offset = bnxt_map_reset_regs(bp, offset);
+		rte_write32(val, (uint8_t *)bp->bar0 + offset);
+		break;
+	case BNXT_FW_STATUS_REG_TYPE_BAR0:
+		rte_write32(val, (uint8_t *)bp->bar0 + offset);
+		break;
+	}
+	/* wait on a specific interval of time until core reset is complete */
+	if (delay)
+		rte_delay_ms(delay);
+}
+
 static void bnxt_dev_cleanup(struct bnxt *bp)
 {
 	bnxt_set_hwrm_link_config(bp, false);
@@ -3636,6 +3677,58 @@ uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index)
 	return val;
 }
 
+static int bnxt_fw_reset_all(struct bnxt *bp)
+{
+	struct bnxt_error_recovery_info *info = bp->recovery_info;
+	uint32_t i;
+	int rc = 0;
+
+	if (info->flags & BNXT_FLAG_ERROR_RECOVERY_HOST) {
+		/* Reset through master function driver */
+		for (i = 0; i < info->reg_array_cnt; i++)
+			bnxt_write_fw_reset_reg(bp, i);
+		/* Wait for time specified by FW after triggering reset */
+		rte_delay_ms(info->master_func_wait_period_after_reset);
+	} else if (info->flags & BNXT_FLAG_ERROR_RECOVERY_CO_CPU) {
+		/* Reset with the help of Kong processor */
+		rc = bnxt_hwrm_fw_reset(bp);
+		if (rc)
+			PMD_DRV_LOG(ERR, "Failed to reset FW\n");
+	}
+
+	return rc;
+}
+
+static void bnxt_fw_reset_cb(void *arg)
+{
+	struct bnxt *bp = arg;
+	struct bnxt_error_recovery_info *info = bp->recovery_info;
+	int rc = 0;
+
+	/* Only Master function can do FW reset */
+	if (bnxt_is_master_func(bp) &&
+	    bnxt_is_recovery_enabled(bp)) {
+		rc = bnxt_fw_reset_all(bp);
+		if (rc) {
+			PMD_DRV_LOG(ERR, "Adapter recovery failed\n");
+			return;
+		}
+	}
+
+	/* if recovery method is ERROR_RECOVERY_CO_CPU, KONG will send
+	 * EXCEPTION_FATAL_ASYNC event to all the functions
+	 * (including MASTER FUNC). After receiving this Async, all the active
+	 * drivers should treat this case as FW initiated recovery
+	 */
+	if (info->flags & BNXT_FLAG_ERROR_RECOVERY_HOST) {
+		bp->fw_reset_min_msecs = BNXT_MIN_FW_READY_TIMEOUT;
+		bp->fw_reset_max_msecs = BNXT_MAX_FW_RESET_TIMEOUT;
+
+		/* To recover from error */
+		bnxt_dev_reset_and_resume(bp);
+	}
+}
+
 /* Driver should poll FW heartbeat, reset_counter with the frequency
  * advertised by FW in HWRM_ERROR_RECOVERY_QCFG.
  * When the driver detects heartbeat stop or change in reset_counter,
@@ -3648,7 +3741,7 @@ static void bnxt_check_fw_health(void *arg)
 {
 	struct bnxt *bp = arg;
 	struct bnxt_error_recovery_info *info = bp->recovery_info;
-	uint32_t val = 0;
+	uint32_t val = 0, wait_msec;
 
 	if (!info || !bnxt_is_recovery_enabled(bp) ||
 	    is_bnxt_in_error(bp))
@@ -3676,6 +3769,14 @@ static void bnxt_check_fw_health(void *arg)
 	bp->flags |= BNXT_FLAG_FW_RESET;
 
 	PMD_DRV_LOG(ERR, "Detected FW dead condition\n");
+
+	if (bnxt_is_master_func(bp))
+		wait_msec = info->master_func_wait_period;
+	else
+		wait_msec = info->normal_func_wait_period;
+
+	rte_eal_alarm_set(US_PER_MS * wait_msec,
+			  bnxt_fw_reset_cb, (void *)bp);
 }
 
 void bnxt_schedule_fw_health_check(struct bnxt *bp)
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 350e867bf..bd2cc01e1 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -4782,3 +4782,29 @@ int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp)
 	}
 	return rc;
 }
+
+int bnxt_hwrm_fw_reset(struct bnxt *bp)
+{
+	struct hwrm_fw_reset_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_fw_reset_input req = {0};
+	int rc;
+
+	if (!BNXT_PF(bp))
+		return -EOPNOTSUPP;
+
+	HWRM_PREP(req, FW_RESET, BNXT_USE_KONG(bp));
+
+	req.embedded_proc_type =
+		HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_CHIP;
+	req.selfrst_status =
+		HWRM_FW_RESET_INPUT_SELFRST_STATUS_SELFRSTASAP;
+	req.flags = HWRM_FW_RESET_INPUT_FLAGS_RESET_GRACEFUL;
+
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req),
+				    BNXT_USE_KONG(bp));
+
+	HWRM_CHECK_RESULT();
+	HWRM_UNLOCK();
+
+	return rc;
+}
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index 44e335507..db25ad591 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -205,4 +205,5 @@ int bnxt_hwrm_tunnel_redirect_info(struct bnxt *bp, uint8_t tun_type,
 int bnxt_hwrm_set_mac(struct bnxt *bp);
 int bnxt_hwrm_if_change(struct bnxt *bp, bool state);
 int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp);
+int bnxt_hwrm_fw_reset(struct bnxt *bp);
 #endif
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH 13/13] net/bnxt: reduce verbosity of logs
  2019-08-22  5:53 [dpdk-dev] [PATCH 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                   ` (11 preceding siblings ...)
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 12/13] net/bnxt: add support for FW reset Ajit Khaparde
@ 2019-08-22  5:54 ` Ajit Khaparde
  12 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-22  5:54 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Lance Richardson, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

When IOMMU is available, EAL picks IOVA as VA as the default IOVA mode.
This causes the bnxt driver to log warning messages saying
"Memzone physical address same as virtual." and "Using rte_mem_virt2iova()"
during load.

Reduce the verbosity of logs to DEBUG.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Lance Richardson <lance.richardson@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt_ethdev.c | 21 +++++++++------------
 drivers/net/bnxt/bnxt_ring.c   |  7 +++----
 drivers/net/bnxt/bnxt_vnic.c   |  7 +++----
 3 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 095395dae..13f1ff6fb 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -3893,10 +3893,9 @@ static int bnxt_alloc_ctx_mem_blk(__rte_unused struct bnxt *bp,
 		memset(mz->addr, 0, mz->len);
 		mz_phys_addr = mz->iova;
 		if ((unsigned long)mz->addr == mz_phys_addr) {
-			PMD_DRV_LOG(WARNING,
-				"Memzone physical address same as virtual.\n");
-			PMD_DRV_LOG(WARNING,
-				    "Using rte_mem_virt2iova()\n");
+			PMD_DRV_LOG(DEBUG,
+				    "physical address same as virtual\n");
+			PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
 			mz_phys_addr = rte_mem_virt2iova(mz->addr);
 			if (mz_phys_addr == RTE_BAD_IOVA) {
 				PMD_DRV_LOG(ERR,
@@ -3929,10 +3928,9 @@ static int bnxt_alloc_ctx_mem_blk(__rte_unused struct bnxt *bp,
 	memset(mz->addr, 0, mz->len);
 	mz_phys_addr = mz->iova;
 	if ((unsigned long)mz->addr == mz_phys_addr) {
-		PMD_DRV_LOG(WARNING,
+		PMD_DRV_LOG(DEBUG,
 			    "Memzone physical address same as virtual.\n");
-		PMD_DRV_LOG(WARNING,
-			    "Using rte_mem_virt2iova()\n");
+		PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
 		for (sz = 0; sz < mem_size; sz += BNXT_PAGE_SIZE)
 			rte_mem_lock_page(((char *)mz->addr) + sz);
 		mz_phys_addr = rte_mem_virt2iova(mz->addr);
@@ -4120,9 +4118,9 @@ static int bnxt_alloc_stats_mem(struct bnxt *bp)
 	memset(mz->addr, 0, mz->len);
 	mz_phys_addr = mz->iova;
 	if ((unsigned long)mz->addr == mz_phys_addr) {
-		PMD_DRV_LOG(WARNING,
+		PMD_DRV_LOG(DEBUG,
 			    "Memzone physical address same as virtual.\n");
-		PMD_DRV_LOG(WARNING,
+		PMD_DRV_LOG(DEBUG,
 			    "Using rte_mem_virt2iova()\n");
 		mz_phys_addr = rte_mem_virt2iova(mz->addr);
 		if (mz_phys_addr == RTE_BAD_IOVA) {
@@ -4158,10 +4156,9 @@ static int bnxt_alloc_stats_mem(struct bnxt *bp)
 	memset(mz->addr, 0, mz->len);
 	mz_phys_addr = mz->iova;
 	if ((unsigned long)mz->addr == mz_phys_addr) {
-		PMD_DRV_LOG(WARNING,
+		PMD_DRV_LOG(DEBUG,
 			    "Memzone physical address same as virtual\n");
-		PMD_DRV_LOG(WARNING,
-			    "Using rte_mem_virt2iova()\n");
+		PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
 		mz_phys_addr = rte_mem_virt2iova(mz->addr);
 		if (mz_phys_addr == RTE_BAD_IOVA) {
 			PMD_DRV_LOG(ERR,
diff --git a/drivers/net/bnxt/bnxt_ring.c b/drivers/net/bnxt/bnxt_ring.c
index f19865c83..2f57e038a 100644
--- a/drivers/net/bnxt/bnxt_ring.c
+++ b/drivers/net/bnxt/bnxt_ring.c
@@ -212,10 +212,9 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
 	mz_phys_addr_base = mz->iova;
 	mz_phys_addr = mz->iova;
 	if ((unsigned long)mz->addr == mz_phys_addr_base) {
-		PMD_DRV_LOG(WARNING,
-			"Memzone physical address same as virtual.\n");
-		PMD_DRV_LOG(WARNING,
-			"Using rte_mem_virt2iova()\n");
+		PMD_DRV_LOG(DEBUG,
+			    "Memzone physical address same as virtual.\n");
+		PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
 		for (sz = 0; sz < total_alloc_len; sz += getpagesize())
 			rte_mem_lock_page(((char *)mz->addr) + sz);
 		mz_phys_addr_base = rte_mem_virt2iova(mz->addr);
diff --git a/drivers/net/bnxt/bnxt_vnic.c b/drivers/net/bnxt/bnxt_vnic.c
index 98415633e..9ea99388b 100644
--- a/drivers/net/bnxt/bnxt_vnic.c
+++ b/drivers/net/bnxt/bnxt_vnic.c
@@ -150,10 +150,9 @@ int bnxt_alloc_vnic_attributes(struct bnxt *bp)
 	}
 	mz_phys_addr = mz->iova;
 	if ((unsigned long)mz->addr == mz_phys_addr) {
-		PMD_DRV_LOG(WARNING,
-			"Memzone physical address same as virtual.\n");
-		PMD_DRV_LOG(WARNING,
-			"Using rte_mem_virt2iova()\n");
+		PMD_DRV_LOG(DEBUG,
+			    "Memzone physical address same as virtual.\n");
+		PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
 		mz_phys_addr = rte_mem_virt2iova(mz->addr);
 		if (mz_phys_addr == RTE_BAD_IOVA) {
 			PMD_DRV_LOG(ERR,
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [dpdk-dev] [PATCH 01/13] net/bnxt: hsi version update
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 01/13] net/bnxt: hsi version update Ajit Khaparde
@ 2019-08-27 13:51   ` Ferruh Yigit
  2019-08-30 16:35     ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ajit Khaparde
  0 siblings, 1 reply; 48+ messages in thread
From: Ferruh Yigit @ 2019-08-27 13:51 UTC (permalink / raw)
  To: Ajit Khaparde, dev; +Cc: Kalesh AP, Somnath Kotur

On 8/22/2019 6:53 AM, Ajit Khaparde wrote:
> From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
> 
> Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
> Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
> Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
> ---
>  drivers/net/bnxt/hsi_struct_def_dpdk.h | 137 +++++++++++++++++++++++++
>  1 file changed, 137 insertions(+)

Hi Kalesh,

As far as I can see, the HWRM API version is not changed; only two new
structs have been added, which are used later for "FW reset" in 12/13. Can
you please give a little more detail in the commit log?

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [dpdk-dev] [PATCH 02/13] net/bnxt: prevent device access when device is in reset
  2019-08-22  5:53 ` [dpdk-dev] [PATCH 02/13] net/bnxt: prevent device access when device is in reset Ajit Khaparde
@ 2019-08-27 15:00   ` Ferruh Yigit
  0 siblings, 0 replies; 48+ messages in thread
From: Ferruh Yigit @ 2019-08-27 15:00 UTC (permalink / raw)
  To: Ajit Khaparde, dev
  Cc: Kalesh AP, Santoshkumar Karanappa Rastapur, Somnath Kotur

On 8/22/2019 6:53 AM, Ajit Khaparde wrote:
> From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
> 
> Refactor init and uninit functions so that the driver can fail
> the eth_dev_ops callbacks and accessing Tx and Rx queues
> when device is in reset or in error state.
> 
> Transmit and receive queues are freed during reset cleanup and
> reallocated during recovery. So we block all data path handling
> in this state. The eth_dev dev_started field is updated depending
> on the status of the device.
> 
> Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
> Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
> Reviewed-by: Santoshkumar Karanappa Rastapur <santosh.rastapur@broadcom.com>
> Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>

<...>

> @@ -167,6 +167,16 @@ static void bnxt_print_link_info(struct rte_eth_dev *eth_dev);
>  static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu);
>  static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev);
>  
> +int is_bnxt_in_error(struct bnxt *bp)
> +{
> +	if (bp->flags & BNXT_FLAG_FATAL_ERROR)
> +		return -EIO;
> +	if (bp->flags & BNXT_FLAG_FW_RESET)
> +		return -EBUSY;
> +
> +	return 0;
> +}

In this patch 'BNXT_FLAG_FATAL_ERROR' & 'BNXT_FLAG_FW_RESET' are not defined
yet, causing build error [1], can you please move the definitions in this patch?

[1]
.../dpdk/drivers/net/bnxt/bnxt_ethdev.c:172:18: error: use of undeclared
identifier 'BNXT_FLAG_FATAL_ERROR'
        if (bp->flags & BNXT_FLAG_FATAL_ERROR)
                        ^
.../dpdk/drivers/net/bnxt/bnxt_ethdev.c:174:18: error: use of undeclared
identifier 'BNXT_FLAG_FW_RESET'
        if (bp->flags & BNXT_FLAG_FW_RESET)
                        ^


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery
  2019-08-27 13:51   ` Ferruh Yigit
@ 2019-08-30 16:35     ` Ajit Khaparde
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 01/13] net/bnxt: add FW reset HWRM command Ajit Khaparde
                         ` (13 more replies)
  0 siblings, 14 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-30 16:35 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

This patchset adds support to monitor the health of the firmware and the
underlying device and recover to an operational state in case of error.
We can also detect if a FW upgrade is in progress and quiesce all
access to the device and recover once FW indicates everything is ready.

Patchset against dpdk-next-net. Please apply.

Kalesh AP (13):
  net/bnxt: add FW reset HWRM command
  net/bnxt: prevent device access when device is in reset
  net/bnxt: handle reset notify async event from FW
  net/bnxt: inform firmware about IF state changes
  net/bnxt: handle fatal event from FW under error conditions
  net/bnxt: query firmware error recovery capabilities
  net/bnxt: map status registers for FW health monitoring
  net/bnxt: advertise error recovery capability and handle async event
  net/bnxt: add code for periodic FW health monitoring
  net/bnxt: add support for FW reset
  net/bnxt: reduce verbosity of logs
  net/bnxt: use BIT macro instead of bit fields
  net/bnxt: avoid null pointer dereference

 drivers/net/bnxt/bnxt.h                | 130 +++-
 drivers/net/bnxt/bnxt_cpr.c            |  80 +++
 drivers/net/bnxt/bnxt_cpr.h            |  18 +
 drivers/net/bnxt/bnxt_ethdev.c         | 815 ++++++++++++++++++++-----
 drivers/net/bnxt/bnxt_hwrm.c           | 200 +++++-
 drivers/net/bnxt/bnxt_hwrm.h           |   7 +
 drivers/net/bnxt/bnxt_ring.c           |  45 +-
 drivers/net/bnxt/bnxt_ring.h           |   1 +
 drivers/net/bnxt/bnxt_rxq.c            |  25 +
 drivers/net/bnxt/bnxt_rxr.c            |  17 +
 drivers/net/bnxt/bnxt_rxr.h            |   2 +
 drivers/net/bnxt/bnxt_stats.c          |  34 +-
 drivers/net/bnxt/bnxt_txq.c            |   7 +
 drivers/net/bnxt/bnxt_txr.c            |  27 +
 drivers/net/bnxt/bnxt_txr.h            |   2 +
 drivers/net/bnxt/bnxt_util.h           |   4 +
 drivers/net/bnxt/bnxt_vnic.c           |   7 +-
 drivers/net/bnxt/hsi_struct_def_dpdk.h | 137 +++++
 18 files changed, 1343 insertions(+), 215 deletions(-)

-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v2 01/13] net/bnxt: add FW reset HWRM command
  2019-08-30 16:35     ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ajit Khaparde
@ 2019-08-30 16:35       ` Ajit Khaparde
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 02/13] net/bnxt: prevent device access when device is in reset Ajit Khaparde
                         ` (12 subsequent siblings)
  13 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-30 16:35 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

This patch adds the new FW reset HWRM command.
This command allows the host software to reset the underlying hardware
if a device error is detected.
Code using this command will be added in a future patch.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/hsi_struct_def_dpdk.h | 137 +++++++++++++++++++++++++
 1 file changed, 137 insertions(+)

diff --git a/drivers/net/bnxt/hsi_struct_def_dpdk.h b/drivers/net/bnxt/hsi_struct_def_dpdk.h
index 6c98c1d6d..009571725 100644
--- a/drivers/net/bnxt/hsi_struct_def_dpdk.h
+++ b/drivers/net/bnxt/hsi_struct_def_dpdk.h
@@ -33621,4 +33621,141 @@ struct hwrm_nvm_validate_option_cmd_err {
 	uint8_t	unused_0[7];
 } __attribute__((packed));
 
+/*****************
+ * hwrm_fw_reset *
+ ******************/
+
+
+/* hwrm_fw_reset_input (size:192b/24B) */
+struct hwrm_fw_reset_input {
+	/* The HWRM command request type. */
+	uint16_t        req_type;
+	/*
+	 * The completion ring to send the completion event on. This should
+	 * be the NQ ID returned from the `nq_alloc` HWRM command.
+	 */
+	uint16_t        cmpl_ring;
+	/*
+	 * The sequence ID is used by the driver for tracking multiple
+	 * commands. This ID is treated as opaque data by the firmware and
+	 * the value is returned in the `hwrm_resp_hdr` upon completion.
+	 */
+	uint16_t        seq_id;
+	/*
+	 * The target ID of the command:
+	 * * 0x0-0xFFF8 - The function ID
+	 * * 0xFFF8-0xFFFE - Reserved for internal processors
+	 * * 0xFFFF - HWRM
+	 */
+	uint16_t        target_id;
+	/*
+	 * A physical address pointer pointing to a host buffer that the
+	 * command's response data will be written. This can be either a host
+	 * physical address (HPA) or a guest physical address (GPA) and must
+	 * point to a physically contiguous block of memory.
+	 */
+	uint64_t        resp_addr;
+	/* Type of embedded processor. */
+	uint8_t embedded_proc_type;
+	/* Boot Processor */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_BOOT \
+		UINT32_C(0x0)
+	/* Management Processor */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_MGMT \
+		UINT32_C(0x1)
+	/* Network control processor */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_NETCTRL \
+		UINT32_C(0x2)
+	/* RoCE control processor */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_ROCE \
+		UINT32_C(0x3)
+	/*
+	 * Host (in multi-host environment): This is only valid if requester is IPC.
+	 * Reinit host hardware resources and PCIe.
+	 */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_HOST \
+		UINT32_C(0x4)
+	/* AP processor complex (in multi-host environment). Use host_idx to control which core is reset */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_AP \
+		UINT32_C(0x5)
+	/* Reset all blocks of the chip (including all processors) */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_CHIP \
+		UINT32_C(0x6)
+	/*
+	 * Host (in multi-host environment): This is only valid if requester is IPC.
+	 * Reinit host hardware resources.
+	 */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_HOST_RESOURCE_REINIT \
+		UINT32_C(0x7)
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_LAST \
+		HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_HOST_RESOURCE_REINIT
+	/* Type of self reset. */
+	uint8_t selfrst_status;
+	/* No Self Reset */
+	#define HWRM_FW_RESET_INPUT_SELFRST_STATUS_SELFRSTNONE \
+		UINT32_C(0x0)
+	/* Self Reset as soon as possible to do so safely */
+	#define HWRM_FW_RESET_INPUT_SELFRST_STATUS_SELFRSTASAP \
+		UINT32_C(0x1)
+	/* Self Reset on PCIe Reset */
+	#define HWRM_FW_RESET_INPUT_SELFRST_STATUS_SELFRSTPCIERST \
+		UINT32_C(0x2)
+	/* Self Reset immediately after notification to all clients. */
+	#define HWRM_FW_RESET_INPUT_SELFRST_STATUS_SELFRSTIMMEDIATE \
+		UINT32_C(0x3)
+	#define HWRM_FW_RESET_INPUT_SELFRST_STATUS_LAST \
+		HWRM_FW_RESET_INPUT_SELFRST_STATUS_SELFRSTIMMEDIATE
+	/*
+	 * Indicate which host is being reset. 0 means first host.
+	 * Only valid when embedded_proc_type is host in multihost
+	 * environment
+	 */
+	uint8_t host_idx;
+	uint8_t flags;
+	/*
+	 * When this bit is '1', then the core firmware initiates
+	 * the reset only after graceful shut down of all registered instances.
+	 * If not, the device will continue with the existing firmware.
+	 */
+	#define HWRM_FW_RESET_INPUT_FLAGS_RESET_GRACEFUL     UINT32_C(0x1)
+	uint8_t unused_0[4];
+} __attribute__((packed));
+
+/* hwrm_fw_reset_output (size:128b/16B) */
+struct hwrm_fw_reset_output {
+	/* The specific error status for the command. */
+	uint16_t        error_code;
+	/* The HWRM command request type. */
+	uint16_t        req_type;
+	/* The sequence ID from the original command. */
+	uint16_t        seq_id;
+	/* The length of the response data in number of bytes. */
+	uint16_t        resp_len;
+	/* Type of self reset. */
+	uint8_t selfrst_status;
+	/* No Self Reset */
+	#define HWRM_FW_RESET_OUTPUT_SELFRST_STATUS_SELFRSTNONE \
+		UINT32_C(0x0)
+	/* Self Reset as soon as possible to do so safely */
+	#define HWRM_FW_RESET_OUTPUT_SELFRST_STATUS_SELFRSTASAP \
+		UINT32_C(0x1)
+	/* Self Reset on PCIe Reset */
+	#define HWRM_FW_RESET_OUTPUT_SELFRST_STATUS_SELFRSTPCIERST \
+		UINT32_C(0x2)
+	/* Self Reset immediately after notification to all clients. */
+	#define HWRM_FW_RESET_OUTPUT_SELFRST_STATUS_SELFRSTIMMEDIATE \
+		UINT32_C(0x3)
+	#define HWRM_FW_RESET_OUTPUT_SELFRST_STATUS_LAST \
+		HWRM_FW_RESET_OUTPUT_SELFRST_STATUS_SELFRSTIMMEDIATE
+	uint8_t unused_0[6];
+	/*
+	 * This field is used in Output records to indicate that the output
+	 * is completely written to RAM.  This field should be read as '1'
+	 * to indicate that the output has been completely written.
+	 * When writing a command completion or response to an internal processor,
+	 * the order of writes has to be such that this field is written last.
+	 */
+	uint8_t valid;
+} __attribute__((packed));
+
 #endif /* _HSI_STRUCT_DEF_DPDK_H_ */
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v2 02/13] net/bnxt: prevent device access when device is in reset
  2019-08-30 16:35     ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ajit Khaparde
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 01/13] net/bnxt: add FW reset HWRM command Ajit Khaparde
@ 2019-08-30 16:35       ` Ajit Khaparde
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 03/13] net/bnxt: handle reset notify async event from FW Ajit Khaparde
                         ` (11 subsequent siblings)
  13 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-30 16:35 UTC (permalink / raw)
  To: dev
  Cc: ferruh.yigit, Kalesh AP, Santoshkumar Karanappa Rastapur, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

Refactor the init and uninit functions so that the driver can fail
the eth_dev_ops callbacks and block access to the Tx and Rx queues
when the device is in reset or in an error state.

Transmit and receive queues are freed during reset cleanup and
reallocated during recovery. So we block all data path handling
in this state. The eth_dev dev_started field is updated depending
on the status of the device.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Santoshkumar Karanappa Rastapur <santosh.rastapur@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        |   3 +
 drivers/net/bnxt/bnxt_ethdev.c | 455 ++++++++++++++++++++++-----------
 drivers/net/bnxt/bnxt_hwrm.c   |   2 -
 drivers/net/bnxt/bnxt_ring.c   |  32 +++
 drivers/net/bnxt/bnxt_ring.h   |   1 +
 drivers/net/bnxt/bnxt_rxq.c    |  25 ++
 drivers/net/bnxt/bnxt_rxr.c    |  17 ++
 drivers/net/bnxt/bnxt_rxr.h    |   2 +
 drivers/net/bnxt/bnxt_stats.c  |  34 ++-
 drivers/net/bnxt/bnxt_txq.c    |   7 +
 drivers/net/bnxt/bnxt_txr.c    |  27 ++
 drivers/net/bnxt/bnxt_txr.h    |   2 +
 12 files changed, 454 insertions(+), 153 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 0c9f994ea..37b4c717d 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -358,6 +358,8 @@ struct bnxt {
 #define BNXT_FLAG_DFLT_VNIC_SET	(1 << 12)
 #define BNXT_FLAG_THOR_CHIP	(1 << 13)
 #define BNXT_FLAG_STINGRAY	(1 << 14)
+#define BNXT_FLAG_FW_RESET	(1 << 15)
+#define BNXT_FLAG_FATAL_ERROR	(1 << 16)
 #define BNXT_FLAG_EXT_STATS_SUPPORTED	(1 << 29)
 #define BNXT_FLAG_NEW_RM	(1 << 30)
 #define BNXT_FLAG_INIT_DONE	(1U << 31)
@@ -465,6 +467,7 @@ struct bnxt {
 
 int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete);
 int bnxt_rcv_msg_from_vf(struct bnxt *bp, uint16_t vf_id, void *msg);
+int is_bnxt_in_error(struct bnxt *bp);
 
 bool is_bnxt_supported(struct rte_eth_dev *dev);
 bool bnxt_stratus_device(struct bnxt *bp);
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 6685ee7d9..33ff4a5a7 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -167,6 +167,16 @@ static void bnxt_print_link_info(struct rte_eth_dev *eth_dev);
 static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu);
 static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev);
 
+int is_bnxt_in_error(struct bnxt *bp)
+{
+	/* A fatal error is reported in preference to a pending FW reset. */
+	if (bp->flags & BNXT_FLAG_FATAL_ERROR)
+		return -EIO;
+
+	/* Otherwise report busy while the FW reset flag is set, else 0. */
+	return (bp->flags & BNXT_FLAG_FW_RESET) ? -EBUSY : 0;
+}
+
 /***********************/
 
 /*
@@ -207,6 +217,10 @@ static int bnxt_alloc_mem(struct bnxt *bp)
 {
 	int rc;
 
+	rc = bnxt_alloc_ring_grps(bp);
+	if (rc)
+		goto alloc_mem_err;
+
 	rc = bnxt_alloc_async_ring_struct(bp);
 	if (rc)
 		goto alloc_mem_err;
@@ -501,6 +515,9 @@ static void bnxt_dev_info_get_op(struct rte_eth_dev *eth_dev,
 	uint16_t max_vnics, i, j, vpool, vrxq;
 	unsigned int max_rx_rings;
 
+	if (is_bnxt_in_error(bp))
+		return;
+
 	/* MAC Specifics */
 	dev_info->max_mac_addrs = bp->max_l2_ctx;
 	dev_info->max_hash_mac_addrs = 0;
@@ -602,6 +619,10 @@ static int bnxt_dev_configure_op(struct rte_eth_dev *eth_dev)
 	bp->tx_nr_rings = eth_dev->data->nb_tx_queues;
 	bp->rx_nr_rings = eth_dev->data->nb_rx_queues;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (BNXT_VF(bp) && (bp->flags & BNXT_FLAG_NEW_RM)) {
 		rc = bnxt_hwrm_check_vf_rings(bp);
 		if (rc) {
@@ -791,8 +812,10 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
 
 	eth_dev->rx_pkt_burst = bnxt_receive_function(eth_dev);
 	eth_dev->tx_pkt_burst = bnxt_transmit_function(eth_dev);
+
 	bnxt_enable_int(bp);
 	bp->flags |= BNXT_FLAG_INIT_DONE;
+	eth_dev->data->dev_started = 1;
 	bp->dev_stopped = 0;
 	return 0;
 
@@ -835,6 +858,11 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 
+	eth_dev->data->dev_started = 0;
+	/* Prevent crashes when queues are still in use */
+	eth_dev->rx_pkt_burst = &bnxt_dummy_recv_pkts;
+	eth_dev->tx_pkt_burst = &bnxt_dummy_xmit_pkts;
+
 	bnxt_disable_int(bp);
 
 	/* disable uio/vfio intr/eventfd mapping */
@@ -889,6 +917,9 @@ static void bnxt_mac_addr_remove_op(struct rte_eth_dev *eth_dev,
 	struct bnxt_filter_info *filter, *temp_filter;
 	uint32_t i;
 
+	if (is_bnxt_in_error(bp))
+		return;
+
 	/*
 	 * Loop through all VNICs from the specified filter flow pools to
 	 * remove the corresponding MAC addr filter
@@ -924,6 +955,10 @@ static int bnxt_mac_addr_add_op(struct rte_eth_dev *eth_dev,
 	struct bnxt_filter_info *filter;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (BNXT_VF(bp) & !BNXT_VF_IS_TRUSTED(bp)) {
 		PMD_DRV_LOG(ERR, "Cannot add MAC address to a VF interface\n");
 		return -ENOTSUP;
@@ -969,6 +1004,10 @@ int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete)
 	struct rte_eth_link new;
 	unsigned int cnt = BNXT_LINK_WAIT_CNT;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	memset(&new, 0, sizeof(new));
 	do {
 		/* Retrieve link info from hardware */
@@ -1009,6 +1048,9 @@ static void bnxt_promiscuous_enable_op(struct rte_eth_dev *eth_dev)
 	struct bnxt *bp = eth_dev->data->dev_private;
 	struct bnxt_vnic_info *vnic;
 
+	if (is_bnxt_in_error(bp))
+		return;
+
 	if (bp->vnic_info == NULL)
 		return;
 
@@ -1023,6 +1065,9 @@ static void bnxt_promiscuous_disable_op(struct rte_eth_dev *eth_dev)
 	struct bnxt *bp = eth_dev->data->dev_private;
 	struct bnxt_vnic_info *vnic;
 
+	if (is_bnxt_in_error(bp))
+		return;
+
 	if (bp->vnic_info == NULL)
 		return;
 
@@ -1037,6 +1082,9 @@ static void bnxt_allmulticast_enable_op(struct rte_eth_dev *eth_dev)
 	struct bnxt *bp = eth_dev->data->dev_private;
 	struct bnxt_vnic_info *vnic;
 
+	if (is_bnxt_in_error(bp))
+		return;
+
 	if (bp->vnic_info == NULL)
 		return;
 
@@ -1051,6 +1099,9 @@ static void bnxt_allmulticast_disable_op(struct rte_eth_dev *eth_dev)
 	struct bnxt *bp = eth_dev->data->dev_private;
 	struct bnxt_vnic_info *vnic;
 
+	if (is_bnxt_in_error(bp))
+		return;
+
 	if (bp->vnic_info == NULL)
 		return;
 
@@ -1100,7 +1151,11 @@ static int bnxt_reta_update_op(struct rte_eth_dev *eth_dev,
 	struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
 	uint16_t tbl_size = bnxt_rss_hash_tbl_size(bp);
 	uint16_t idx, sft;
-	int i;
+	int i, rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	if (!vnic->rss_table)
 		return -EINVAL;
@@ -1156,6 +1211,11 @@ static int bnxt_reta_query_op(struct rte_eth_dev *eth_dev,
 	struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
 	uint16_t tbl_size = bnxt_rss_hash_tbl_size(bp);
 	uint16_t idx, sft, i;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	/* Retrieve from the default VNIC */
 	if (!vnic)
@@ -1202,6 +1262,11 @@ static int bnxt_rss_hash_update_op(struct rte_eth_dev *eth_dev,
 	struct bnxt_vnic_info *vnic;
 	uint16_t hash_type = 0;
 	unsigned int i;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	/*
 	 * If RSS enablement were different than dev_configure,
@@ -1255,9 +1320,13 @@ static int bnxt_rss_hash_conf_get_op(struct rte_eth_dev *eth_dev,
 {
 	struct bnxt *bp = eth_dev->data->dev_private;
 	struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
-	int len;
+	int len, rc;
 	uint32_t hash_types;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	/* RSS configuration is the same for all VNICs */
 	if (vnic && vnic->rss_hash_key) {
 		if (rss_conf->rss_key) {
@@ -1315,6 +1384,10 @@ static int bnxt_flow_ctrl_get_op(struct rte_eth_dev *dev,
 	struct rte_eth_link link_info;
 	int rc;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	rc = bnxt_get_hwrm_link_config(bp, &link_info);
 	if (rc)
 		return rc;
@@ -1344,6 +1417,11 @@ static int bnxt_flow_ctrl_set_op(struct rte_eth_dev *dev,
 			       struct rte_eth_fc_conf *fc_conf)
 {
 	struct bnxt *bp = dev->data->dev_private;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	if (!BNXT_SINGLE_PF(bp) || BNXT_VF(bp)) {
 		PMD_DRV_LOG(ERR, "Flow Control Settings cannot be modified\n");
@@ -1403,6 +1481,10 @@ bnxt_udp_tunnel_port_add_op(struct rte_eth_dev *eth_dev,
 	uint16_t tunnel_type = 0;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	switch (udp_tunnel->prot_type) {
 	case RTE_TUNNEL_TYPE_VXLAN:
 		if (bp->vxlan_port_cnt) {
@@ -1452,6 +1534,10 @@ bnxt_udp_tunnel_port_del_op(struct rte_eth_dev *eth_dev,
 	uint16_t port = 0;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	switch (udp_tunnel->prot_type) {
 	case RTE_TUNNEL_TYPE_VXLAN:
 		if (!bp->vxlan_port_cnt) {
@@ -1605,6 +1691,11 @@ static int bnxt_vlan_filter_set_op(struct rte_eth_dev *eth_dev,
 		uint16_t vlan_id, int on)
 {
 	struct bnxt *bp = eth_dev->data->dev_private;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	/* These operations apply to ALL existing MAC/VLAN filters */
 	if (on)
@@ -1619,6 +1710,11 @@ bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask)
 	struct bnxt *bp = dev->data->dev_private;
 	uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads;
 	unsigned int i;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	if (mask & ETH_VLAN_FILTER_MASK) {
 		if (!(rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER)) {
@@ -1660,6 +1756,10 @@ bnxt_set_default_mac_addr_op(struct rte_eth_dev *dev,
 	struct bnxt_filter_info *filter;
 	int rc;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (BNXT_VF(bp) && !BNXT_VF_IS_TRUSTED(bp))
 		return -EPERM;
 
@@ -1699,6 +1799,11 @@ bnxt_dev_set_mc_addr_list_op(struct rte_eth_dev *eth_dev,
 	char *mc_addr_list = (char *)mc_addr_set;
 	struct bnxt_vnic_info *vnic;
 	uint32_t off = 0, i = 0;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	vnic = &bp->vnic_info[0];
 
@@ -1784,6 +1889,10 @@ static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu)
 	uint32_t rc = 0;
 	uint32_t i;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	new_pkt_size = new_mtu + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN +
 		       VLAN_TAG_SIZE * BNXT_NUM_VLANS;
 
@@ -1857,6 +1966,10 @@ bnxt_vlan_pvid_set_op(struct rte_eth_dev *dev, uint16_t pvid, int on)
 	uint16_t vlan = bp->vlan;
 	int rc;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (!BNXT_SINGLE_PF(bp) || BNXT_VF(bp)) {
 		PMD_DRV_LOG(ERR,
 			"PVID cannot be modified for this function\n");
@@ -1874,6 +1987,11 @@ static int
 bnxt_dev_led_on_op(struct rte_eth_dev *dev)
 {
 	struct bnxt *bp = dev->data->dev_private;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	return bnxt_hwrm_port_led_cfg(bp, true);
 }
@@ -1882,6 +2000,11 @@ static int
 bnxt_dev_led_off_op(struct rte_eth_dev *dev)
 {
 	struct bnxt *bp = dev->data->dev_private;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	return bnxt_hwrm_port_led_cfg(bp, false);
 }
@@ -1889,6 +2012,7 @@ bnxt_dev_led_off_op(struct rte_eth_dev *dev)
 static uint32_t
 bnxt_rx_queue_count_op(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
+	struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
 	uint32_t desc = 0, raw_cons = 0, cons;
 	struct bnxt_cp_ring_info *cpr;
 	struct bnxt_rx_queue *rxq;
@@ -1896,6 +2020,11 @@ bnxt_rx_queue_count_op(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 	uint16_t cmp_type;
 	uint8_t cmp = 1;
 	bool valid;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	rxq = dev->data->rx_queues[rx_queue_id];
 	cpr = rxq->cp_ring;
@@ -1940,10 +2069,15 @@ bnxt_rx_descriptor_status_op(void *rx_queue, uint16_t offset)
 	struct bnxt_sw_rx_bd *rx_buf;
 	struct rx_pkt_cmpl *rxcmp;
 	uint32_t cons, cp_cons;
+	int rc;
 
 	if (!rxq)
 		return -EINVAL;
 
+	rc = is_bnxt_in_error(rxq->bp);
+	if (rc)
+		return rc;
+
 	cpr = rxq->cp_ring;
 	rxr = rxq->rx_ring;
 
@@ -1978,10 +2112,15 @@ bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
 	struct bnxt_sw_tx_bd *tx_buf;
 	struct tx_pkt_cmpl *txcmp;
 	uint32_t cons, cp_cons;
+	int rc;
 
 	if (!txq)
 		return -EINVAL;
 
+	rc = is_bnxt_in_error(txq->bp);
+	if (rc)
+		return rc;
+
 	cpr = txq->cp_ring;
 	txr = txq->tx_ring;
 
@@ -2811,6 +2950,10 @@ bnxt_filter_ctrl_op(struct rte_eth_dev *dev __rte_unused,
 {
 	int ret = 0;
 
+	ret = is_bnxt_in_error(dev->data->dev_private);
+	if (ret)
+		return ret;
+
 	switch (filter_type) {
 	case RTE_ETH_FILTER_TUNNEL:
 		PMD_DRV_LOG(ERR,
@@ -3126,6 +3269,10 @@ bnxt_get_eeprom_length_op(struct rte_eth_dev *dev)
 	uint32_t dir_entries;
 	uint32_t entry_length;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	PMD_DRV_LOG(INFO, "%04x:%02x:%02x:%02x\n",
 		bp->pdev->addr.domain, bp->pdev->addr.bus,
 		bp->pdev->addr.devid, bp->pdev->addr.function);
@@ -3144,6 +3291,11 @@ bnxt_get_eeprom_op(struct rte_eth_dev *dev,
 	struct bnxt *bp = dev->data->dev_private;
 	uint32_t index;
 	uint32_t offset;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	PMD_DRV_LOG(INFO, "%04x:%02x:%02x:%02x in_eeprom->offset = %d "
 		"len = %d\n", bp->pdev->addr.domain,
@@ -3215,6 +3367,11 @@ bnxt_set_eeprom_op(struct rte_eth_dev *dev,
 	struct bnxt *bp = dev->data->dev_private;
 	uint8_t index, dir_op;
 	uint16_t type, ext, ordinal, attr;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	PMD_DRV_LOG(INFO, "%04x:%02x:%02x:%02x in_eeprom->offset = %d "
 		"len = %d\n", bp->pdev->addr.domain,
@@ -3768,19 +3925,139 @@ static int bnxt_setup_mac_addr(struct rte_eth_dev *eth_dev)
 	return rc;
 }
 
+static void bnxt_config_vf_req_fwd(struct bnxt *bp)
+{
+	if (!BNXT_PF(bp))
+		return; /* bp->pf.vf_req_fwd is only set up on a PF */
+	/* ALLOW_FUNC(x): clear bit (x & 0x1f) of pf.vf_req_fwd[x >> 5]. */
 #define ALLOW_FUNC(x)	\
 	{ \
 		uint32_t arg = (x); \
 		bp->pf.vf_req_fwd[((arg) >> 5)] &= \
 		~rte_cpu_to_le_32(1 << ((arg) & 0x1f)); \
 	}
+
+	/* Forward all requests if firmware is new enough */
+	if (((bp->fw_ver >= ((20 << 24) | (6 << 16) | (100 << 8))) &&
+	     (bp->fw_ver < ((20 << 24) | (7 << 16)))) ||
+	    ((bp->fw_ver >= ((20 << 24) | (8 << 16))))) {
+		memset(bp->pf.vf_req_fwd, 0xff, sizeof(bp->pf.vf_req_fwd));
+	} else {
+		PMD_DRV_LOG(WARNING,
+			    "Firmware too old for VF mailbox functionality\n");
+		memset(bp->pf.vf_req_fwd, 0, sizeof(bp->pf.vf_req_fwd));
+	}
+
+	/*
+	 * The following are used for driver cleanup. If we disallow these,
+	 * VF drivers can't clean up cleanly.
+	 */
+	ALLOW_FUNC(HWRM_FUNC_DRV_UNRGTR);
+	ALLOW_FUNC(HWRM_VNIC_FREE);
+	ALLOW_FUNC(HWRM_RING_FREE);
+	ALLOW_FUNC(HWRM_RING_GRP_FREE);
+	ALLOW_FUNC(HWRM_VNIC_RSS_COS_LB_CTX_FREE);
+	ALLOW_FUNC(HWRM_CFA_L2_FILTER_FREE);
+	ALLOW_FUNC(HWRM_STAT_CTX_FREE);
+	ALLOW_FUNC(HWRM_PORT_PHY_QCFG);
+	ALLOW_FUNC(HWRM_VNIC_TPA_CFG);
+}
+
+static int bnxt_init_fw(struct bnxt *bp)
+{
+	uint16_t fw_mtu;
+	int err;
+
+	err = bnxt_hwrm_ver_get(bp);
+	if (err)
+		return err;
+
+	/* A failed function reset is always reported as -EIO. */
+	if (bnxt_hwrm_func_reset(bp))
+		return -EIO;
+
+	err = bnxt_hwrm_queue_qportcfg(bp);
+	if (err)
+		return err;
+
+	/* Get the MAX capabilities for this function */
+	err = bnxt_hwrm_func_qcaps(bp);
+	if (err)
+		return err;
+
+	err = bnxt_hwrm_func_qcfg(bp, &fw_mtu);
+	if (err)
+		return err;
+
+	/* Adopt the MTU from func_qcfg only if it is in range and differs. */
+	if (fw_mtu != bp->eth_dev->data->mtu &&
+	    fw_mtu >= RTE_ETHER_MIN_MTU && fw_mtu <= BNXT_MAX_MTU)
+		bp->eth_dev->data->mtu = fw_mtu;
+
+	bnxt_hwrm_port_led_qcaps(bp);
+	return 0;
+}
+
+/*
+ * Run the device init steps in order, stopping at the first failure.
+ * Returns 0, -EBUSY if driver registration fails, else the step's rc.
+ */
+static int bnxt_init_resources(struct bnxt *bp)
+{
+	int rc;
+
+	rc = bnxt_init_fw(bp);
+	if (rc)
+		return rc;
+
+	rc = bnxt_setup_mac_addr(bp->eth_dev);
+	if (rc)
+		return rc;
+
+	bnxt_config_vf_req_fwd(bp);
+
+	rc = bnxt_hwrm_func_driver_register(bp);
+	if (rc) {
+		PMD_DRV_LOG(ERR, "Failed to register driver\n");
+		return -EBUSY;
+	}
+
+	if (BNXT_PF(bp)) {
+		if (bp->pdev->max_vfs) {
+			rc = bnxt_hwrm_allocate_vfs(bp, bp->pdev->max_vfs);
+			if (rc) {
+				PMD_DRV_LOG(ERR, "Failed to allocate VFs\n");
+				return rc;
+			}
+		} else {
+			rc = bnxt_hwrm_allocate_pf_only(bp);
+			if (rc) {
+				PMD_DRV_LOG(ERR,
+					    "Failed to allocate PF resources\n");
+				return rc;
+			}
+		}
+	}
+
+	rc = bnxt_alloc_mem(bp);
+	if (rc)
+		return rc;
+
+	rc = bnxt_setup_int(bp);
+	if (rc)
+		return rc;
+
+	bnxt_init_nic(bp);
+
+	return bnxt_request_int(bp);
+}
+
 static int
 bnxt_dev_init(struct rte_eth_dev *eth_dev)
 {
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 	static int version_printed;
 	struct bnxt *bp;
-	uint16_t mtu;
 	int rc;
 
 	if (version_printed++ == 0)
@@ -3822,166 +4099,50 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
 	rc = bnxt_init_board(eth_dev);
 	if (rc) {
 		PMD_DRV_LOG(ERR,
-			"Board initialization failed rc: %x\n", rc);
-		goto error;
+			    "Failed to initialize board rc: %x\n", rc);
+		return rc;
 	}
 
 	rc = bnxt_alloc_hwrm_resources(bp);
 	if (rc) {
 		PMD_DRV_LOG(ERR,
-			"hwrm resource allocation failure rc: %x\n", rc);
+			    "Failed to allocate hwrm resource rc: %x\n", rc);
 		goto error_free;
 	}
-	rc = bnxt_hwrm_ver_get(bp);
+	rc = bnxt_init_resources(bp);
 	if (rc)
 		goto error_free;
 
-	rc = bnxt_hwrm_func_reset(bp);
-	if (rc) {
-		PMD_DRV_LOG(ERR, "hwrm chip reset failure rc: %x\n", rc);
-		rc = -EIO;
-		goto error_free;
-	}
-
-	rc = bnxt_hwrm_queue_qportcfg(bp);
-	if (rc) {
-		PMD_DRV_LOG(ERR, "hwrm queue qportcfg failed\n");
-		goto error_free;
-	}
-	/* Get the MAX capabilities for this function */
-	rc = bnxt_hwrm_func_qcaps(bp);
-	if (rc) {
-		PMD_DRV_LOG(ERR, "hwrm query capability failure rc: %x\n", rc);
-		goto error_free;
-	}
-
 	rc = bnxt_alloc_stats_mem(bp);
 	if (rc)
 		goto error_free;
 
-	if (bp->max_tx_rings == 0) {
-		PMD_DRV_LOG(ERR, "No TX rings available!\n");
-		rc = -EBUSY;
-		goto error_free;
-	}
-
-	rc = bnxt_setup_mac_addr(eth_dev);
-	if (rc)
-		goto error_free;
-
-	/* THOR does not support ring groups.
-	 * But we will use the array to save RSS context IDs.
-	 */
-	if (BNXT_CHIP_THOR(bp)) {
-		bp->max_ring_grps = BNXT_MAX_RSS_CTXTS_THOR;
-	} else if (bp->max_ring_grps < bp->rx_cp_nr_rings) {
-		/* 1 ring is for default completion ring */
-		PMD_DRV_LOG(ERR, "Insufficient resource: Ring Group\n");
-		rc = -ENOSPC;
-		goto error_free;
-	}
-
-	if (BNXT_HAS_RING_GRPS(bp)) {
-		bp->grp_info = rte_zmalloc("bnxt_grp_info",
-					sizeof(*bp->grp_info) *
-						bp->max_ring_grps, 0);
-		if (!bp->grp_info) {
-			PMD_DRV_LOG(ERR,
-				"Failed to alloc %zu bytes for grp info tbl.\n",
-				sizeof(*bp->grp_info) * bp->max_ring_grps);
-			rc = -ENOMEM;
-			goto error_free;
-		}
-	}
-
-	/* Forward all requests if firmware is new enough */
-	if (((bp->fw_ver >= ((20 << 24) | (6 << 16) | (100 << 8))) &&
-	    (bp->fw_ver < ((20 << 24) | (7 << 16)))) ||
-	    ((bp->fw_ver >= ((20 << 24) | (8 << 16))))) {
-		memset(bp->pf.vf_req_fwd, 0xff, sizeof(bp->pf.vf_req_fwd));
-	} else {
-		PMD_DRV_LOG(WARNING,
-			"Firmware too old for VF mailbox functionality\n");
-		memset(bp->pf.vf_req_fwd, 0, sizeof(bp->pf.vf_req_fwd));
-	}
-
-	/*
-	 * The following are used for driver cleanup.  If we disallow these,
-	 * VF drivers can't clean up cleanly.
-	 */
-	ALLOW_FUNC(HWRM_FUNC_DRV_UNRGTR);
-	ALLOW_FUNC(HWRM_VNIC_FREE);
-	ALLOW_FUNC(HWRM_RING_FREE);
-	ALLOW_FUNC(HWRM_RING_GRP_FREE);
-	ALLOW_FUNC(HWRM_VNIC_RSS_COS_LB_CTX_FREE);
-	ALLOW_FUNC(HWRM_CFA_L2_FILTER_FREE);
-	ALLOW_FUNC(HWRM_STAT_CTX_FREE);
-	ALLOW_FUNC(HWRM_PORT_PHY_QCFG);
-	ALLOW_FUNC(HWRM_VNIC_TPA_CFG);
-	rc = bnxt_hwrm_func_driver_register(bp);
-	if (rc) {
-		PMD_DRV_LOG(ERR,
-			"Failed to register driver");
-		rc = -EBUSY;
-		goto error_free;
-	}
-
 	PMD_DRV_LOG(INFO,
-		DRV_MODULE_NAME " found at mem %" PRIx64 ", node addr %pM\n",
-		pci_dev->mem_resource[0].phys_addr,
-		pci_dev->mem_resource[0].addr);
-
-	rc = bnxt_hwrm_func_qcfg(bp, &mtu);
-	if (rc) {
-		PMD_DRV_LOG(ERR, "hwrm func qcfg failed\n");
-		goto error_free;
-	}
-
-	if (mtu >= RTE_ETHER_MIN_MTU && mtu <= BNXT_MAX_MTU &&
-	    mtu != eth_dev->data->mtu)
-		eth_dev->data->mtu = mtu;
-
-	if (BNXT_PF(bp)) {
-		//if (bp->pf.active_vfs) {
-			// TODO: Deallocate VF resources?
-		//}
-		if (bp->pdev->max_vfs) {
-			rc = bnxt_hwrm_allocate_vfs(bp, bp->pdev->max_vfs);
-			if (rc) {
-				PMD_DRV_LOG(ERR, "Failed to allocate VFs\n");
-				goto error_free;
-			}
-		} else {
-			rc = bnxt_hwrm_allocate_pf_only(bp);
-			if (rc) {
-				PMD_DRV_LOG(ERR,
-					"Failed to allocate PF resources\n");
-				goto error_free;
-			}
-		}
-	}
-
-	bnxt_hwrm_port_led_qcaps(bp);
-
-	rc = bnxt_setup_int(bp);
-	if (rc)
-		goto error_free;
-
-	rc = bnxt_alloc_mem(bp);
-	if (rc)
-		goto error_free;
-
-	bnxt_init_nic(bp);
-
-	rc = bnxt_request_int(bp);
-	if (rc)
-		goto error_free;
+		    DRV_MODULE_NAME "found at mem %" PRIX64 ", node addr %pM\n",
+		    pci_dev->mem_resource[0].phys_addr,
+		    pci_dev->mem_resource[0].addr);
 
 	return 0;
 
 error_free:
 	bnxt_dev_uninit(eth_dev);
-error:
+	return rc;
+}
+
+static int
+bnxt_uninit_resources(struct bnxt *bp)
+{
+	int rc;
+	/* Every step always runs; rc is only the driver-unregister status. */
+	bnxt_disable_int(bp);
+	bnxt_free_int(bp);
+	bnxt_free_mem(bp);
+	bnxt_hwrm_func_buf_unrgtr(bp);
+	rc = bnxt_hwrm_func_driver_unregister(bp, 0);
+	bp->flags &= ~BNXT_FLAG_REGISTERED; /* even if unregister failed */
+	bnxt_free_ctx_mem(bp);
+	bnxt_free_hwrm_resources(bp);
+
 	return rc;
 }
 
@@ -3995,18 +4156,13 @@ bnxt_dev_uninit(struct rte_eth_dev *eth_dev)
 		return -EPERM;
 
 	PMD_DRV_LOG(DEBUG, "Calling Device uninit\n");
-	bnxt_disable_int(bp);
-	bnxt_free_int(bp);
-	bnxt_free_mem(bp);
 
-	bnxt_hwrm_func_buf_unrgtr(bp);
+	rc = bnxt_uninit_resources(bp);
 
 	if (bp->grp_info != NULL) {
 		rte_free(bp->grp_info);
 		bp->grp_info = NULL;
 	}
-	rc = bnxt_hwrm_func_driver_unregister(bp, 0);
-	bnxt_free_hwrm_resources(bp);
 
 	if (bp->tx_mem_zone) {
 		rte_memzone_free((const struct rte_memzone *)bp->tx_mem_zone);
@@ -4022,7 +4178,6 @@ bnxt_dev_uninit(struct rte_eth_dev *eth_dev)
 		bnxt_dev_close_op(eth_dev);
 	if (bp->pf.vf_info)
 		rte_free(bp->pf.vf_info);
-	bnxt_free_ctx_mem(bp);
 	eth_dev->dev_ops = NULL;
 	eth_dev->rx_pkt_burst = NULL;
 	eth_dev->tx_pkt_burst = NULL;
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 9883fb506..24a5a0914 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -964,8 +964,6 @@ int bnxt_hwrm_func_driver_unregister(struct bnxt *bp, uint32_t flags)
 	HWRM_CHECK_RESULT();
 	HWRM_UNLOCK();
 
-	bp->flags &= ~BNXT_FLAG_REGISTERED;
-
 	return rc;
 }
 
diff --git a/drivers/net/bnxt/bnxt_ring.c b/drivers/net/bnxt/bnxt_ring.c
index be15b4bd1..f19865c83 100644
--- a/drivers/net/bnxt/bnxt_ring.c
+++ b/drivers/net/bnxt/bnxt_ring.c
@@ -50,6 +50,38 @@ int bnxt_init_ring_grps(struct bnxt *bp)
 	return 0;
 }
 
+int bnxt_alloc_ring_grps(struct bnxt *bp)
+{
+	size_t tbl_sz;
+
+	/* Refuse to continue when the function has no Tx rings at all. */
+	if (!bp->max_tx_rings) {
+		PMD_DRV_LOG(ERR, "No TX rings available!\n");
+		return -EBUSY;
+	}
+
+	if (BNXT_CHIP_THOR(bp)) {
+		/* No ring groups on THOR; the array saves RSS context IDs. */
+		bp->max_ring_grps = BNXT_MAX_RSS_CTXTS_THOR;
+	} else if (bp->max_ring_grps < bp->rx_cp_nr_rings) {
+		/* 1 ring is for default completion ring */
+		PMD_DRV_LOG(ERR, "Insufficient resource: Ring Group\n");
+		return -ENOSPC;
+	}
+
+	if (!BNXT_HAS_RING_GRPS(bp))
+		return 0;
+
+	tbl_sz = sizeof(*bp->grp_info) * bp->max_ring_grps;
+	bp->grp_info = rte_zmalloc("bnxt_grp_info", tbl_sz, 0);
+	if (!bp->grp_info) {
+		PMD_DRV_LOG(ERR, "Failed to alloc grp info tbl.\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 /*
  * Allocates a completion ring with vmem and stats optionally also allocating
  * a TX and/or RX ring.  Passing NULL as tx_ring_info and/or rx_ring_info
diff --git a/drivers/net/bnxt/bnxt_ring.h b/drivers/net/bnxt/bnxt_ring.h
index 04c7b04b8..a31d59ea3 100644
--- a/drivers/net/bnxt/bnxt_ring.h
+++ b/drivers/net/bnxt/bnxt_ring.h
@@ -67,6 +67,7 @@ struct bnxt_rx_ring_info;
 struct bnxt_cp_ring_info;
 void bnxt_free_ring(struct bnxt_ring *ring);
 int bnxt_init_ring_grps(struct bnxt *bp);
+int bnxt_alloc_ring_grps(struct bnxt *bp);
 int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
 			    struct bnxt_tx_queue *txq,
 			    struct bnxt_rx_queue *rxq,
diff --git a/drivers/net/bnxt/bnxt_rxq.c b/drivers/net/bnxt/bnxt_rxq.c
index 1d95f1139..d5fc5268d 100644
--- a/drivers/net/bnxt/bnxt_rxq.c
+++ b/drivers/net/bnxt/bnxt_rxq.c
@@ -263,6 +263,9 @@ void bnxt_rx_queue_release_op(void *rx_queue)
 	struct bnxt_rx_queue *rxq = (struct bnxt_rx_queue *)rx_queue;
 
 	if (rxq) {
+		if (is_bnxt_in_error(rxq->bp))
+			return;
+
 		bnxt_rx_queue_release_mbufs(rxq);
 
 		/* Free RX ring hardware descriptors */
@@ -294,6 +297,10 @@ int bnxt_rx_queue_setup_op(struct rte_eth_dev *eth_dev,
 	int rc = 0;
 	uint8_t queue_state;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (queue_idx >= bp->max_rx_rings) {
 		PMD_DRV_LOG(ERR,
 			"Cannot create Rx ring %d. Only %d rings available\n",
@@ -363,10 +370,15 @@ int bnxt_rx_queue_setup_op(struct rte_eth_dev *eth_dev,
 int
 bnxt_rx_queue_intr_enable_op(struct rte_eth_dev *eth_dev, uint16_t queue_id)
 {
+	struct bnxt *bp = eth_dev->data->dev_private;
 	struct bnxt_rx_queue *rxq;
 	struct bnxt_cp_ring_info *cpr;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (eth_dev->data->rx_queues) {
 		rxq = eth_dev->data->rx_queues[queue_id];
 		if (!rxq) {
@@ -382,10 +394,15 @@ bnxt_rx_queue_intr_enable_op(struct rte_eth_dev *eth_dev, uint16_t queue_id)
 int
 bnxt_rx_queue_intr_disable_op(struct rte_eth_dev *eth_dev, uint16_t queue_id)
 {
+	struct bnxt *bp = eth_dev->data->dev_private;
 	struct bnxt_rx_queue *rxq;
 	struct bnxt_cp_ring_info *cpr;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (eth_dev->data->rx_queues) {
 		rxq = eth_dev->data->rx_queues[queue_id];
 		if (!rxq) {
@@ -406,6 +423,10 @@ int bnxt_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 	struct bnxt_vnic_info *vnic = NULL;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (rxq == NULL) {
 		PMD_DRV_LOG(ERR, "Invalid Rx queue %d\n", rx_queue_id);
 		return -EINVAL;
@@ -458,6 +479,10 @@ int bnxt_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 	struct bnxt_rx_queue *rxq = NULL;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	/* For the stingray platform and other platforms needing tighter
 	 * control of resource utilization, Rx CQ 0 also works as
 	 * Default CQ for async notifications
diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c
index 185a0e376..12313dd53 100644
--- a/drivers/net/bnxt/bnxt_rxr.c
+++ b/drivers/net/bnxt/bnxt_rxr.c
@@ -539,6 +539,9 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	int rc = 0;
 	bool evt = false;
 
+	if (unlikely(is_bnxt_in_error(rxq->bp)))
+		return 0;
+
 	/* If Rx Q was stopped return. RxQ0 cannot be stopped. */
 	if (unlikely(((rxq->rx_deferred_start ||
 		       !rte_spinlock_trylock(&rxq->lock)) &&
@@ -625,6 +628,20 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return nb_rx_pkts;
 }
 
+/*
+ * Dummy DPDK callback for RX.
+ *
+ * Installed in place of the real receive burst callback during unsafe
+ * control operations on the queue, or in case of error; always returns 0.
+ */
+uint16_t
+bnxt_dummy_recv_pkts(void *rx_queue __rte_unused,
+		     struct rte_mbuf **rx_pkts __rte_unused,
+		     uint16_t nb_pkts __rte_unused)
+{
+	return 0;
+}
+
 void bnxt_free_rx_rings(struct bnxt *bp)
 {
 	int i;
diff --git a/drivers/net/bnxt/bnxt_rxr.h b/drivers/net/bnxt/bnxt_rxr.h
index 6a80c37c8..493b75406 100644
--- a/drivers/net/bnxt/bnxt_rxr.h
+++ b/drivers/net/bnxt/bnxt_rxr.h
@@ -185,6 +185,8 @@ struct bnxt_rx_ring_info {
 
 uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			       uint16_t nb_pkts);
+uint16_t bnxt_dummy_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+			      uint16_t nb_pkts);
 void bnxt_free_rx_rings(struct bnxt *bp);
 int bnxt_init_rx_ring_struct(struct bnxt_rx_queue *rxq, unsigned int socket_id);
 int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq);
diff --git a/drivers/net/bnxt/bnxt_stats.c b/drivers/net/bnxt/bnxt_stats.c
index 69ac2dd91..79f23746c 100644
--- a/drivers/net/bnxt/bnxt_stats.c
+++ b/drivers/net/bnxt/bnxt_stats.c
@@ -353,6 +353,10 @@ int bnxt_stats_get_op(struct rte_eth_dev *eth_dev,
 	struct bnxt *bp = eth_dev->data->dev_private;
 	unsigned int num_q_stats;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	memset(bnxt_stats, 0, sizeof(*bnxt_stats));
 	if (!(bp->flags & BNXT_FLAG_INIT_DONE)) {
 		PMD_DRV_LOG(ERR, "Device Initialization not complete!\n");
@@ -397,6 +401,9 @@ void bnxt_stats_reset_op(struct rte_eth_dev *eth_dev)
 	struct bnxt *bp = eth_dev->data->dev_private;
 	unsigned int i;
 
+	if (is_bnxt_in_error(bp))
+		return;
+
 	if (!(bp->flags & BNXT_FLAG_INIT_DONE)) {
 		PMD_DRV_LOG(ERR, "Device Initialization not complete!\n");
 		return;
@@ -414,13 +421,17 @@ int bnxt_dev_xstats_get_op(struct rte_eth_dev *eth_dev,
 			   struct rte_eth_xstat *xstats, unsigned int n)
 {
 	struct bnxt *bp = eth_dev->data->dev_private;
-
 	unsigned int count, i;
 	uint64_t tx_drop_pkts;
 	unsigned int rx_port_stats_ext_cnt;
 	unsigned int tx_port_stats_ext_cnt;
 	unsigned int stat_size = sizeof(uint64_t);
 	unsigned int stat_count;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	memset(xstats, 0, sizeof(*xstats));
 
@@ -499,7 +510,13 @@ int bnxt_dev_xstats_get_names_op(__rte_unused struct rte_eth_dev *eth_dev,
 				RTE_DIM(bnxt_tx_stats_strings) + 1 +
 				RTE_DIM(bnxt_rx_ext_stats_strings) +
 				RTE_DIM(bnxt_tx_ext_stats_strings);
+	struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
 	unsigned int i, count;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	if (xstats_names != NULL) {
 		count = 0;
@@ -547,6 +564,9 @@ void bnxt_dev_xstats_reset_op(struct rte_eth_dev *eth_dev)
 {
 	struct bnxt *bp = eth_dev->data->dev_private;
 
+	if (is_bnxt_in_error(bp))
+		return;
+
 	if (bp->flags & BNXT_FLAG_PORT_STATS && BNXT_SINGLE_PF(bp))
 		bnxt_hwrm_port_clr_stats(bp);
 
@@ -566,9 +586,15 @@ int bnxt_dev_xstats_get_by_id_op(struct rte_eth_dev *dev, const uint64_t *ids,
 				RTE_DIM(bnxt_tx_stats_strings) + 1 +
 				RTE_DIM(bnxt_rx_ext_stats_strings) +
 				RTE_DIM(bnxt_tx_ext_stats_strings);
+	struct bnxt *bp = dev->data->dev_private;
 	struct rte_eth_xstat xstats[stat_cnt];
 	uint64_t values_copy[stat_cnt];
 	uint16_t i;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	if (!ids)
 		return bnxt_dev_xstats_get_op(dev, xstats, stat_cnt);
@@ -594,7 +620,13 @@ int bnxt_dev_xstats_get_names_by_id_op(struct rte_eth_dev *dev,
 				RTE_DIM(bnxt_rx_ext_stats_strings) +
 				RTE_DIM(bnxt_tx_ext_stats_strings);
 	struct rte_eth_xstat_name xstats_names_copy[stat_cnt];
+	struct bnxt *bp = dev->data->dev_private;
 	uint16_t i;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	if (!ids)
 		return bnxt_dev_xstats_get_names_op(dev, xstats_names,
diff --git a/drivers/net/bnxt/bnxt_txq.c b/drivers/net/bnxt/bnxt_txq.c
index 43b3496c1..090132479 100644
--- a/drivers/net/bnxt/bnxt_txq.c
+++ b/drivers/net/bnxt/bnxt_txq.c
@@ -58,6 +58,9 @@ void bnxt_tx_queue_release_op(void *tx_queue)
 	struct bnxt_tx_queue *txq = (struct bnxt_tx_queue *)tx_queue;
 
 	if (txq) {
+		if (is_bnxt_in_error(txq->bp))
+			return;
+
 		/* Free TX ring hardware descriptors */
 		bnxt_tx_queue_release_mbufs(txq);
 		bnxt_free_ring(txq->tx_ring->tx_ring_struct);
@@ -84,6 +87,10 @@ int bnxt_tx_queue_setup_op(struct rte_eth_dev *eth_dev,
 	struct bnxt_tx_queue *txq;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (queue_idx >= bp->max_tx_rings) {
 		PMD_DRV_LOG(ERR,
 			"Cannot create Tx ring %d. Only %d rings available\n",
diff --git a/drivers/net/bnxt/bnxt_txr.c b/drivers/net/bnxt/bnxt_txr.c
index c71e6f189..35e7166be 100644
--- a/drivers/net/bnxt/bnxt_txr.c
+++ b/drivers/net/bnxt/bnxt_txr.c
@@ -148,6 +148,9 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 		TX_BD_LONG_FLAGS_LHINT_LT2K
 	};
 
+	if (unlikely(is_bnxt_in_error(txq->bp)))
+		return -EIO;
+
 	if (tx_pkt->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM |
 				PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM |
 				PKT_TX_VLAN_PKT | PKT_TX_OUTER_IP_CKSUM |
@@ -485,10 +488,29 @@ uint16_t bnxt_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	return nb_tx_pkts;
 }
 
+/*
+ * Dummy DPDK callback for TX.
+ *
+ * This function is used to temporarily replace the real callback during
+ * unsafe control operations on the queue, or in case of error.
+ */
+uint16_t
+bnxt_dummy_xmit_pkts(void *tx_queue __rte_unused,
+		     struct rte_mbuf **tx_pkts __rte_unused,
+		     uint16_t nb_pkts __rte_unused)
+{
+	return 0;
+}
+
 int bnxt_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 {
 	struct bnxt *bp = dev->data->dev_private;
 	struct bnxt_tx_queue *txq = bp->tx_queues[tx_queue_id];
+	int rc = 0;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
 	txq->tx_deferred_start = false;
@@ -501,6 +523,11 @@ int bnxt_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 {
 	struct bnxt *bp = dev->data->dev_private;
 	struct bnxt_tx_queue *txq = bp->tx_queues[tx_queue_id];
+	int rc = 0;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	/* Handle TX completions */
 	bnxt_handle_tx_cp(txq);
diff --git a/drivers/net/bnxt/bnxt_txr.h b/drivers/net/bnxt/bnxt_txr.h
index 08fd2e014..e7f43f9d1 100644
--- a/drivers/net/bnxt/bnxt_txr.h
+++ b/drivers/net/bnxt/bnxt_txr.h
@@ -57,6 +57,8 @@ int bnxt_init_one_tx_ring(struct bnxt_tx_queue *txq);
 int bnxt_init_tx_ring_struct(struct bnxt_tx_queue *txq, unsigned int socket_id);
 uint16_t bnxt_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			       uint16_t nb_pkts);
+uint16_t bnxt_dummy_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+			      uint16_t nb_pkts);
 #ifdef RTE_ARCH_X86
 uint16_t bnxt_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 			    uint16_t nb_pkts);
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v2 03/13] net/bnxt: handle reset notify async event from FW
  2019-08-30 16:35     ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ajit Khaparde
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 01/13] net/bnxt: add FW reset HWRM command Ajit Khaparde
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 02/13] net/bnxt: prevent device access when device is in reset Ajit Khaparde
@ 2019-08-30 16:35       ` Ajit Khaparde
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 04/13] net/bnxt: inform firmware about IF state changes Ajit Khaparde
                         ` (10 subsequent siblings)
  13 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-30 16:35 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

When a FW upgrade is initiated, the current instance
of FW issues a HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY
async notification to the driver. On receiving this notification,
the PMD shall quiesce itself and poll the FW with the HWRM_VER_GET
command at regular intervals.

Once the VER_GET command succeeds, the driver should go through
the rediscovery process and re-initialize the device.

Also register with FW for the reset notify async event.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        |  13 ++++
 drivers/net/bnxt/bnxt_cpr.c    |  16 +++++
 drivers/net/bnxt/bnxt_cpr.h    |   1 +
 drivers/net/bnxt/bnxt_ethdev.c | 109 ++++++++++++++++++++++++++++-----
 drivers/net/bnxt/bnxt_hwrm.c   |  39 +++++++++---
 drivers/net/bnxt/bnxt_hwrm.h   |   2 +
 6 files changed, 157 insertions(+), 23 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 37b4c717d..8797b032e 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -333,6 +333,16 @@ struct bnxt_ctx_mem_info {
 	struct bnxt_ctx_pg_info *tqm_mem[BNXT_MAX_TC_Q];
 };
 
+/* Maximum Firmware Reset bail out value in milliseconds */
+#define BNXT_MAX_FW_RESET_TIMEOUT	6000
+/* Minimum time required for the firmware readiness in milliseconds */
+#define BNXT_MIN_FW_READY_TIMEOUT	2000
+/* Frequency for the firmware readiness check in milliseconds */
+#define BNXT_FW_READY_WAIT_INTERVAL	100
+
+#define US_PER_MS			1000
+#define NS_PER_US			1000
+
 #define BNXT_HWRM_SHORT_REQ_LEN		sizeof(struct hwrm_short_input)
 struct bnxt {
 	void				*bar0;
@@ -463,6 +473,9 @@ struct bnxt {
 	struct bnxt_ptp_cfg     *ptp_cfg;
 	uint16_t		vf_resv_strategy;
 	struct bnxt_ctx_mem_info        *ctx;
+
+	uint16_t		fw_reset_min_msecs;
+	uint16_t		fw_reset_max_msecs;
 };
 
 int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete);
diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index 655bcf1a8..62a16d2ed 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -4,6 +4,7 @@
  */
 
 #include <rte_malloc.h>
+#include <rte_alarm.h>
 
 #include "bnxt.h"
 #include "bnxt_cpr.h"
@@ -40,6 +41,21 @@ void bnxt_handle_async_event(struct bnxt *bp,
 	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED:
 		PMD_DRV_LOG(INFO, "Port conn async event\n");
 		break;
+	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY:
+		/* timestamp_lo/hi values are in units of 100ms */
+		bp->fw_reset_max_msecs = async_cmp->timestamp_hi ?
+			rte_le_to_cpu_16(async_cmp->timestamp_hi) * 100 :
+			BNXT_MAX_FW_RESET_TIMEOUT;
+		bp->fw_reset_min_msecs = async_cmp->timestamp_lo ?
+			async_cmp->timestamp_lo * 100 :
+			BNXT_MIN_FW_READY_TIMEOUT;
+		PMD_DRV_LOG(INFO,
+			    "Firmware non-fatal reset event received\n");
+
+		bp->flags |= BNXT_FLAG_FW_RESET;
+		rte_eal_alarm_set(US_PER_MS, bnxt_dev_reset_and_resume,
+				  (void *)bp);
+		break;
 	default:
 		PMD_DRV_LOG(INFO, "handle_async_event id = 0x%x\n", event_id);
 		break;
diff --git a/drivers/net/bnxt/bnxt_cpr.h b/drivers/net/bnxt/bnxt_cpr.h
index 8c6a34b61..f48293b96 100644
--- a/drivers/net/bnxt/bnxt_cpr.h
+++ b/drivers/net/bnxt/bnxt_cpr.h
@@ -106,5 +106,6 @@ struct bnxt;
 void bnxt_handle_async_event(struct bnxt *bp, struct cmpl_base *cmp);
 void bnxt_handle_fwd_req(struct bnxt *bp, struct cmpl_base *cmp);
 int bnxt_event_hwrm_resp_handler(struct bnxt *bp, struct cmpl_base *cmp);
+void bnxt_dev_reset_and_resume(void *arg);
 
 #endif
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 33ff4a5a7..e545802ce 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -11,6 +11,7 @@
 #include <rte_ethdev_pci.h>
 #include <rte_malloc.h>
 #include <rte_cycles.h>
+#include <rte_alarm.h>
 
 #include "bnxt.h"
 #include "bnxt_cpr.h"
@@ -166,6 +167,8 @@ static int bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask);
 static void bnxt_print_link_info(struct rte_eth_dev *eth_dev);
 static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu);
 static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev);
+static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev);
+static int bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev);
 
 int is_bnxt_in_error(struct bnxt *bp)
 {
@@ -201,19 +204,25 @@ static uint16_t  bnxt_rss_hash_tbl_size(const struct bnxt *bp)
 	return bnxt_rss_ctxts(bp) * BNXT_RSS_ENTRIES_PER_CTX_THOR;
 }
 
-static void bnxt_free_mem(struct bnxt *bp)
+static void bnxt_free_mem(struct bnxt *bp, bool reconfig)
 {
 	bnxt_free_filter_mem(bp);
 	bnxt_free_vnic_attributes(bp);
 	bnxt_free_vnic_mem(bp);
 
-	bnxt_free_stats(bp);
-	bnxt_free_tx_rings(bp);
-	bnxt_free_rx_rings(bp);
+	/* tx/rx rings are configured as part of *_queue_setup callbacks.
+	 * If the number of rings change across fw update,
+	 * we don't have much choice except to warn the user.
+	 */
+	if (!reconfig) {
+		bnxt_free_stats(bp);
+		bnxt_free_tx_rings(bp);
+		bnxt_free_rx_rings(bp);
+	}
 	bnxt_free_async_cp_ring(bp);
 }
 
-static int bnxt_alloc_mem(struct bnxt *bp)
+static int bnxt_alloc_mem(struct bnxt *bp, bool reconfig)
 {
 	int rc;
 
@@ -244,7 +253,7 @@ static int bnxt_alloc_mem(struct bnxt *bp)
 	return 0;
 
 alloc_mem_err:
-	bnxt_free_mem(bp);
+	bnxt_free_mem(bp, reconfig);
 	return rc;
 }
 
@@ -3483,6 +3492,71 @@ static const struct eth_dev_ops bnxt_dev_ops = {
 	.timesync_read_tx_timestamp = bnxt_timesync_read_tx_timestamp,
 };
 
+static void bnxt_dev_cleanup(struct bnxt *bp)
+{
+	bnxt_set_hwrm_link_config(bp, false);
+	bp->link_info.link_up = 0;
+	if (bp->dev_stopped == 0)
+		bnxt_dev_stop_op(bp->eth_dev);
+
+	bnxt_uninit_resources(bp, true);
+}
+
+static void bnxt_dev_recover(void *arg)
+{
+	struct bnxt *bp = arg;
+	int timeout = bp->fw_reset_max_msecs;
+	int rc = 0;
+
+	do {
+		rc = bnxt_hwrm_ver_get(bp);
+		if (rc == 0)
+			break;
+		rte_delay_ms(BNXT_FW_READY_WAIT_INTERVAL);
+		timeout -= BNXT_FW_READY_WAIT_INTERVAL;
+	} while (rc && timeout);
+
+	if (rc) {
+		PMD_DRV_LOG(ERR, "FW is not Ready after reset\n");
+		goto err;
+	}
+
+	rc = bnxt_init_resources(bp, true);
+	if (rc) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to initialize resources after reset\n");
+		goto err;
+	}
+	/* clear reset flag as the device is initialized now */
+	bp->flags &= ~BNXT_FLAG_FW_RESET;
+
+	rc = bnxt_dev_start_op(bp->eth_dev);
+	if (rc) {
+		PMD_DRV_LOG(ERR, "Failed to start port after reset\n");
+		goto err;
+	}
+
+	PMD_DRV_LOG(INFO, "Recovered from FW reset\n");
+	return;
+err:
+	bp->flags |= BNXT_FLAG_FATAL_ERROR;
+	bnxt_uninit_resources(bp, false);
+	PMD_DRV_LOG(ERR, "Failed to recover from FW reset\n");
+}
+
+void bnxt_dev_reset_and_resume(void *arg)
+{
+	struct bnxt *bp = arg;
+	int rc;
+
+	bnxt_dev_cleanup(bp);
+
+	rc = rte_eal_alarm_set(US_PER_MS * bp->fw_reset_min_msecs,
+			       bnxt_dev_recover, (void *)bp);
+	if (rc)
+		PMD_DRV_LOG(ERR, "Error setting recovery alarm");
+}
+
 static bool bnxt_vf_pciid(uint16_t id)
 {
 	if (id == BROADCOM_DEV_ID_57304_VF ||
@@ -3998,7 +4072,7 @@ static int bnxt_init_fw(struct bnxt *bp)
 	return 0;
 }
 
-static int bnxt_init_resources(struct bnxt *bp)
+static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev)
 {
 	int rc;
 
@@ -4006,9 +4080,11 @@ static int bnxt_init_resources(struct bnxt *bp)
 	if (rc)
 		return rc;
 
-	rc = bnxt_setup_mac_addr(bp->eth_dev);
-	if (rc)
-		return rc;
+	if (!reconfig_dev) {
+		rc = bnxt_setup_mac_addr(bp->eth_dev);
+		if (rc)
+			return rc;
+	}
 
 	bnxt_config_vf_req_fwd(bp);
 
@@ -4035,7 +4111,7 @@ static int bnxt_init_resources(struct bnxt *bp)
 		}
 	}
 
-	rc = bnxt_alloc_mem(bp);
+	rc = bnxt_alloc_mem(bp, reconfig_dev);
 	if (rc)
 		return rc;
 
@@ -4109,7 +4185,7 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
 			    "Failed to allocate hwrm resource rc: %x\n", rc);
 		goto error_free;
 	}
-	rc = bnxt_init_resources(bp);
+	rc = bnxt_init_resources(bp, false);
 	if (rc)
 		goto error_free;
 
@@ -4130,18 +4206,19 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
 }
 
 static int
-bnxt_uninit_resources(struct bnxt *bp)
+bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev)
 {
 	int rc;
 
 	bnxt_disable_int(bp);
 	bnxt_free_int(bp);
-	bnxt_free_mem(bp);
+	bnxt_free_mem(bp, reconfig_dev);
 	bnxt_hwrm_func_buf_unrgtr(bp);
 	rc = bnxt_hwrm_func_driver_unregister(bp, 0);
 	bp->flags &= ~BNXT_FLAG_REGISTERED;
 	bnxt_free_ctx_mem(bp);
-	bnxt_free_hwrm_resources(bp);
+	if (!reconfig_dev)
+		bnxt_free_hwrm_resources(bp);
 
 	return rc;
 }
@@ -4157,7 +4234,7 @@ bnxt_dev_uninit(struct rte_eth_dev *eth_dev)
 
 	PMD_DRV_LOG(DEBUG, "Calling Device uninit\n");
 
-	rc = bnxt_uninit_resources(bp);
+	rc = bnxt_uninit_resources(bp, false);
 
 	if (bp->grp_info != NULL) {
 		rte_free(bp->grp_info);
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 24a5a0914..b27dbe87e 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -26,7 +26,7 @@
 
 #include <rte_io.h>
 
-#define HWRM_CMD_TIMEOUT		6000000
+#define HWRM_SHORT_CMD_TIMEOUT		50000
 #define HWRM_SPEC_CODE_1_8_3		0x10803
 #define HWRM_VERSION_1_9_1		0x10901
 #define HWRM_VERSION_1_9_2		0x10903
@@ -97,6 +97,14 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg,
 		GRCPF_REG_KONG_CHANNEL_OFFSET : GRCPF_REG_CHIMP_CHANNEL_OFFSET;
 	uint16_t mb_trigger_offset = use_kong_mb ?
 		GRCPF_REG_KONG_COMM_TRIGGER : GRCPF_REG_CHIMP_COMM_TRIGGER;
+	uint32_t timeout;
+
+	/* Do not send HWRM commands to firmware in error state */
+	if (bp->flags & BNXT_FLAG_FATAL_ERROR)
+		return 0;
+
+	/* For VER_GET command, set timeout as 50ms */
+	timeout = HWRM_SHORT_CMD_TIMEOUT;
 
 	if (bp->flags & BNXT_FLAG_SHORT_CMD ||
 	    msg_len > bp->max_req_len) {
@@ -139,7 +147,7 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg,
 	rte_write32(1, bar);
 
 	/* Poll for the valid bit */
-	for (i = 0; i < HWRM_CMD_TIMEOUT; i++) {
+	for (i = 0; i < timeout; i++) {
 		/* Sanity check on the resp->resp_len */
 		rte_rmb();
 		if (resp->resp_len && resp->resp_len <= bp->max_resp_len) {
@@ -151,7 +159,12 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg,
 		rte_delay_us(1);
 	}
 
-	if (i >= HWRM_CMD_TIMEOUT) {
+	if (i >= timeout) {
+		/* Suppress VER_GET timeout messages during reset recovery */
+		if (bp->flags & BNXT_FLAG_FW_RESET &&
+		    rte_cpu_to_le_16(req->req_type) == HWRM_VER_GET)
+			return -ETIMEDOUT;
+
 		PMD_DRV_LOG(ERR, "Error(timeout) sending msg 0x%04x\n",
 			    req->req_type);
 		return -ETIMEDOUT;
@@ -657,12 +670,15 @@ int bnxt_hwrm_func_reset(struct bnxt *bp)
 int bnxt_hwrm_func_driver_register(struct bnxt *bp)
 {
 	int rc;
+	uint32_t flags = 0;
 	struct hwrm_func_drv_rgtr_input req = {.req_type = 0 };
 	struct hwrm_func_drv_rgtr_output *resp = bp->hwrm_cmd_resp_addr;
 
 	if (bp->flags & BNXT_FLAG_REGISTERED)
 		return 0;
 
+	flags = HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_HOT_RESET_SUPPORT;
+
 	HWRM_PREP(req, FUNC_DRV_RGTR, BNXT_USE_CHIMP_MB);
 	req.enables = rte_cpu_to_le_32(HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_VER |
 			HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_ASYNC_EVENT_FWD);
@@ -683,14 +699,16 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp)
 		 * this HWRM sniffer list in FW because DPDK PF driver does
 		 * not support this.
 		 */
-		req.flags =
-		rte_cpu_to_le_32(HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_FWD_NONE_MODE);
+		flags |= HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_FWD_NONE_MODE;
 	}
 
+	req.flags = rte_cpu_to_le_32(flags);
+
 	req.async_event_fwd[0] |=
 		rte_cpu_to_le_32(ASYNC_CMPL_EVENT_ID_LINK_STATUS_CHANGE |
 				 ASYNC_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED |
-				 ASYNC_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE);
+				 ASYNC_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE |
+				 ASYNC_CMPL_EVENT_ID_RESET_NOTIFY);
 	req.async_event_fwd[1] |=
 		rte_cpu_to_le_32(ASYNC_CMPL_EVENT_ID_PF_DRVR_UNLOAD |
 				 ASYNC_CMPL_EVENT_ID_VF_CFG_CHANGE);
@@ -837,7 +855,10 @@ int bnxt_hwrm_ver_get(struct bnxt *bp)
 
 	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
-	HWRM_CHECK_RESULT();
+	if (bp->flags & BNXT_FLAG_FW_RESET)
+		HWRM_CHECK_RESULT_SILENT();
+	else
+		HWRM_CHECK_RESULT();
 
 	PMD_DRV_LOG(INFO, "%d.%d.%d:%d.%d.%d\n",
 		resp->hwrm_intf_maj_8b, resp->hwrm_intf_min_8b,
@@ -2685,6 +2706,10 @@ int bnxt_hwrm_func_qcfg(struct bnxt *bp, uint16_t *mtu)
 	if (BNXT_VF(bp) && (flags & HWRM_FUNC_QCFG_OUTPUT_FLAGS_TRUSTED_VF)) {
 		bp->flags |= BNXT_FLAG_TRUSTED_VF_EN;
 		PMD_DRV_LOG(INFO, "Trusted VF cap enabled\n");
+	} else if (BNXT_VF(bp) &&
+		   !(flags & HWRM_FUNC_QCFG_OUTPUT_FLAGS_TRUSTED_VF)) {
+		bp->flags &= ~BNXT_FLAG_TRUSTED_VF_EN;
+		PMD_DRV_LOG(INFO, "Trusted VF cap disabled\n");
 	}
 
 	if (mtu)
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index c882fc2a1..a03620532 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -21,6 +21,8 @@ struct bnxt_cp_ring_info;
 	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED)
 #define ASYNC_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE	\
 	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE)
+#define ASYNC_CMPL_EVENT_ID_RESET_NOTIFY \
+	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY)
 #define ASYNC_CMPL_EVENT_ID_PF_DRVR_UNLOAD	\
 	(1 << (HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD - 32))
 #define ASYNC_CMPL_EVENT_ID_VF_CFG_CHANGE	\
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v2 04/13] net/bnxt: inform firmware about IF state changes
  2019-08-30 16:35     ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                         ` (2 preceding siblings ...)
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 03/13] net/bnxt: handle reset notify async event from FW Ajit Khaparde
@ 2019-08-30 16:35       ` Ajit Khaparde
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 05/13] net/bnxt: handle fatal event from FW under error conditions Ajit Khaparde
                         ` (9 subsequent siblings)
  13 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-30 16:35 UTC (permalink / raw)
  To: dev
  Cc: ferruh.yigit, Kalesh AP, Santoshkumar Karanappa Rastapur, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

Use the latest firmware API to inform firmware about IF state changes.
Firmware has the option to clean up resources during IF down and
to require the driver to reserve resources again during IF up.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Santoshkumar Karanappa Rastapur <santosh.rastapur@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        |  1 +
 drivers/net/bnxt/bnxt_ethdev.c |  4 ++++
 drivers/net/bnxt/bnxt_hwrm.c   | 35 ++++++++++++++++++++++++++++++++++
 drivers/net/bnxt/bnxt_hwrm.h   |  1 +
 4 files changed, 41 insertions(+)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 8797b032e..394a2a941 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -370,6 +370,7 @@ struct bnxt {
 #define BNXT_FLAG_STINGRAY	(1 << 14)
 #define BNXT_FLAG_FW_RESET	(1 << 15)
 #define BNXT_FLAG_FATAL_ERROR	(1 << 16)
+#define BNXT_FLAG_FW_CAP_IF_CHANGE	(1 << 17)
 #define BNXT_FLAG_EXT_STATS_SUPPORTED	(1 << 29)
 #define BNXT_FLAG_NEW_RM	(1 << 30)
 #define BNXT_FLAG_INIT_DONE	(1U << 31)
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index e545802ce..385492db5 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -803,6 +803,8 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
 			bp->rx_cp_nr_rings, RTE_ETHDEV_QUEUE_STAT_CNTRS);
 	}
 
+	bnxt_hwrm_if_change(bp, 1);
+
 	rc = bnxt_init_chip(bp);
 	if (rc)
 		goto error;
@@ -829,6 +831,7 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
 	return 0;
 
 error:
+	bnxt_hwrm_if_change(bp, 0);
 	bnxt_shutdown_nic(bp);
 	bnxt_free_tx_mbufs(bp);
 	bnxt_free_rx_mbufs(bp);
@@ -895,6 +898,7 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
 	bnxt_free_tx_mbufs(bp);
 	bnxt_free_rx_mbufs(bp);
 	bnxt_shutdown_nic(bp);
+	bnxt_hwrm_if_change(bp, 0);
 	bp->dev_stopped = 1;
 }
 
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index b27dbe87e..17c7b5e9e 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -716,6 +716,11 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp)
 	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
 	HWRM_CHECK_RESULT();
+
+	flags = rte_le_to_cpu_32(resp->flags);
+	if (flags & HWRM_FUNC_DRV_RGTR_OUTPUT_FLAGS_IF_CHANGE_SUPPORTED)
+		bp->flags |= BNXT_FLAG_FW_CAP_IF_CHANGE;
+
 	HWRM_UNLOCK();
 
 	bp->flags |= BNXT_FLAG_REGISTERED;
@@ -4649,3 +4654,33 @@ int bnxt_hwrm_set_mac(struct bnxt *bp)
 
 	return rc;
 }
+
+int bnxt_hwrm_if_change(struct bnxt *bp, bool state)
+{
+	struct hwrm_func_drv_if_change_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_func_drv_if_change_input req = {0};
+	int rc;
+
+	if (!(bp->flags & BNXT_FLAG_FW_CAP_IF_CHANGE))
+		return 0;
+
+	/* Do not issue FUNC_DRV_IF_CHANGE during reset recovery.
+	 * If we issue FUNC_DRV_IF_CHANGE with flags down before
+	 * FUNC_DRV_UNRGTR, FW resets before FUNC_DRV_UNRGTR
+	 */
+	if (!state && (bp->flags & BNXT_FLAG_FW_RESET))
+		return 0;
+
+	HWRM_PREP(req, FUNC_DRV_IF_CHANGE, BNXT_USE_CHIMP_MB);
+
+	if (state)
+		req.flags =
+		rte_cpu_to_le_32(HWRM_FUNC_DRV_IF_CHANGE_INPUT_FLAGS_UP);
+
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
+
+	HWRM_CHECK_RESULT();
+	HWRM_UNLOCK();
+
+	return rc;
+}
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index a03620532..2f57e950b 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -201,4 +201,5 @@ int bnxt_hwrm_tunnel_redirect_query(struct bnxt *bp, uint32_t *type);
 int bnxt_hwrm_tunnel_redirect_info(struct bnxt *bp, uint8_t tun_type,
 				   uint16_t *dst_fid);
 int bnxt_hwrm_set_mac(struct bnxt *bp);
+int bnxt_hwrm_if_change(struct bnxt *bp, bool state);
 #endif
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v2 05/13] net/bnxt: handle fatal event from FW under error conditions
  2019-08-30 16:35     ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                         ` (3 preceding siblings ...)
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 04/13] net/bnxt: inform firmware about IF state changes Ajit Khaparde
@ 2019-08-30 16:35       ` Ajit Khaparde
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 06/13] net/bnxt: query firmware error recovery capabilities Ajit Khaparde
                         ` (8 subsequent siblings)
  13 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-30 16:35 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

When firmware hits an unrecoverable error condition, it initiates
recovery by sending the async event EVENT_CMPL_EVENT_ID_RESET_NOTIFY
with data1 set to RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL
to all host drivers and will reset the chip.

The recovery procedure follows the same sequence as the one for a hot FW upgrade.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt_cpr.c    | 13 +++++++++++--
 drivers/net/bnxt/bnxt_cpr.h    |  5 +++++
 drivers/net/bnxt/bnxt_ethdev.c |  3 +++
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index 62a16d2ed..0b2eeef8f 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -21,6 +21,7 @@ void bnxt_handle_async_event(struct bnxt *bp,
 	struct hwrm_async_event_cmpl *async_cmp =
 				(struct hwrm_async_event_cmpl *)cmp;
 	uint16_t event_id = rte_le_to_cpu_16(async_cmp->event_id);
+	uint32_t event_data;
 
 	/* TODO: HWRM async events are not defined yet */
 	/* Needs to handle: link events, error events, etc. */
@@ -42,6 +43,7 @@ void bnxt_handle_async_event(struct bnxt *bp,
 		PMD_DRV_LOG(INFO, "Port conn async event\n");
 		break;
 	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY:
+		event_data = rte_le_to_cpu_32(async_cmp->event_data1);
 		/* timestamp_lo/hi values are in units of 100ms */
 		bp->fw_reset_max_msecs = async_cmp->timestamp_hi ?
 			rte_le_to_cpu_16(async_cmp->timestamp_hi) * 100 :
@@ -49,8 +51,15 @@ void bnxt_handle_async_event(struct bnxt *bp,
 		bp->fw_reset_min_msecs = async_cmp->timestamp_lo ?
 			async_cmp->timestamp_lo * 100 :
 			BNXT_MIN_FW_READY_TIMEOUT;
-		PMD_DRV_LOG(INFO,
-			    "Firmware non-fatal reset event received\n");
+		if ((event_data & EVENT_DATA1_REASON_CODE_MASK) ==
+		    EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL) {
+			PMD_DRV_LOG(INFO,
+				    "Firmware fatal reset event received\n");
+			bp->flags |= BNXT_FLAG_FATAL_ERROR;
+		} else {
+			PMD_DRV_LOG(INFO,
+				    "Firmware non-fatal reset event received\n");
+		}
 
 		bp->flags |= BNXT_FLAG_FW_RESET;
 		rte_eal_alarm_set(US_PER_MS, bnxt_dev_reset_and_resume,
diff --git a/drivers/net/bnxt/bnxt_cpr.h b/drivers/net/bnxt/bnxt_cpr.h
index f48293b96..b61bafa0e 100644
--- a/drivers/net/bnxt/bnxt_cpr.h
+++ b/drivers/net/bnxt/bnxt_cpr.h
@@ -108,4 +108,9 @@ void bnxt_handle_fwd_req(struct bnxt *bp, struct cmpl_base *cmp);
 int bnxt_event_hwrm_resp_handler(struct bnxt *bp, struct cmpl_base *cmp);
 void bnxt_dev_reset_and_resume(void *arg);
 
+#define EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL     \
+	HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL
+#define EVENT_DATA1_REASON_CODE_MASK                   \
+	HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MASK
+
 #endif
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 385492db5..a917e0440 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -3512,6 +3512,9 @@ static void bnxt_dev_recover(void *arg)
 	int timeout = bp->fw_reset_max_msecs;
 	int rc = 0;
 
+	/* Clear Error flag so that device re-init should happen */
+	bp->flags &= ~BNXT_FLAG_FATAL_ERROR;
+
 	do {
 		rc = bnxt_hwrm_ver_get(bp);
 		if (rc == 0)
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v2 06/13] net/bnxt: query firmware error recovery capabilities
  2019-08-30 16:35     ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                         ` (4 preceding siblings ...)
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 05/13] net/bnxt: handle fatal event from FW under error conditions Ajit Khaparde
@ 2019-08-30 16:35       ` Ajit Khaparde
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 07/13] net/bnxt: map status registers for FW health monitoring Ajit Khaparde
                         ` (7 subsequent siblings)
  13 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-30 16:35 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

In the driver-initiated error recovery process, the driver has to know
the register offsets and values needed to initiate a FW reset. The HWRM
command HWRM_ERROR_RECOVERY_QCFG is used to obtain all the registers and
values required to initiate a FW reset. The command response includes the
FW heartbeat register, health status register, error counter register,
and the register offsets and values to do a chip reset if firmware
crashes and becomes unresponsive.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        | 27 +++++++++++
 drivers/net/bnxt/bnxt_ethdev.c | 10 ++++
 drivers/net/bnxt/bnxt_hwrm.c   | 89 ++++++++++++++++++++++++++++++++++
 drivers/net/bnxt/bnxt_hwrm.h   |  1 +
 4 files changed, 127 insertions(+)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 394a2a941..19bd13a7f 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -343,6 +343,29 @@ struct bnxt_ctx_mem_info {
 #define US_PER_MS			1000
 #define NS_PER_US			1000
 
+struct bnxt_error_recovery_info {
+	/* All units in milliseconds */
+	uint32_t	driver_polling_freq;
+	uint32_t	master_func_wait_period;
+	uint32_t	normal_func_wait_period;
+	uint32_t	master_func_wait_period_after_reset;
+	uint32_t	max_bailout_time_after_reset;
+#define BNXT_FW_STATUS_REG		0
+#define BNXT_FW_HEARTBEAT_CNT_REG	1
+#define BNXT_FW_RECOVERY_CNT_REG	2
+#define BNXT_FW_RESET_INPROG_REG	3
+	uint32_t	status_regs[4];
+	uint32_t	reset_inprogress_reg_mask;
+#define BNXT_NUM_RESET_REG	16
+	uint8_t		reg_array_cnt;
+	uint32_t	reset_reg[BNXT_NUM_RESET_REG];
+	uint32_t	reset_reg_val[BNXT_NUM_RESET_REG];
+	uint8_t		delay_after_reset[BNXT_NUM_RESET_REG];
+#define BNXT_FLAG_ERROR_RECOVERY_HOST	(1 << 0)
+#define BNXT_FLAG_ERROR_RECOVERY_CO_CPU	(1 << 1)
+	uint32_t	flags;
+};
+
 #define BNXT_HWRM_SHORT_REQ_LEN		sizeof(struct hwrm_short_input)
 struct bnxt {
 	void				*bar0;
@@ -371,6 +394,7 @@ struct bnxt {
 #define BNXT_FLAG_FW_RESET	(1 << 15)
 #define BNXT_FLAG_FATAL_ERROR	(1 << 16)
 #define BNXT_FLAG_FW_CAP_IF_CHANGE	(1 << 17)
+#define BNXT_FLAG_FW_CAP_ERROR_RECOVERY	(1 << 18)
 #define BNXT_FLAG_EXT_STATS_SUPPORTED	(1 << 29)
 #define BNXT_FLAG_NEW_RM	(1 << 30)
 #define BNXT_FLAG_INIT_DONE	(1U << 31)
@@ -477,6 +501,9 @@ struct bnxt {
 
 	uint16_t		fw_reset_min_msecs;
 	uint16_t		fw_reset_max_msecs;
+
+	/* Struct to hold adapter error recovery related info */
+	struct bnxt_error_recovery_info *recovery_info;
 };
 
 int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete);
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index a917e0440..7a1142947 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -4070,6 +4070,11 @@ static int bnxt_init_fw(struct bnxt *bp)
 	if (rc)
 		return rc;
 
+	/* Get the adapter error recovery support info */
+	rc = bnxt_hwrm_error_recovery_qcfg(bp);
+	if (rc)
+		bp->flags &= ~BNXT_FLAG_FW_CAP_ERROR_RECOVERY;
+
 	if (mtu >= RTE_ETHER_MIN_MTU && mtu <= BNXT_MAX_MTU &&
 	    mtu != bp->eth_dev->data->mtu)
 		bp->eth_dev->data->mtu = mtu;
@@ -4227,6 +4232,11 @@ bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev)
 	if (!reconfig_dev)
 		bnxt_free_hwrm_resources(bp);
 
+	if (bp->recovery_info != NULL) {
+		rte_free(bp->recovery_info);
+		bp->recovery_info = NULL;
+	}
+
 	return rc;
 }
 
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 17c7b5e9e..e2c993936 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -626,6 +626,13 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
 	if (flags & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT_STATS_SUPPORTED)
 		bp->flags |= BNXT_FLAG_EXT_STATS_SUPPORTED;
 
+	if (flags & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_ERROR_RECOVERY_CAPABLE) {
+		bp->flags |= BNXT_FLAG_FW_CAP_ERROR_RECOVERY;
+		PMD_DRV_LOG(DEBUG, "Adapter Error recovery SUPPORTED\n");
+	} else {
+		bp->flags &= ~BNXT_FLAG_FW_CAP_ERROR_RECOVERY;
+	}
+
 	HWRM_UNLOCK();
 
 	return rc;
@@ -4684,3 +4691,85 @@ int bnxt_hwrm_if_change(struct bnxt *bp, bool state)
 
 	return rc;
 }
+
+int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp)
+{
+	struct hwrm_error_recovery_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+	struct bnxt_error_recovery_info *info;
+	struct hwrm_error_recovery_qcfg_input req = {0};
+	uint32_t flags = 0;
+	unsigned int i;
+	int rc;
+
+	/* Older FW does not have error recovery support */
+	if (!(bp->flags & BNXT_FLAG_FW_CAP_ERROR_RECOVERY))
+		return 0;
+
+	info = rte_zmalloc("bnxt_hwrm_error_recovery_qcfg",
+			   sizeof(*info), 0);
+	bp->recovery_info = info;
+	if (info == NULL)
+		return -ENOMEM;
+
+	HWRM_PREP(req, ERROR_RECOVERY_QCFG, BNXT_USE_CHIMP_MB);
+
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
+
+	HWRM_CHECK_RESULT();
+
+	flags = rte_le_to_cpu_32(resp->flags);
+	if (flags & HWRM_ERROR_RECOVERY_QCFG_OUTPUT_FLAGS_HOST)
+		info->flags |= BNXT_FLAG_ERROR_RECOVERY_HOST;
+	else if (flags & HWRM_ERROR_RECOVERY_QCFG_OUTPUT_FLAGS_CO_CPU)
+		info->flags |= BNXT_FLAG_ERROR_RECOVERY_CO_CPU;
+
+	if ((info->flags & BNXT_FLAG_ERROR_RECOVERY_CO_CPU) &&
+	    !(bp->flags & BNXT_FLAG_KONG_MB_EN)) {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	/* FW returned values are in units of 100msec */
+	info->driver_polling_freq =
+		rte_le_to_cpu_32(resp->driver_polling_freq) * 100;
+	info->master_func_wait_period =
+		rte_le_to_cpu_32(resp->master_func_wait_period) * 100;
+	info->normal_func_wait_period =
+		rte_le_to_cpu_32(resp->normal_func_wait_period) * 100;
+	info->master_func_wait_period_after_reset =
+		rte_le_to_cpu_32(resp->master_func_wait_period_after_reset) * 100;
+	info->max_bailout_time_after_reset =
+		rte_le_to_cpu_32(resp->max_bailout_time_after_reset) * 100;
+	info->status_regs[BNXT_FW_STATUS_REG] =
+		rte_le_to_cpu_32(resp->fw_health_status_reg);
+	info->status_regs[BNXT_FW_HEARTBEAT_CNT_REG] =
+		rte_le_to_cpu_32(resp->fw_heartbeat_reg);
+	info->status_regs[BNXT_FW_RECOVERY_CNT_REG] =
+		rte_le_to_cpu_32(resp->fw_reset_cnt_reg);
+	info->status_regs[BNXT_FW_RESET_INPROG_REG] =
+		rte_le_to_cpu_32(resp->reset_inprogress_reg);
+	info->reg_array_cnt =
+		rte_le_to_cpu_32(resp->reg_array_cnt);
+
+	if (info->reg_array_cnt >= BNXT_NUM_RESET_REG) {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	for (i = 0; i < info->reg_array_cnt; i++) {
+		info->reset_reg[i] =
+			rte_le_to_cpu_32(resp->reset_reg[i]);
+		info->reset_reg_val[i] =
+			rte_le_to_cpu_32(resp->reset_reg_val[i]);
+		info->delay_after_reset[i] =
+			resp->delay_after_reset[i];
+	}
+err:
+	HWRM_UNLOCK();
+
+	if (rc) {
+		rte_free(bp->recovery_info);
+		bp->recovery_info = NULL;
+	}
+	return rc;
+}
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index 2f57e950b..c332c129d 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -202,4 +202,5 @@ int bnxt_hwrm_tunnel_redirect_info(struct bnxt *bp, uint8_t tun_type,
 				   uint16_t *dst_fid);
 int bnxt_hwrm_set_mac(struct bnxt *bp);
 int bnxt_hwrm_if_change(struct bnxt *bp, bool state);
+int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp);
 #endif
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v2 07/13] net/bnxt: map status registers for FW health monitoring
  2019-08-30 16:35     ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                         ` (5 preceding siblings ...)
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 06/13] net/bnxt: query firmware error recovery capabilities Ajit Khaparde
@ 2019-08-30 16:35       ` Ajit Khaparde
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 08/13] net/bnxt: advertise error recovery capability and handle async event Ajit Khaparde
                         ` (6 subsequent siblings)
  13 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-30 16:35 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

The HWRM_ERROR_RECOVERY_QCFG command returns the offsets of the FW status
registers used for periodic firmware health monitoring. Map them to GRC
window 2.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        | 22 ++++++++++++++++-
 drivers/net/bnxt/bnxt_ethdev.c | 44 ++++++++++++++++++++++++++++++++++
 drivers/net/bnxt/bnxt_hwrm.c   |  4 ++++
 3 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 19bd13a7f..1da09569d 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -354,7 +354,9 @@ struct bnxt_error_recovery_info {
 #define BNXT_FW_HEARTBEAT_CNT_REG	1
 #define BNXT_FW_RECOVERY_CNT_REG	2
 #define BNXT_FW_RESET_INPROG_REG	3
-	uint32_t	status_regs[4];
+#define BNXT_FW_STATUS_REG_CNT		4
+	uint32_t	status_regs[BNXT_FW_STATUS_REG_CNT];
+	uint32_t	mapped_status_regs[BNXT_FW_STATUS_REG_CNT];
 	uint32_t	reset_inprogress_reg_mask;
 #define BNXT_NUM_RESET_REG	16
 	uint8_t		reg_array_cnt;
@@ -366,6 +368,22 @@ struct bnxt_error_recovery_info {
 	uint32_t	flags;
 };
 
+/* address space location of register */
+#define BNXT_FW_STATUS_REG_TYPE_MASK	3
+/* register is located in PCIe config space */
+#define BNXT_FW_STATUS_REG_TYPE_CFG	0
+/* register is located in GRC address space */
+#define BNXT_FW_STATUS_REG_TYPE_GRC	1
+/* register is located in BAR0  */
+#define BNXT_FW_STATUS_REG_TYPE_BAR0	2
+/* register is located in BAR1  */
+#define BNXT_FW_STATUS_REG_TYPE_BAR1	3
+
+#define BNXT_FW_STATUS_REG_TYPE(reg)	((reg) & BNXT_FW_STATUS_REG_TYPE_MASK)
+#define BNXT_FW_STATUS_REG_OFF(reg)	((reg) & ~BNXT_FW_STATUS_REG_TYPE_MASK)
+
+#define BNXT_GRCP_WINDOW_2_BASE		0x2000
+
 #define BNXT_HWRM_SHORT_REQ_LEN		sizeof(struct hwrm_short_input)
 struct bnxt {
 	void				*bar0;
@@ -510,6 +528,8 @@ int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete);
 int bnxt_rcv_msg_from_vf(struct bnxt *bp, uint16_t vf_id, void *msg);
 int is_bnxt_in_error(struct bnxt *bp);
 
+int bnxt_map_fw_health_status_regs(struct bnxt *bp);
+
 bool is_bnxt_supported(struct rte_eth_dev *dev);
 bool bnxt_stratus_device(struct bnxt *bp);
 extern const struct rte_flow_ops bnxt_flow_ops;
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 7a1142947..a0de259da 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -3496,6 +3496,49 @@ static const struct eth_dev_ops bnxt_dev_ops = {
 	.timesync_read_tx_timestamp = bnxt_timesync_read_tx_timestamp,
 };
 
+int bnxt_map_fw_health_status_regs(struct bnxt *bp)
+{
+	struct bnxt_error_recovery_info *info = bp->recovery_info;
+	uint32_t reg_base = 0xffffffff;
+	int i;
+
+	/* Only pre-map the monitoring GRC registers using window 2 */
+	for (i = 0; i < BNXT_FW_STATUS_REG_CNT; i++) {
+		uint32_t reg = info->status_regs[i];
+
+		if (BNXT_FW_STATUS_REG_TYPE(reg) != BNXT_FW_STATUS_REG_TYPE_GRC)
+			continue;
+
+		if (reg_base == 0xffffffff)
+			reg_base = reg & 0xfffff000;
+		if ((reg & 0xfffff000) != reg_base)
+			return -ERANGE;
+
+		/* Use mask 0xffc as the Lower 2 bits indicates
+		 * address space location
+		 */
+		info->mapped_status_regs[i] = BNXT_GRCP_WINDOW_2_BASE +
+						(reg & 0xffc);
+	}
+
+	if (reg_base == 0xffffffff)
+		return 0;
+
+	rte_write32(reg_base, (uint8_t *)bp->bar0 +
+		    BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4);
+
+	return 0;
+}
+
+static void bnxt_unmap_fw_health_status_regs(struct bnxt *bp)
+{
+	if (!(bp->flags & BNXT_FLAG_FW_CAP_ERROR_RECOVERY))
+		return;
+
+	rte_write32(0, (uint8_t *)bp->bar0 +
+		    BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4);
+}
+
 static void bnxt_dev_cleanup(struct bnxt *bp)
 {
 	bnxt_set_hwrm_link_config(bp, false);
@@ -4226,6 +4269,7 @@ bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev)
 	bnxt_free_int(bp);
 	bnxt_free_mem(bp, reconfig_dev);
 	bnxt_hwrm_func_buf_unrgtr(bp);
+	bnxt_unmap_fw_health_status_regs(bp);
 	rc = bnxt_hwrm_func_driver_unregister(bp, 0);
 	bp->flags &= ~BNXT_FLAG_REGISTERED;
 	bnxt_free_ctx_mem(bp);
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index e2c993936..2d9c43c98 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -4767,6 +4767,10 @@ int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp)
 err:
 	HWRM_UNLOCK();
 
+	/* Map the FW status registers */
+	if (!rc)
+		rc = bnxt_map_fw_health_status_regs(bp);
+
 	if (rc) {
 		rte_free(bp->recovery_info);
 		bp->recovery_info = NULL;
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v2 08/13] net/bnxt: advertise error recovery capability and handle async event
  2019-08-30 16:35     ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                         ` (6 preceding siblings ...)
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 07/13] net/bnxt: map status registers for FW health monitoring Ajit Khaparde
@ 2019-08-30 16:35       ` Ajit Khaparde
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 09/13] net/bnxt: add code for periodic FW health monitoring Ajit Khaparde
                         ` (5 subsequent siblings)
  13 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-30 16:35 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

1. Advertise HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_ERROR_RECOVERY_SUPPORT flag
   in the FUNC_DRV_RGTR command.
2. Request the async event ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY
   in the FUNC_DRV_RGTR command.
3. Handle the async event EVENT_ID_ERROR_RECOVERY from FW.

Error recovery support will be used by firmware only if all the driver
instances support the error recovery process.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h      |  2 ++
 drivers/net/bnxt/bnxt_cpr.c  | 45 ++++++++++++++++++++++++++++++++++++
 drivers/net/bnxt/bnxt_cpr.h  | 12 ++++++++++
 drivers/net/bnxt/bnxt_hwrm.c |  5 ++++
 drivers/net/bnxt/bnxt_hwrm.h |  2 ++
 5 files changed, 66 insertions(+)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 1da09569d..f9147a9a8 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -365,6 +365,8 @@ struct bnxt_error_recovery_info {
 	uint8_t		delay_after_reset[BNXT_NUM_RESET_REG];
 #define BNXT_FLAG_ERROR_RECOVERY_HOST	(1 << 0)
 #define BNXT_FLAG_ERROR_RECOVERY_CO_CPU	(1 << 1)
+#define BNXT_FLAG_MASTER_FUNC		(1 << 2)
+#define BNXT_FLAG_RECOVERY_ENABLED	(1 << 3)
 	uint32_t	flags;
 };
 
diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index 0b2eeef8f..a70301adc 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -21,6 +21,7 @@ void bnxt_handle_async_event(struct bnxt *bp,
 	struct hwrm_async_event_cmpl *async_cmp =
 				(struct hwrm_async_event_cmpl *)cmp;
 	uint16_t event_id = rte_le_to_cpu_16(async_cmp->event_id);
+	struct bnxt_error_recovery_info *info;
 	uint32_t event_data;
 
 	/* TODO: HWRM async events are not defined yet */
@@ -65,6 +66,31 @@ void bnxt_handle_async_event(struct bnxt *bp,
 		rte_eal_alarm_set(US_PER_MS, bnxt_dev_reset_and_resume,
 				  (void *)bp);
 		break;
+	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY:
+		info = bp->recovery_info;
+
+		if (!info)
+			return;
+
+		PMD_DRV_LOG(INFO, "Error recovery async event received\n");
+
+		event_data = rte_le_to_cpu_32(async_cmp->event_data1) &
+				EVENT_DATA1_FLAGS_MASK;
+
+		if (event_data & EVENT_DATA1_FLAGS_MASTER_FUNC)
+			info->flags |= BNXT_FLAG_MASTER_FUNC;
+		else
+			info->flags &= ~BNXT_FLAG_MASTER_FUNC;
+
+		if (event_data & EVENT_DATA1_FLAGS_RECOVERY_ENABLED)
+			info->flags |= BNXT_FLAG_RECOVERY_ENABLED;
+		else
+			info->flags &= ~BNXT_FLAG_RECOVERY_ENABLED;
+
+		PMD_DRV_LOG(INFO, "recovery enabled(%d), master function(%d)\n",
+			    bnxt_is_recovery_enabled(bp),
+			    bnxt_is_master_func(bp));
+		break;
 	default:
 		PMD_DRV_LOG(INFO, "handle_async_event id = 0x%x\n", event_id);
 		break;
@@ -186,3 +212,22 @@ int bnxt_event_hwrm_resp_handler(struct bnxt *bp, struct cmpl_base *cmp)
 
 	return evt;
 }
+
+bool bnxt_is_master_func(struct bnxt *bp)
+{
+	struct bnxt_error_recovery_info *info = bp->recovery_info;
+
+	/* recovery_info may be NULL (recovery query skipped or failed) */
+	return info != NULL && (info->flags & BNXT_FLAG_MASTER_FUNC) != 0;
+}
+
+bool bnxt_is_recovery_enabled(struct bnxt *bp)
+{
+	struct bnxt_error_recovery_info *info;
+
+	info = bp->recovery_info;
+	if (info && (info->flags & BNXT_FLAG_RECOVERY_ENABLED))
+		return true;
+
+	return false;
+}
diff --git a/drivers/net/bnxt/bnxt_cpr.h b/drivers/net/bnxt/bnxt_cpr.h
index b61bafa0e..f118bda36 100644
--- a/drivers/net/bnxt/bnxt_cpr.h
+++ b/drivers/net/bnxt/bnxt_cpr.h
@@ -113,4 +113,16 @@ void bnxt_dev_reset_and_resume(void *arg);
 #define EVENT_DATA1_REASON_CODE_MASK                   \
 	HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MASK
 
+#define EVENT_DATA1_FLAGS_MASK                         \
+	HWRM_ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_MASK
+
+#define EVENT_DATA1_FLAGS_MASTER_FUNC                  \
+	HWRM_ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_MASTER_FUNC
+
+#define EVENT_DATA1_FLAGS_RECOVERY_ENABLED             \
+	HWRM_ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_RECOVERY_ENABLED
+
+bool bnxt_is_recovery_enabled(struct bnxt *bp);
+bool bnxt_is_master_func(struct bnxt *bp);
+
 #endif
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 2d9c43c98..350e867bf 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -685,6 +685,8 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp)
 		return 0;
 
 	flags = HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_HOT_RESET_SUPPORT;
+	if (bp->flags & BNXT_FLAG_FW_CAP_ERROR_RECOVERY)
+		flags |= HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_ERROR_RECOVERY_SUPPORT;
 
 	HWRM_PREP(req, FUNC_DRV_RGTR, BNXT_USE_CHIMP_MB);
 	req.enables = rte_cpu_to_le_32(HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_VER |
@@ -716,6 +718,9 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp)
 				 ASYNC_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED |
 				 ASYNC_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE |
 				 ASYNC_CMPL_EVENT_ID_RESET_NOTIFY);
+	if (bp->flags & BNXT_FLAG_FW_CAP_ERROR_RECOVERY)
+		req.async_event_fwd[0] |=
+			rte_cpu_to_le_32(ASYNC_CMPL_EVENT_ID_ERROR_RECOVERY);
 	req.async_event_fwd[1] |=
 		rte_cpu_to_le_32(ASYNC_CMPL_EVENT_ID_PF_DRVR_UNLOAD |
 				 ASYNC_CMPL_EVENT_ID_VF_CFG_CHANGE);
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index c332c129d..44e335507 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -23,6 +23,8 @@ struct bnxt_cp_ring_info;
 	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE)
 #define ASYNC_CMPL_EVENT_ID_RESET_NOTIFY \
 	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY)
+#define ASYNC_CMPL_EVENT_ID_ERROR_RECOVERY \
+	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY)
 #define ASYNC_CMPL_EVENT_ID_PF_DRVR_UNLOAD	\
 	(1 << (HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD - 32))
 #define ASYNC_CMPL_EVENT_ID_VF_CFG_CHANGE	\
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v2 09/13] net/bnxt: add code for periodic FW health monitoring
  2019-08-30 16:35     ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                         ` (7 preceding siblings ...)
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 08/13] net/bnxt: advertise error recovery capability and handle async event Ajit Khaparde
@ 2019-08-30 16:35       ` Ajit Khaparde
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 10/13] net/bnxt: add support for FW reset Ajit Khaparde
                         ` (4 subsequent siblings)
  13 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-30 16:35 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

Periodically poll the FW heartbeat register and FW recovery counter
registers to check the FW health. Polling frequency will be
advertised by the FW in HWRM_ERROR_RECOVERY_QCFG response.
Schedule the task upon receiving the async event from FW.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        |  6 +++
 drivers/net/bnxt/bnxt_cpr.c    | 10 ++++
 drivers/net/bnxt/bnxt_ethdev.c | 89 ++++++++++++++++++++++++++++++++++
 3 files changed, 105 insertions(+)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index f9147a9a8..5579e127c 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -368,6 +368,9 @@ struct bnxt_error_recovery_info {
 #define BNXT_FLAG_MASTER_FUNC		(1 << 2)
 #define BNXT_FLAG_RECOVERY_ENABLED	(1 << 3)
 	uint32_t	flags;
+
+	uint32_t        last_heart_beat;
+	uint32_t        last_reset_counter;
 };
 
 /* address space location of register */
@@ -415,6 +418,7 @@ struct bnxt {
 #define BNXT_FLAG_FATAL_ERROR	(1 << 16)
 #define BNXT_FLAG_FW_CAP_IF_CHANGE	(1 << 17)
 #define BNXT_FLAG_FW_CAP_ERROR_RECOVERY	(1 << 18)
+#define BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED	(1 << 19)
 #define BNXT_FLAG_EXT_STATS_SUPPORTED	(1 << 29)
 #define BNXT_FLAG_NEW_RM	(1 << 30)
 #define BNXT_FLAG_INIT_DONE	(1U << 31)
@@ -531,6 +535,8 @@ int bnxt_rcv_msg_from_vf(struct bnxt *bp, uint16_t vf_id, void *msg);
 int is_bnxt_in_error(struct bnxt *bp);
 
 int bnxt_map_fw_health_status_regs(struct bnxt *bp);
+uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index);
+void bnxt_schedule_fw_health_check(struct bnxt *bp);
 
 bool is_bnxt_supported(struct rte_eth_dev *dev);
 bool bnxt_stratus_device(struct bnxt *bp);
diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index a70301adc..3cedb891e 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -90,6 +90,16 @@ void bnxt_handle_async_event(struct bnxt *bp,
 		PMD_DRV_LOG(INFO, "recovery enabled(%d), master function(%d)\n",
 			    bnxt_is_recovery_enabled(bp),
 			    bnxt_is_master_func(bp));
+
+		if (bp->flags & BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED)
+			return;
+
+		info->last_heart_beat =
+			bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG);
+		info->last_reset_counter =
+			bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG);
+
+		bnxt_schedule_fw_health_check(bp);
 		break;
 	default:
 		PMD_DRV_LOG(INFO, "handle_async_event id = 0x%x\n", event_id);
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index a0de259da..62a4a65fb 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -3607,6 +3607,94 @@ void bnxt_dev_reset_and_resume(void *arg)
 		PMD_DRV_LOG(ERR, "Error setting recovery alarm");
 }
 
+uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index)
+{
+	struct bnxt_error_recovery_info *info = bp->recovery_info;
+	uint32_t reg = info->status_regs[index];
+	uint32_t type, offset, val = 0;
+
+	type = BNXT_FW_STATUS_REG_TYPE(reg);
+	offset = BNXT_FW_STATUS_REG_OFF(reg);
+
+	switch (type) {
+	case BNXT_FW_STATUS_REG_TYPE_CFG:
+		rte_pci_read_config(bp->pdev, &val, sizeof(val), offset);
+		break;
+	case BNXT_FW_STATUS_REG_TYPE_GRC:
+		offset = info->mapped_status_regs[index];
+		/* FALLTHROUGH */
+	case BNXT_FW_STATUS_REG_TYPE_BAR0:
+		val = rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 +
+				       offset));
+		break;
+	}
+
+	return val;
+}
+
+/* Driver should poll FW heartbeat, reset_counter with the frequency
+ * advertised by FW in HWRM_ERROR_RECOVERY_QCFG.
+ * When the driver detects heartbeat stop or change in reset_counter,
+ * it has to trigger a reset to recover from the error condition.
+ * A "master PF" is the function that has the privilege to
+ * initiate the chimp reset. The master PF is elected by the
+ * firmware and is notified through an async message.
+ */
+static void bnxt_check_fw_health(void *arg)
+{
+	struct bnxt *bp = arg;
+	struct bnxt_error_recovery_info *info = bp->recovery_info;
+	uint32_t val = 0;
+
+	if (!info || !bnxt_is_recovery_enabled(bp) ||
+	    is_bnxt_in_error(bp))
+		return;
+
+	val = bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG);
+	if (val == info->last_heart_beat)
+		goto reset;
+
+	info->last_heart_beat = val;
+
+	val = bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG);
+	if (val != info->last_reset_counter)
+		goto reset;
+
+	info->last_reset_counter = val;
+
+	rte_eal_alarm_set(US_PER_MS * info->driver_polling_freq,
+			  bnxt_check_fw_health, (void *)bp);
+
+	return;
+reset:
+	/* Stop DMA to/from device */
+	bp->flags |= BNXT_FLAG_FATAL_ERROR;
+	bp->flags |= BNXT_FLAG_FW_RESET;
+
+	PMD_DRV_LOG(ERR, "Detected FW dead condition\n");
+}
+
+void bnxt_schedule_fw_health_check(struct bnxt *bp)
+{
+	uint32_t polling_freq;
+
+	/* recovery_info may be NULL; it is read only after this check */
+	if (!bnxt_is_recovery_enabled(bp))
+		return;
+	polling_freq = bp->recovery_info->driver_polling_freq;
+	rte_eal_alarm_set(US_PER_MS * polling_freq, bnxt_check_fw_health, bp);
+	bp->flags |= BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED;
+}
+
+static void bnxt_cancel_fw_health_check(struct bnxt *bp)
+{
+	if (!bnxt_is_recovery_enabled(bp))
+		return;
+
+	rte_eal_alarm_cancel(bnxt_check_fw_health, (void *)bp);
+	bp->flags &= ~BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED;
+}
+
 static bool bnxt_vf_pciid(uint16_t id)
 {
 	if (id == BROADCOM_DEV_ID_57304_VF ||
@@ -4269,6 +4357,7 @@ bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev)
 	bnxt_free_int(bp);
 	bnxt_free_mem(bp, reconfig_dev);
 	bnxt_hwrm_func_buf_unrgtr(bp);
+	bnxt_cancel_fw_health_check(bp);
 	bnxt_unmap_fw_health_status_regs(bp);
 	rc = bnxt_hwrm_func_driver_unregister(bp, 0);
 	bp->flags &= ~BNXT_FLAG_REGISTERED;
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v2 10/13] net/bnxt: add support for FW reset
  2019-08-30 16:35     ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                         ` (8 preceding siblings ...)
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 09/13] net/bnxt: add code for periodic FW health monitoring Ajit Khaparde
@ 2019-08-30 16:35       ` Ajit Khaparde
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 11/13] net/bnxt: reduce verbosity of logs Ajit Khaparde
                         ` (3 subsequent siblings)
  13 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-30 16:35 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

Added code to perform FW_RESET. When the driver detects an error in FW,
it has to initiate the recovery by resetting the cores. FW advertises
the method to do a core reset, and the reset register offsets and values
needed to perform it, in the response to the HWRM_ERROR_RECOVERY_QCFG
command.

There are 2 ways to recover from the error.
1. Master function issues core resets to recover from error.
2. Master function detects the chimp dead condition and notifies the Kong
   processor about it through the FW_RESET HWRM command.
   The Kong processor sends a RESET_NOTIFY async event with
   REASON_CODE_FW_EXCEPTION_FATAL to all the PFs/VFs, telling them that
   chimp is dead and that it is going to reset the chimp.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        |   1 +
 drivers/net/bnxt/bnxt_ethdev.c | 104 ++++++++++++++++++++++++++++++++-
 drivers/net/bnxt/bnxt_hwrm.c   |  26 +++++++++
 drivers/net/bnxt/bnxt_hwrm.h   |   1 +
 4 files changed, 131 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 5579e127c..a1a8cd534 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -388,6 +388,7 @@ struct bnxt_error_recovery_info {
 #define BNXT_FW_STATUS_REG_OFF(reg)	((reg) & ~BNXT_FW_STATUS_REG_TYPE_MASK)
 
 #define BNXT_GRCP_WINDOW_2_BASE		0x2000
+#define BNXT_GRCP_WINDOW_3_BASE		0x3000
 
 #define BNXT_HWRM_SHORT_REQ_LEN		sizeof(struct hwrm_short_input)
 struct bnxt {
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 62a4a65fb..76f9e197f 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -3496,6 +3496,19 @@ static const struct eth_dev_ops bnxt_dev_ops = {
 	.timesync_read_tx_timestamp = bnxt_timesync_read_tx_timestamp,
 };
 
+static uint32_t bnxt_map_reset_regs(struct bnxt *bp, uint32_t reg)
+{
+	uint32_t offset;
+
+	/* Only pre-map the reset GRC registers using window 3 */
+	rte_write32(reg & 0xfffff000, (uint8_t *)bp->bar0 +
+		    BNXT_GRCPF_REG_WINDOW_BASE_OUT + 8);
+
+	offset = BNXT_GRCP_WINDOW_3_BASE + (reg & 0xffc);
+
+	return offset;
+}
+
 int bnxt_map_fw_health_status_regs(struct bnxt *bp)
 {
 	struct bnxt_error_recovery_info *info = bp->recovery_info;
@@ -3539,6 +3552,34 @@ static void bnxt_unmap_fw_health_status_regs(struct bnxt *bp)
 		    BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4);
 }
 
+static void bnxt_write_fw_reset_reg(struct bnxt *bp, uint32_t index)
+{
+	struct bnxt_error_recovery_info *info = bp->recovery_info;
+	uint32_t delay = info->delay_after_reset[index];
+	uint32_t val = info->reset_reg_val[index];
+	uint32_t reg = info->reset_reg[index];
+	uint32_t type, offset;
+
+	type = BNXT_FW_STATUS_REG_TYPE(reg);
+	offset = BNXT_FW_STATUS_REG_OFF(reg);
+
+	switch (type) {
+	case BNXT_FW_STATUS_REG_TYPE_CFG:
+		rte_pci_write_config(bp->pdev, &val, sizeof(val), offset);
+		break;
+	case BNXT_FW_STATUS_REG_TYPE_GRC:
+		offset = bnxt_map_reset_regs(bp, offset);
+		rte_write32(val, (uint8_t *)bp->bar0 + offset);
+		break;
+	case BNXT_FW_STATUS_REG_TYPE_BAR0:
+		rte_write32(val, (uint8_t *)bp->bar0 + offset);
+		break;
+	}
+	/* wait on a specific interval of time until core reset is complete */
+	if (delay)
+		rte_delay_ms(delay);
+}
+
 static void bnxt_dev_cleanup(struct bnxt *bp)
 {
 	bnxt_set_hwrm_link_config(bp, false);
@@ -3632,6 +3673,59 @@ uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index)
 	return val;
 }
 
+static int bnxt_fw_reset_all(struct bnxt *bp)
+{
+	struct bnxt_error_recovery_info *info = bp->recovery_info;
+	uint32_t i;
+	int rc = 0;
+
+	if (info->flags & BNXT_FLAG_ERROR_RECOVERY_HOST) {
+		/* Reset through master function driver */
+		for (i = 0; i < info->reg_array_cnt; i++)
+			bnxt_write_fw_reset_reg(bp, i);
+		/* Wait for time specified by FW after triggering reset */
+		rte_delay_ms(info->master_func_wait_period_after_reset);
+	} else if (info->flags & BNXT_FLAG_ERROR_RECOVERY_CO_CPU) {
+		/* Reset with the help of Kong processor */
+		rc = bnxt_hwrm_fw_reset(bp);
+		if (rc)
+			PMD_DRV_LOG(ERR, "Failed to reset FW\n");
+	}
+
+	return rc;
+}
+
+static void bnxt_fw_reset_cb(void *arg)
+{
+	struct bnxt *bp = arg;
+	struct bnxt_error_recovery_info *info = bp->recovery_info;
+	int rc = 0;
+
+	/* Only Master function can do FW reset (NULL-safe check first) */
+	if (bnxt_is_recovery_enabled(bp) &&
+	    bnxt_is_master_func(bp)) {
+		rc = bnxt_fw_reset_all(bp);
+		if (rc) {
+			PMD_DRV_LOG(ERR, "Adapter recovery failed\n");
+			return;
+		}
+	}
+
+	/* if recovery method is ERROR_RECOVERY_CO_CPU, KONG will send
+	 * EXCEPTION_FATAL_ASYNC event to all the functions
+	 * (including MASTER FUNC). After receiving this Async, all the active
+	 * drivers should treat this case as FW initiated recovery
+	 */
+	if (info && (info->flags & BNXT_FLAG_ERROR_RECOVERY_HOST)) {
+		bp->fw_reset_min_msecs = BNXT_MIN_FW_READY_TIMEOUT;
+		bp->fw_reset_max_msecs = BNXT_MAX_FW_RESET_TIMEOUT;
+
+		/* To recover from error */
+		rte_eal_alarm_set(US_PER_MS, bnxt_dev_reset_and_resume,
+				  (void *)bp);
+	}
+}
+
 /* Driver should poll FW heartbeat, reset_counter with the frequency
  * advertised by FW in HWRM_ERROR_RECOVERY_QCFG.
  * When the driver detects heartbeat stop or change in reset_counter,
@@ -3644,7 +3738,7 @@ static void bnxt_check_fw_health(void *arg)
 {
 	struct bnxt *bp = arg;
 	struct bnxt_error_recovery_info *info = bp->recovery_info;
-	uint32_t val = 0;
+	uint32_t val = 0, wait_msec;
 
 	if (!info || !bnxt_is_recovery_enabled(bp) ||
 	    is_bnxt_in_error(bp))
@@ -3672,6 +3766,14 @@ static void bnxt_check_fw_health(void *arg)
 	bp->flags |= BNXT_FLAG_FW_RESET;
 
 	PMD_DRV_LOG(ERR, "Detected FW dead condition\n");
+
+	if (bnxt_is_master_func(bp))
+		wait_msec = info->master_func_wait_period;
+	else
+		wait_msec = info->normal_func_wait_period;
+
+	rte_eal_alarm_set(US_PER_MS * wait_msec,
+			  bnxt_fw_reset_cb, (void *)bp);
 }
 
 void bnxt_schedule_fw_health_check(struct bnxt *bp)
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 350e867bf..bd2cc01e1 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -4782,3 +4782,29 @@ int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp)
 	}
 	return rc;
 }
+
+int bnxt_hwrm_fw_reset(struct bnxt *bp)
+{
+	struct hwrm_fw_reset_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_fw_reset_input req = {0};
+	int rc;
+
+	if (!BNXT_PF(bp))
+		return -EOPNOTSUPP;
+
+	HWRM_PREP(req, FW_RESET, BNXT_USE_KONG(bp));
+
+	req.embedded_proc_type =
+		HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_CHIP;
+	req.selfrst_status =
+		HWRM_FW_RESET_INPUT_SELFRST_STATUS_SELFRSTASAP;
+	req.flags = HWRM_FW_RESET_INPUT_FLAGS_RESET_GRACEFUL;
+
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req),
+				    BNXT_USE_KONG(bp));
+
+	HWRM_CHECK_RESULT();
+	HWRM_UNLOCK();
+
+	return rc;
+}
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index 44e335507..db25ad591 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -205,4 +205,5 @@ int bnxt_hwrm_tunnel_redirect_info(struct bnxt *bp, uint8_t tun_type,
 int bnxt_hwrm_set_mac(struct bnxt *bp);
 int bnxt_hwrm_if_change(struct bnxt *bp, bool state);
 int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp);
+int bnxt_hwrm_fw_reset(struct bnxt *bp);
 #endif
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v2 11/13] net/bnxt: reduce verbosity of logs
  2019-08-30 16:35     ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                         ` (9 preceding siblings ...)
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 10/13] net/bnxt: add support for FW reset Ajit Khaparde
@ 2019-08-30 16:35       ` Ajit Khaparde
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 12/13] net/bnxt: use BIT macro instead of bit fields Ajit Khaparde
                         ` (2 subsequent siblings)
  13 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-30 16:35 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Lance Richardson, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

When IOMMU is available, EAL picks IOVA as VA as the default IOVA mode.
This causes the bnxt driver to log warning messages saying
"Memzone physical address same as virtual." and "Using rte_mem_virt2iova()"
during load.

Reduce the verbosity of logs to DEBUG.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Lance Richardson <lance.richardson@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt_ethdev.c | 21 +++++++++------------
 drivers/net/bnxt/bnxt_ring.c   |  7 +++----
 drivers/net/bnxt/bnxt_vnic.c   |  7 +++----
 3 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 76f9e197f..b94c9a122 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -3890,10 +3890,9 @@ static int bnxt_alloc_ctx_mem_blk(__rte_unused struct bnxt *bp,
 		memset(mz->addr, 0, mz->len);
 		mz_phys_addr = mz->iova;
 		if ((unsigned long)mz->addr == mz_phys_addr) {
-			PMD_DRV_LOG(WARNING,
-				"Memzone physical address same as virtual.\n");
-			PMD_DRV_LOG(WARNING,
-				    "Using rte_mem_virt2iova()\n");
+			PMD_DRV_LOG(DEBUG,
+				    "physical address same as virtual\n");
+			PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
 			mz_phys_addr = rte_mem_virt2iova(mz->addr);
 			if (mz_phys_addr == RTE_BAD_IOVA) {
 				PMD_DRV_LOG(ERR,
@@ -3926,10 +3925,9 @@ static int bnxt_alloc_ctx_mem_blk(__rte_unused struct bnxt *bp,
 	memset(mz->addr, 0, mz->len);
 	mz_phys_addr = mz->iova;
 	if ((unsigned long)mz->addr == mz_phys_addr) {
-		PMD_DRV_LOG(WARNING,
+		PMD_DRV_LOG(DEBUG,
 			    "Memzone physical address same as virtual.\n");
-		PMD_DRV_LOG(WARNING,
-			    "Using rte_mem_virt2iova()\n");
+		PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
 		for (sz = 0; sz < mem_size; sz += BNXT_PAGE_SIZE)
 			rte_mem_lock_page(((char *)mz->addr) + sz);
 		mz_phys_addr = rte_mem_virt2iova(mz->addr);
@@ -4117,9 +4115,9 @@ static int bnxt_alloc_stats_mem(struct bnxt *bp)
 	memset(mz->addr, 0, mz->len);
 	mz_phys_addr = mz->iova;
 	if ((unsigned long)mz->addr == mz_phys_addr) {
-		PMD_DRV_LOG(WARNING,
+		PMD_DRV_LOG(DEBUG,
 			    "Memzone physical address same as virtual.\n");
-		PMD_DRV_LOG(WARNING,
+		PMD_DRV_LOG(DEBUG,
 			    "Using rte_mem_virt2iova()\n");
 		mz_phys_addr = rte_mem_virt2iova(mz->addr);
 		if (mz_phys_addr == RTE_BAD_IOVA) {
@@ -4155,10 +4153,9 @@ static int bnxt_alloc_stats_mem(struct bnxt *bp)
 	memset(mz->addr, 0, mz->len);
 	mz_phys_addr = mz->iova;
 	if ((unsigned long)mz->addr == mz_phys_addr) {
-		PMD_DRV_LOG(WARNING,
+		PMD_DRV_LOG(DEBUG,
 			    "Memzone physical address same as virtual\n");
-		PMD_DRV_LOG(WARNING,
-			    "Using rte_mem_virt2iova()\n");
+		PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
 		mz_phys_addr = rte_mem_virt2iova(mz->addr);
 		if (mz_phys_addr == RTE_BAD_IOVA) {
 			PMD_DRV_LOG(ERR,
diff --git a/drivers/net/bnxt/bnxt_ring.c b/drivers/net/bnxt/bnxt_ring.c
index f19865c83..2f57e038a 100644
--- a/drivers/net/bnxt/bnxt_ring.c
+++ b/drivers/net/bnxt/bnxt_ring.c
@@ -212,10 +212,9 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
 	mz_phys_addr_base = mz->iova;
 	mz_phys_addr = mz->iova;
 	if ((unsigned long)mz->addr == mz_phys_addr_base) {
-		PMD_DRV_LOG(WARNING,
-			"Memzone physical address same as virtual.\n");
-		PMD_DRV_LOG(WARNING,
-			"Using rte_mem_virt2iova()\n");
+		PMD_DRV_LOG(DEBUG,
+			    "Memzone physical address same as virtual.\n");
+		PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
 		for (sz = 0; sz < total_alloc_len; sz += getpagesize())
 			rte_mem_lock_page(((char *)mz->addr) + sz);
 		mz_phys_addr_base = rte_mem_virt2iova(mz->addr);
diff --git a/drivers/net/bnxt/bnxt_vnic.c b/drivers/net/bnxt/bnxt_vnic.c
index 98415633e..9ea99388b 100644
--- a/drivers/net/bnxt/bnxt_vnic.c
+++ b/drivers/net/bnxt/bnxt_vnic.c
@@ -150,10 +150,9 @@ int bnxt_alloc_vnic_attributes(struct bnxt *bp)
 	}
 	mz_phys_addr = mz->iova;
 	if ((unsigned long)mz->addr == mz_phys_addr) {
-		PMD_DRV_LOG(WARNING,
-			"Memzone physical address same as virtual.\n");
-		PMD_DRV_LOG(WARNING,
-			"Using rte_mem_virt2iova()\n");
+		PMD_DRV_LOG(DEBUG,
+			    "Memzone physical address same as virtual.\n");
+		PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
 		mz_phys_addr = rte_mem_virt2iova(mz->addr);
 		if (mz_phys_addr == RTE_BAD_IOVA) {
 			PMD_DRV_LOG(ERR,
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v2 12/13] net/bnxt: use BIT macro instead of bit fields
  2019-08-30 16:35     ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                         ` (10 preceding siblings ...)
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 11/13] net/bnxt: reduce verbosity of logs Ajit Khaparde
@ 2019-08-30 16:35       ` Ajit Khaparde
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 13/13] net/bnxt: avoid null pointer dereference Ajit Khaparde
  2019-09-30 13:29       ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ferruh Yigit
  13 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-30 16:35 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

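Use the BIT macro instead of open-coded (1 << n) shifts for flag
definitions. As part of this change, BNXT_FLAG_EXT_STATS_SUPPORTED,
BNXT_FLAG_NEW_RM and BNXT_FLAG_INIT_DONE move from bits 29-31 to
bits 20-22.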

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h      | 75 ++++++++++++++++++------------------
 drivers/net/bnxt/bnxt_util.h |  4 ++
 2 files changed, 42 insertions(+), 37 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index a1a8cd534..ac602fe52 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -19,6 +19,7 @@
 #include <rte_time.h>
 
 #include "bnxt_cpr.h"
+#include "bnxt_util.h"
 
 #define BNXT_MAX_MTU		9574
 #define VLAN_TAG_SIZE		4
@@ -198,16 +199,16 @@ struct bnxt_ptp_cfg {
 	struct bnxt		*bp;
 #define BNXT_MAX_TX_TS	1
 	uint16_t			rxctl;
-#define BNXT_PTP_MSG_SYNC			(1 << 0)
-#define BNXT_PTP_MSG_DELAY_REQ			(1 << 1)
-#define BNXT_PTP_MSG_PDELAY_REQ			(1 << 2)
-#define BNXT_PTP_MSG_PDELAY_RESP		(1 << 3)
-#define BNXT_PTP_MSG_FOLLOW_UP			(1 << 8)
-#define BNXT_PTP_MSG_DELAY_RESP			(1 << 9)
-#define BNXT_PTP_MSG_PDELAY_RESP_FOLLOW_UP	(1 << 10)
-#define BNXT_PTP_MSG_ANNOUNCE			(1 << 11)
-#define BNXT_PTP_MSG_SIGNALING			(1 << 12)
-#define BNXT_PTP_MSG_MANAGEMENT			(1 << 13)
+#define BNXT_PTP_MSG_SYNC			BIT(0)
+#define BNXT_PTP_MSG_DELAY_REQ			BIT(1)
+#define BNXT_PTP_MSG_PDELAY_REQ			BIT(2)
+#define BNXT_PTP_MSG_PDELAY_RESP		BIT(3)
+#define BNXT_PTP_MSG_FOLLOW_UP			BIT(8)
+#define BNXT_PTP_MSG_DELAY_RESP			BIT(9)
+#define BNXT_PTP_MSG_PDELAY_RESP_FOLLOW_UP	BIT(10)
+#define BNXT_PTP_MSG_ANNOUNCE			BIT(11)
+#define BNXT_PTP_MSG_SIGNALING			BIT(12)
+#define BNXT_PTP_MSG_MANAGEMENT			BIT(13)
 #define BNXT_PTP_MSG_EVENTS		(BNXT_PTP_MSG_SYNC |		\
 					 BNXT_PTP_MSG_DELAY_REQ |	\
 					 BNXT_PTP_MSG_PDELAY_REQ |	\
@@ -363,10 +364,10 @@ struct bnxt_error_recovery_info {
 	uint32_t	reset_reg[BNXT_NUM_RESET_REG];
 	uint32_t	reset_reg_val[BNXT_NUM_RESET_REG];
 	uint8_t		delay_after_reset[BNXT_NUM_RESET_REG];
-#define BNXT_FLAG_ERROR_RECOVERY_HOST	(1 << 0)
-#define BNXT_FLAG_ERROR_RECOVERY_CO_CPU	(1 << 1)
-#define BNXT_FLAG_MASTER_FUNC		(1 << 2)
-#define BNXT_FLAG_RECOVERY_ENABLED	(1 << 3)
+#define BNXT_FLAG_ERROR_RECOVERY_HOST	BIT(0)
+#define BNXT_FLAG_ERROR_RECOVERY_CO_CPU	BIT(1)
+#define BNXT_FLAG_MASTER_FUNC		BIT(2)
+#define BNXT_FLAG_RECOVERY_ENABLED	BIT(3)
 	uint32_t	flags;
 
 	uint32_t        last_heart_beat;
@@ -400,29 +401,29 @@ struct bnxt {
 	void				*doorbell_base;
 
 	uint32_t		flags;
-#define BNXT_FLAG_REGISTERED	(1 << 0)
-#define BNXT_FLAG_VF		(1 << 1)
-#define BNXT_FLAG_PORT_STATS	(1 << 2)
-#define BNXT_FLAG_JUMBO		(1 << 3)
-#define BNXT_FLAG_SHORT_CMD	(1 << 4)
-#define BNXT_FLAG_UPDATE_HASH	(1 << 5)
-#define BNXT_FLAG_PTP_SUPPORTED	(1 << 6)
-#define BNXT_FLAG_MULTI_HOST    (1 << 7)
-#define BNXT_FLAG_EXT_RX_PORT_STATS	(1 << 8)
-#define BNXT_FLAG_EXT_TX_PORT_STATS	(1 << 9)
-#define BNXT_FLAG_KONG_MB_EN	(1 << 10)
-#define BNXT_FLAG_TRUSTED_VF_EN	(1 << 11)
-#define BNXT_FLAG_DFLT_VNIC_SET	(1 << 12)
-#define BNXT_FLAG_THOR_CHIP	(1 << 13)
-#define BNXT_FLAG_STINGRAY	(1 << 14)
-#define BNXT_FLAG_FW_RESET	(1 << 15)
-#define BNXT_FLAG_FATAL_ERROR	(1 << 16)
-#define BNXT_FLAG_FW_CAP_IF_CHANGE	(1 << 17)
-#define BNXT_FLAG_FW_CAP_ERROR_RECOVERY	(1 << 18)
-#define BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED	(1 << 19)
-#define BNXT_FLAG_EXT_STATS_SUPPORTED	(1 << 29)
-#define BNXT_FLAG_NEW_RM	(1 << 30)
-#define BNXT_FLAG_INIT_DONE	(1U << 31)
+#define BNXT_FLAG_REGISTERED			BIT(0)
+#define BNXT_FLAG_VF				BIT(1)
+#define BNXT_FLAG_PORT_STATS			BIT(2)
+#define BNXT_FLAG_JUMBO				BIT(3)
+#define BNXT_FLAG_SHORT_CMD			BIT(4)
+#define BNXT_FLAG_UPDATE_HASH			BIT(5)
+#define BNXT_FLAG_PTP_SUPPORTED			BIT(6)
+#define BNXT_FLAG_MULTI_HOST    		BIT(7)
+#define BNXT_FLAG_EXT_RX_PORT_STATS		BIT(8)
+#define BNXT_FLAG_EXT_TX_PORT_STATS		BIT(9)
+#define BNXT_FLAG_KONG_MB_EN			BIT(10)
+#define BNXT_FLAG_TRUSTED_VF_EN			BIT(11)
+#define BNXT_FLAG_DFLT_VNIC_SET			BIT(12)
+#define BNXT_FLAG_THOR_CHIP			BIT(13)
+#define BNXT_FLAG_STINGRAY			BIT(14)
+#define BNXT_FLAG_FW_RESET			BIT(15)
+#define BNXT_FLAG_FATAL_ERROR			BIT(16)
+#define BNXT_FLAG_FW_CAP_IF_CHANGE		BIT(17)
+#define BNXT_FLAG_FW_CAP_ERROR_RECOVERY		BIT(18)
+#define BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED	BIT(19)
+#define BNXT_FLAG_EXT_STATS_SUPPORTED		BIT(20)
+#define BNXT_FLAG_NEW_RM			BIT(21)
+#define BNXT_FLAG_INIT_DONE			BIT(22)
 #define BNXT_PF(bp)		(!((bp)->flags & BNXT_FLAG_VF))
 #define BNXT_VF(bp)		((bp)->flags & BNXT_FLAG_VF)
 #define BNXT_NPAR(bp)		((bp)->port_partition_type)
diff --git a/drivers/net/bnxt/bnxt_util.h b/drivers/net/bnxt/bnxt_util.h
index 9f1868a78..a15b3a1a9 100644
--- a/drivers/net/bnxt/bnxt_util.h
+++ b/drivers/net/bnxt/bnxt_util.h
@@ -6,6 +6,10 @@
 #ifndef _BNXT_UTIL_H_
 #define _BNXT_UTIL_H_
 
+#ifndef BIT
+#define BIT(n)	(1UL << (n))
+#endif /* BIT */
+
 int bnxt_check_zero_bytes(const uint8_t *bytes, int len);
 void bnxt_eth_hw_addr_random(uint8_t *mac_addr);
 
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v2 13/13] net/bnxt: avoid null pointer dereference
  2019-08-30 16:35     ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                         ` (11 preceding siblings ...)
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 12/13] net/bnxt: use BIT macro instead of bit fields Ajit Khaparde
@ 2019-08-30 16:35       ` Ajit Khaparde
  2019-09-30 13:29       ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ferruh Yigit
  13 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-08-30 16:35 UTC (permalink / raw)
  To: dev
  Cc: ferruh.yigit, Kalesh AP, stable, Ajit Kumar Khaparde,
	Rahul Gupta, Lance Richardson

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

Commit "bd0a14c99f65" enables the creation of a dedicated completion
ring for asynchronous event handling instead of handling these
events on a receive completion ring on non Stingray Platforms.

This causes a segfault due to a NULL pointer dereference in
bnxt_alloc_async_cp_ring() on Stingray. Fix this by checking the
pointer validity before accessing it.

Fixes: bd0a14c99f65 ("net/bnxt: use dedicated CPR for async events")
Cc: stable@dpdk.org

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Signed-off-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Rahul Gupta <rahul.gupta@broadcom.com>
Reviewed-by: Lance Richardson <lance.richardson@broadcom.com>
---
 drivers/net/bnxt/bnxt_ring.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_ring.c b/drivers/net/bnxt/bnxt_ring.c
index 2f57e038a..ec17783cf 100644
--- a/drivers/net/bnxt/bnxt_ring.c
+++ b/drivers/net/bnxt/bnxt_ring.c
@@ -694,13 +694,15 @@ int bnxt_alloc_hwrm_rings(struct bnxt *bp)
 int bnxt_alloc_async_cp_ring(struct bnxt *bp)
 {
 	struct bnxt_cp_ring_info *cpr = bp->async_cp_ring;
-	struct bnxt_ring *cp_ring = cpr->cp_ring_struct;
+	struct bnxt_ring *cp_ring;
 	uint8_t ring_type;
 	int rc;
 
-	if (BNXT_NUM_ASYNC_CPR(bp) == 0)
+	if (BNXT_NUM_ASYNC_CPR(bp) == 0 || cpr == NULL)
 		return 0;
 
+	cp_ring = cpr->cp_ring_struct;
+
 	if (BNXT_HAS_NQ(bp))
 		ring_type = HWRM_RING_ALLOC_INPUT_RING_TYPE_NQ;
 	else
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery
  2019-08-30 16:35     ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ajit Khaparde
                         ` (12 preceding siblings ...)
  2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 13/13] net/bnxt: avoid null pointer dereference Ajit Khaparde
@ 2019-09-30 13:29       ` Ferruh Yigit
  2019-10-02  1:23         ` [dpdk-dev] [PATCH v3 00/15] " Ajit Khaparde
  13 siblings, 1 reply; 48+ messages in thread
From: Ferruh Yigit @ 2019-09-30 13:29 UTC (permalink / raw)
  To: Ajit Khaparde, dev

On 8/30/2019 5:35 PM, Ajit Khaparde wrote:
> This patchset adds support to monitor the health of the firmware and the
> underlying device and recover to an operational state in case of error.
> We can also detect if a FW upgrade is in progress and quiesce all
> access to the device and recover once FW indicates everything is ready.
> 
> Patchset against dpdk-next-net. Please apply.
> 
> Kalesh AP (13):
>   net/bnxt: add FW reset HWRM command
>   net/bnxt: prevent device access when device is in reset
>   net/bnxt: handle reset notify async event from FW
>   net/bnxt: inform firmware about IF state changes
>   net/bnxt: handle fatal event from FW under error conditions
>   net/bnxt: query firmware error recovery capabilities
>   net/bnxt: map status registers for FW health monitoring
>   net/bnxt: advertise error recovery capability and handle async event
>   net/bnxt: add code for periodic FW health monitoring
>   net/bnxt: add support for FW reset
>   net/bnxt: reduce verbosity of logs
>   net/bnxt: use BIT macro instead of bit fields
>   net/bnxt: avoid null pointer dereference


Hi Ajit, Kalesh,

Some API and dev_ops return types have been changed in next-net, and the
patchset conflicts because of those changes. Can you please send a new version
on top of the latest next-net?

Thanks,
ferruh

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v3 00/15] bnxt patchset to support device error recovery
  2019-09-30 13:29       ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ferruh Yigit
@ 2019-10-02  1:23         ` Ajit Khaparde
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 01/15] net/bnxt: add FW reset HWRM command Ajit Khaparde
                             ` (15 more replies)
  0 siblings, 16 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-10-02  1:23 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit

This patchset adds support to monitor the health of the firmware and the
underlying device and recover to an operational state in case of error.
We can also detect if a FW upgrade is in progress and quiesce all
access to the device and recover once FW indicates everything is ready.

Patchset against dpdk-next-net. Please apply.

v2->v3: Some API and dev_ops return types have been updated since
	v2 was submitted. This version addresses the conflicts caused
	by those changes.

Kalesh AP (15):
  net/bnxt: add FW reset HWRM command
  net/bnxt: prevent device access when device is in reset
  net/bnxt: handle reset notify async event from FW
  net/bnxt: inform firmware about IF state changes
  net/bnxt: handle fatal event from FW under error conditions
  net/bnxt: query firmware error recovery capabilities
  net/bnxt: map status registers for FW health monitoring
  net/bnxt: advertise error recovery capability and handle async event
  net/bnxt: add code for periodic FW health monitoring
  net/bnxt: add support for FW reset
  net/bnxt: add hot firmware upgrade support for Stingray
  net/bnxt: reduce verbosity of logs
  net/bnxt: avoid null pointer dereference
  net/bnxt: use BIT macro instead of bit fields
  net/bnxt: add PTP support for Thor

 drivers/net/bnxt/bnxt.h                |  142 +++-
 drivers/net/bnxt/bnxt_cpr.c            |  122 ++-
 drivers/net/bnxt/bnxt_cpr.h            |   19 +
 drivers/net/bnxt/bnxt_ethdev.c         | 1066 ++++++++++++++++++------
 drivers/net/bnxt/bnxt_filter.c         |    2 +-
 drivers/net/bnxt/bnxt_hwrm.c           |  310 ++++++-
 drivers/net/bnxt/bnxt_hwrm.h           |    9 +
 drivers/net/bnxt/bnxt_ring.c           |   45 +-
 drivers/net/bnxt/bnxt_ring.h           |    1 +
 drivers/net/bnxt/bnxt_rxq.c            |   25 +
 drivers/net/bnxt/bnxt_rxr.c            |   59 +-
 drivers/net/bnxt/bnxt_rxr.h            |    2 +
 drivers/net/bnxt/bnxt_stats.c          |   36 +-
 drivers/net/bnxt/bnxt_txq.c            |    7 +
 drivers/net/bnxt/bnxt_txr.c            |   34 +-
 drivers/net/bnxt/bnxt_txr.h            |    2 +
 drivers/net/bnxt/bnxt_util.h           |    4 +
 drivers/net/bnxt/bnxt_vnic.c           |    7 +-
 drivers/net/bnxt/hsi_struct_def_dpdk.h |  242 ++++++
 19 files changed, 1816 insertions(+), 318 deletions(-)

-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v3 01/15] net/bnxt: add FW reset HWRM command
  2019-10-02  1:23         ` [dpdk-dev] [PATCH v3 00/15] " Ajit Khaparde
@ 2019-10-02  1:23           ` Ajit Khaparde
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 02/15] net/bnxt: prevent device access when device is in reset Ajit Khaparde
                             ` (14 subsequent siblings)
  15 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-10-02  1:23 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

This patch adds a new FW reset HWRM command.
Code using this command will be added in a future patch.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/hsi_struct_def_dpdk.h | 137 +++++++++++++++++++++++++
 1 file changed, 137 insertions(+)

diff --git a/drivers/net/bnxt/hsi_struct_def_dpdk.h b/drivers/net/bnxt/hsi_struct_def_dpdk.h
index 6c98c1d6dd..0095717254 100644
--- a/drivers/net/bnxt/hsi_struct_def_dpdk.h
+++ b/drivers/net/bnxt/hsi_struct_def_dpdk.h
@@ -33621,4 +33621,141 @@ struct hwrm_nvm_validate_option_cmd_err {
 	uint8_t	unused_0[7];
 } __attribute__((packed));
 
+/*****************
+ * hwrm_fw_reset *
+ ******************/
+
+
+/* hwrm_fw_reset_input (size:192b/24B) */
+struct hwrm_fw_reset_input {
+	/* The HWRM command request type. */
+	uint16_t        req_type;
+	/*
+	 * The completion ring to send the completion event on. This should
+	 * be the NQ ID returned from the `nq_alloc` HWRM command.
+	 */
+	uint16_t        cmpl_ring;
+	/*
+	 * The sequence ID is used by the driver for tracking multiple
+	 * commands. This ID is treated as opaque data by the firmware and
+	 * the value is returned in the `hwrm_resp_hdr` upon completion.
+	 */
+	uint16_t        seq_id;
+	/*
+	 * The target ID of the command:
+	 * * 0x0-0xFFF8 - The function ID
+	 * * 0xFFF8-0xFFFE - Reserved for internal processors
+	 * * 0xFFFF - HWRM
+	 */
+	uint16_t        target_id;
+	/*
+	 * A physical address pointer pointing to a host buffer that the
+	 * command's response data will be written. This can be either a host
+	 * physical address (HPA) or a guest physical address (GPA) and must
+	 * point to a physically contiguous block of memory.
+	 */
+	uint64_t        resp_addr;
+	/* Type of embedded processor. */
+	uint8_t embedded_proc_type;
+	/* Boot Processor */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_BOOT \
+		UINT32_C(0x0)
+	/* Management Processor */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_MGMT \
+		UINT32_C(0x1)
+	/* Network control processor */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_NETCTRL \
+		UINT32_C(0x2)
+	/* RoCE control processor */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_ROCE \
+		UINT32_C(0x3)
+	/*
+	 * Host (in multi-host environment): This is only valid if requester is IPC.
+	 * Reinit host hardware resources and PCIe.
+	 */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_HOST \
+		UINT32_C(0x4)
+	/* AP processor complex (in multi-host environment). Use host_idx to control which core is reset */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_AP \
+		UINT32_C(0x5)
+	/* Reset all blocks of the chip (including all processors) */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_CHIP \
+		UINT32_C(0x6)
+	/*
+	 * Host (in multi-host environment): This is only valid if requester is IPC.
+	 * Reinit host hardware resources.
+	 */
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_HOST_RESOURCE_REINIT \
+		UINT32_C(0x7)
+	#define HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_LAST \
+		HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_HOST_RESOURCE_REINIT
+	/* Type of self reset. */
+	uint8_t selfrst_status;
+	/* No Self Reset */
+	#define HWRM_FW_RESET_INPUT_SELFRST_STATUS_SELFRSTNONE \
+		UINT32_C(0x0)
+	/* Self Reset as soon as possible to do so safely */
+	#define HWRM_FW_RESET_INPUT_SELFRST_STATUS_SELFRSTASAP \
+		UINT32_C(0x1)
+	/* Self Reset on PCIe Reset */
+	#define HWRM_FW_RESET_INPUT_SELFRST_STATUS_SELFRSTPCIERST \
+		UINT32_C(0x2)
+	/* Self Reset immediately after notification to all clients. */
+	#define HWRM_FW_RESET_INPUT_SELFRST_STATUS_SELFRSTIMMEDIATE \
+		UINT32_C(0x3)
+	#define HWRM_FW_RESET_INPUT_SELFRST_STATUS_LAST \
+		HWRM_FW_RESET_INPUT_SELFRST_STATUS_SELFRSTIMMEDIATE
+	/*
+	 * Indicate which host is being reset. 0 means first host.
+	 * Only valid when embedded_proc_type is host in multihost
+	 * environment
+	 */
+	uint8_t host_idx;
+	uint8_t flags;
+	/*
+	 * When this bit is '1', then the core firmware initiates
+	 * the reset only after graceful shut down of all registered instances.
+	 * If not, the device will continue with the existing firmware.
+	 */
+	#define HWRM_FW_RESET_INPUT_FLAGS_RESET_GRACEFUL     UINT32_C(0x1)
+	uint8_t unused_0[4];
+} __attribute__((packed));
+
+/* hwrm_fw_reset_output (size:128b/16B) */
+struct hwrm_fw_reset_output {
+	/* The specific error status for the command. */
+	uint16_t        error_code;
+	/* The HWRM command request type. */
+	uint16_t        req_type;
+	/* The sequence ID from the original command. */
+	uint16_t        seq_id;
+	/* The length of the response data in number of bytes. */
+	uint16_t        resp_len;
+	/* Type of self reset. */
+	uint8_t selfrst_status;
+	/* No Self Reset */
+	#define HWRM_FW_RESET_OUTPUT_SELFRST_STATUS_SELFRSTNONE \
+		UINT32_C(0x0)
+	/* Self Reset as soon as possible to do so safely */
+	#define HWRM_FW_RESET_OUTPUT_SELFRST_STATUS_SELFRSTASAP \
+		UINT32_C(0x1)
+	/* Self Reset on PCIe Reset */
+	#define HWRM_FW_RESET_OUTPUT_SELFRST_STATUS_SELFRSTPCIERST \
+		UINT32_C(0x2)
+	/* Self Reset immediately after notification to all clients. */
+	#define HWRM_FW_RESET_OUTPUT_SELFRST_STATUS_SELFRSTIMMEDIATE \
+		UINT32_C(0x3)
+	#define HWRM_FW_RESET_OUTPUT_SELFRST_STATUS_LAST \
+		HWRM_FW_RESET_OUTPUT_SELFRST_STATUS_SELFRSTIMMEDIATE
+	uint8_t unused_0[6];
+	/*
+	 * This field is used in Output records to indicate that the output
+	 * is completely written to RAM.  This field should be read as '1'
+	 * to indicate that the output has been completely written.
+	 * When writing a command completion or response to an internal processor,
+	 * the order of writes has to be such that this field is written last.
+	 */
+	uint8_t valid;
+} __attribute__((packed));
+
 #endif /* _HSI_STRUCT_DEF_DPDK_H_ */
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v3 02/15] net/bnxt: prevent device access when device is in reset
  2019-10-02  1:23         ` [dpdk-dev] [PATCH v3 00/15] " Ajit Khaparde
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 01/15] net/bnxt: add FW reset HWRM command Ajit Khaparde
@ 2019-10-02  1:23           ` Ajit Khaparde
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 03/15] net/bnxt: handle reset notify async event from FW Ajit Khaparde
                             ` (13 subsequent siblings)
  15 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-10-02  1:23 UTC (permalink / raw)
  To: dev
  Cc: ferruh.yigit, Kalesh AP, Santoshkumar Karanappa Rastapur, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

Refactor init and uninit functions so that the driver can fail
the eth_dev_ops callbacks and block access to Tx and Rx queues
when the device is in reset or in an error state.

Transmit and receive queues are freed during reset cleanup and
reallocated during recovery. So we block all data path handling
in this state. The eth_dev dev_started field is updated depending
on the status of the device.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Santoshkumar Karanappa Rastapur <santosh.rastapur@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        |   3 +
 drivers/net/bnxt/bnxt_cpr.c    |   3 +
 drivers/net/bnxt/bnxt_ethdev.c | 461 ++++++++++++++++++++++-----------
 drivers/net/bnxt/bnxt_hwrm.c   |   2 -
 drivers/net/bnxt/bnxt_ring.c   |  32 +++
 drivers/net/bnxt/bnxt_ring.h   |   1 +
 drivers/net/bnxt/bnxt_rxq.c    |  25 ++
 drivers/net/bnxt/bnxt_rxr.c    |  17 ++
 drivers/net/bnxt/bnxt_rxr.h    |   2 +
 drivers/net/bnxt/bnxt_stats.c  |  36 ++-
 drivers/net/bnxt/bnxt_txq.c    |   7 +
 drivers/net/bnxt/bnxt_txr.c    |  27 ++
 drivers/net/bnxt/bnxt_txr.h    |   2 +
 13 files changed, 465 insertions(+), 153 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 0c9f994eaa..37b4c717d6 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -358,6 +358,8 @@ struct bnxt {
 #define BNXT_FLAG_DFLT_VNIC_SET	(1 << 12)
 #define BNXT_FLAG_THOR_CHIP	(1 << 13)
 #define BNXT_FLAG_STINGRAY	(1 << 14)
+#define BNXT_FLAG_FW_RESET	(1 << 15)
+#define BNXT_FLAG_FATAL_ERROR	(1 << 16)
 #define BNXT_FLAG_EXT_STATS_SUPPORTED	(1 << 29)
 #define BNXT_FLAG_NEW_RM	(1 << 30)
 #define BNXT_FLAG_INIT_DONE	(1U << 31)
@@ -465,6 +467,7 @@ struct bnxt {
 
 int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete);
 int bnxt_rcv_msg_from_vf(struct bnxt *bp, uint16_t vf_id, void *msg);
+int is_bnxt_in_error(struct bnxt *bp);
 
 bool is_bnxt_supported(struct rte_eth_dev *dev);
 bool bnxt_stratus_device(struct bnxt *bp);
diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index 655bcf1a8d..bbcdb42f10 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -142,6 +142,9 @@ int bnxt_event_hwrm_resp_handler(struct bnxt *bp, struct cmpl_base *cmp)
 		return evt;
 	}
 
+	if (unlikely(is_bnxt_in_error(bp)))
+		return 0;
+
 	switch (CMP_TYPE(cmp)) {
 	case CMPL_BASE_TYPE_HWRM_ASYNC_EVENT:
 		/* Handle any async event */
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index eb8701131a..d90a6e4202 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -167,6 +167,16 @@ static void bnxt_print_link_info(struct rte_eth_dev *eth_dev);
 static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu);
 static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev);
 
+int is_bnxt_in_error(struct bnxt *bp)
+{
+	if (bp->flags & BNXT_FLAG_FATAL_ERROR)
+		return -EIO;
+	if (bp->flags & BNXT_FLAG_FW_RESET)
+		return -EBUSY;
+
+	return 0;
+}
+
 /***********************/
 
 /*
@@ -207,6 +217,10 @@ static int bnxt_alloc_mem(struct bnxt *bp)
 {
 	int rc;
 
+	rc = bnxt_alloc_ring_grps(bp);
+	if (rc)
+		goto alloc_mem_err;
+
 	rc = bnxt_alloc_async_ring_struct(bp);
 	if (rc)
 		goto alloc_mem_err;
@@ -500,6 +514,11 @@ static int bnxt_dev_info_get_op(struct rte_eth_dev *eth_dev,
 	struct bnxt *bp = eth_dev->data->dev_private;
 	uint16_t max_vnics, i, j, vpool, vrxq;
 	unsigned int max_rx_rings;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	/* MAC Specifics */
 	dev_info->max_mac_addrs = bp->max_l2_ctx;
@@ -604,6 +623,10 @@ static int bnxt_dev_configure_op(struct rte_eth_dev *eth_dev)
 	bp->tx_nr_rings = eth_dev->data->nb_tx_queues;
 	bp->rx_nr_rings = eth_dev->data->nb_rx_queues;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (BNXT_VF(bp) && (bp->flags & BNXT_FLAG_NEW_RM)) {
 		rc = bnxt_hwrm_check_vf_rings(bp);
 		if (rc) {
@@ -793,8 +816,10 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
 
 	eth_dev->rx_pkt_burst = bnxt_receive_function(eth_dev);
 	eth_dev->tx_pkt_burst = bnxt_transmit_function(eth_dev);
+
 	bnxt_enable_int(bp);
 	bp->flags |= BNXT_FLAG_INIT_DONE;
+	eth_dev->data->dev_started = 1;
 	bp->dev_stopped = 0;
 	return 0;
 
@@ -837,6 +862,11 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 
+	eth_dev->data->dev_started = 0;
+	/* Prevent crashes when queues are still in use */
+	eth_dev->rx_pkt_burst = &bnxt_dummy_recv_pkts;
+	eth_dev->tx_pkt_burst = &bnxt_dummy_xmit_pkts;
+
 	bnxt_disable_int(bp);
 
 	/* disable uio/vfio intr/eventfd mapping */
@@ -891,6 +921,9 @@ static void bnxt_mac_addr_remove_op(struct rte_eth_dev *eth_dev,
 	struct bnxt_filter_info *filter, *temp_filter;
 	uint32_t i;
 
+	if (is_bnxt_in_error(bp))
+		return;
+
 	/*
 	 * Loop through all VNICs from the specified filter flow pools to
 	 * remove the corresponding MAC addr filter
@@ -926,6 +959,10 @@ static int bnxt_mac_addr_add_op(struct rte_eth_dev *eth_dev,
 	struct bnxt_filter_info *filter;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (BNXT_VF(bp) & !BNXT_VF_IS_TRUSTED(bp)) {
 		PMD_DRV_LOG(ERR, "Cannot add MAC address to a VF interface\n");
 		return -ENOTSUP;
@@ -971,6 +1008,10 @@ int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete)
 	struct rte_eth_link new;
 	unsigned int cnt = BNXT_LINK_WAIT_CNT;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	memset(&new, 0, sizeof(new));
 	do {
 		/* Retrieve link info from hardware */
@@ -1013,6 +1054,10 @@ static int bnxt_promiscuous_enable_op(struct rte_eth_dev *eth_dev)
 	uint32_t old_flags;
 	int rc;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (bp->vnic_info == NULL)
 		return 0;
 
@@ -1034,6 +1079,10 @@ static int bnxt_promiscuous_disable_op(struct rte_eth_dev *eth_dev)
 	uint32_t old_flags;
 	int rc;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (bp->vnic_info == NULL)
 		return 0;
 
@@ -1055,6 +1104,10 @@ static int bnxt_allmulticast_enable_op(struct rte_eth_dev *eth_dev)
 	uint32_t old_flags;
 	int rc;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (bp->vnic_info == NULL)
 		return 0;
 
@@ -1076,6 +1129,10 @@ static int bnxt_allmulticast_disable_op(struct rte_eth_dev *eth_dev)
 	uint32_t old_flags;
 	int rc;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (bp->vnic_info == NULL)
 		return 0;
 
@@ -1130,7 +1187,11 @@ static int bnxt_reta_update_op(struct rte_eth_dev *eth_dev,
 	struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
 	uint16_t tbl_size = bnxt_rss_hash_tbl_size(bp);
 	uint16_t idx, sft;
-	int i;
+	int i, rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	if (!vnic->rss_table)
 		return -EINVAL;
@@ -1186,6 +1247,11 @@ static int bnxt_reta_query_op(struct rte_eth_dev *eth_dev,
 	struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
 	uint16_t tbl_size = bnxt_rss_hash_tbl_size(bp);
 	uint16_t idx, sft, i;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	/* Retrieve from the default VNIC */
 	if (!vnic)
@@ -1232,6 +1298,11 @@ static int bnxt_rss_hash_update_op(struct rte_eth_dev *eth_dev,
 	struct bnxt_vnic_info *vnic;
 	uint16_t hash_type = 0;
 	unsigned int i;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	/*
 	 * If RSS enablement were different than dev_configure,
@@ -1285,9 +1356,13 @@ static int bnxt_rss_hash_conf_get_op(struct rte_eth_dev *eth_dev,
 {
 	struct bnxt *bp = eth_dev->data->dev_private;
 	struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
-	int len;
+	int len, rc;
 	uint32_t hash_types;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	/* RSS configuration is the same for all VNICs */
 	if (vnic && vnic->rss_hash_key) {
 		if (rss_conf->rss_key) {
@@ -1345,6 +1420,10 @@ static int bnxt_flow_ctrl_get_op(struct rte_eth_dev *dev,
 	struct rte_eth_link link_info;
 	int rc;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	rc = bnxt_get_hwrm_link_config(bp, &link_info);
 	if (rc)
 		return rc;
@@ -1374,6 +1453,11 @@ static int bnxt_flow_ctrl_set_op(struct rte_eth_dev *dev,
 			       struct rte_eth_fc_conf *fc_conf)
 {
 	struct bnxt *bp = dev->data->dev_private;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	if (!BNXT_SINGLE_PF(bp) || BNXT_VF(bp)) {
 		PMD_DRV_LOG(ERR, "Flow Control Settings cannot be modified\n");
@@ -1433,6 +1517,10 @@ bnxt_udp_tunnel_port_add_op(struct rte_eth_dev *eth_dev,
 	uint16_t tunnel_type = 0;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	switch (udp_tunnel->prot_type) {
 	case RTE_TUNNEL_TYPE_VXLAN:
 		if (bp->vxlan_port_cnt) {
@@ -1482,6 +1570,10 @@ bnxt_udp_tunnel_port_del_op(struct rte_eth_dev *eth_dev,
 	uint16_t port = 0;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	switch (udp_tunnel->prot_type) {
 	case RTE_TUNNEL_TYPE_VXLAN:
 		if (!bp->vxlan_port_cnt) {
@@ -1635,6 +1727,11 @@ static int bnxt_vlan_filter_set_op(struct rte_eth_dev *eth_dev,
 		uint16_t vlan_id, int on)
 {
 	struct bnxt *bp = eth_dev->data->dev_private;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	/* These operations apply to ALL existing MAC/VLAN filters */
 	if (on)
@@ -1649,6 +1746,11 @@ bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask)
 	struct bnxt *bp = dev->data->dev_private;
 	uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads;
 	unsigned int i;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	if (mask & ETH_VLAN_FILTER_MASK) {
 		if (!(rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER)) {
@@ -1690,6 +1792,10 @@ bnxt_set_default_mac_addr_op(struct rte_eth_dev *dev,
 	struct bnxt_filter_info *filter;
 	int rc;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (BNXT_VF(bp) && !BNXT_VF_IS_TRUSTED(bp))
 		return -EPERM;
 
@@ -1729,6 +1835,11 @@ bnxt_dev_set_mc_addr_list_op(struct rte_eth_dev *eth_dev,
 	char *mc_addr_list = (char *)mc_addr_set;
 	struct bnxt_vnic_info *vnic;
 	uint32_t off = 0, i = 0;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	vnic = &bp->vnic_info[0];
 
@@ -1814,6 +1925,10 @@ static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu)
 	uint32_t rc = 0;
 	uint32_t i;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	new_pkt_size = new_mtu + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN +
 		       VLAN_TAG_SIZE * BNXT_NUM_VLANS;
 
@@ -1891,6 +2006,10 @@ bnxt_vlan_pvid_set_op(struct rte_eth_dev *dev, uint16_t pvid, int on)
 	uint16_t vlan = bp->vlan;
 	int rc;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (!BNXT_SINGLE_PF(bp) || BNXT_VF(bp)) {
 		PMD_DRV_LOG(ERR,
 			"PVID cannot be modified for this function\n");
@@ -1908,6 +2027,11 @@ static int
 bnxt_dev_led_on_op(struct rte_eth_dev *dev)
 {
 	struct bnxt *bp = dev->data->dev_private;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	return bnxt_hwrm_port_led_cfg(bp, true);
 }
@@ -1916,6 +2040,11 @@ static int
 bnxt_dev_led_off_op(struct rte_eth_dev *dev)
 {
 	struct bnxt *bp = dev->data->dev_private;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	return bnxt_hwrm_port_led_cfg(bp, false);
 }
@@ -1923,6 +2052,7 @@ bnxt_dev_led_off_op(struct rte_eth_dev *dev)
 static uint32_t
 bnxt_rx_queue_count_op(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 {
+	struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
 	uint32_t desc = 0, raw_cons = 0, cons;
 	struct bnxt_cp_ring_info *cpr;
 	struct bnxt_rx_queue *rxq;
@@ -1930,6 +2060,11 @@ bnxt_rx_queue_count_op(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 	uint16_t cmp_type;
 	uint8_t cmp = 1;
 	bool valid;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	rxq = dev->data->rx_queues[rx_queue_id];
 	cpr = rxq->cp_ring;
@@ -1974,10 +2109,15 @@ bnxt_rx_descriptor_status_op(void *rx_queue, uint16_t offset)
 	struct bnxt_sw_rx_bd *rx_buf;
 	struct rx_pkt_cmpl *rxcmp;
 	uint32_t cons, cp_cons;
+	int rc;
 
 	if (!rxq)
 		return -EINVAL;
 
+	rc = is_bnxt_in_error(rxq->bp);
+	if (rc)
+		return rc;
+
 	cpr = rxq->cp_ring;
 	rxr = rxq->rx_ring;
 
@@ -2012,10 +2152,15 @@ bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
 	struct bnxt_sw_tx_bd *tx_buf;
 	struct tx_pkt_cmpl *txcmp;
 	uint32_t cons, cp_cons;
+	int rc;
 
 	if (!txq)
 		return -EINVAL;
 
+	rc = is_bnxt_in_error(txq->bp);
+	if (rc)
+		return rc;
+
 	cpr = txq->cp_ring;
 	txr = txq->tx_ring;
 
@@ -2845,6 +2990,10 @@ bnxt_filter_ctrl_op(struct rte_eth_dev *dev __rte_unused,
 {
 	int ret = 0;
 
+	ret = is_bnxt_in_error(dev->data->dev_private);
+	if (ret)
+		return ret;
+
 	switch (filter_type) {
 	case RTE_ETH_FILTER_TUNNEL:
 		PMD_DRV_LOG(ERR,
@@ -3160,6 +3309,10 @@ bnxt_get_eeprom_length_op(struct rte_eth_dev *dev)
 	uint32_t dir_entries;
 	uint32_t entry_length;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	PMD_DRV_LOG(INFO, "%04x:%02x:%02x:%02x\n",
 		bp->pdev->addr.domain, bp->pdev->addr.bus,
 		bp->pdev->addr.devid, bp->pdev->addr.function);
@@ -3178,6 +3331,11 @@ bnxt_get_eeprom_op(struct rte_eth_dev *dev,
 	struct bnxt *bp = dev->data->dev_private;
 	uint32_t index;
 	uint32_t offset;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	PMD_DRV_LOG(INFO, "%04x:%02x:%02x:%02x in_eeprom->offset = %d "
 		"len = %d\n", bp->pdev->addr.domain,
@@ -3249,6 +3407,11 @@ bnxt_set_eeprom_op(struct rte_eth_dev *dev,
 	struct bnxt *bp = dev->data->dev_private;
 	uint8_t index, dir_op;
 	uint16_t type, ext, ordinal, attr;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	PMD_DRV_LOG(INFO, "%04x:%02x:%02x:%02x in_eeprom->offset = %d "
 		"len = %d\n", bp->pdev->addr.domain,
@@ -3802,19 +3965,139 @@ static int bnxt_setup_mac_addr(struct rte_eth_dev *eth_dev)
 	return rc;
 }
 
+static void bnxt_config_vf_req_fwd(struct bnxt *bp)
+{
+	if (!BNXT_PF(bp))
+		return;
+
 #define ALLOW_FUNC(x)	\
 	{ \
 		uint32_t arg = (x); \
 		bp->pf.vf_req_fwd[((arg) >> 5)] &= \
 		~rte_cpu_to_le_32(1 << ((arg) & 0x1f)); \
 	}
+
+	/* Forward all requests if firmware is new enough */
+	if (((bp->fw_ver >= ((20 << 24) | (6 << 16) | (100 << 8))) &&
+	     (bp->fw_ver < ((20 << 24) | (7 << 16)))) ||
+	    ((bp->fw_ver >= ((20 << 24) | (8 << 16))))) {
+		memset(bp->pf.vf_req_fwd, 0xff, sizeof(bp->pf.vf_req_fwd));
+	} else {
+		PMD_DRV_LOG(WARNING,
+			    "Firmware too old for VF mailbox functionality\n");
+		memset(bp->pf.vf_req_fwd, 0, sizeof(bp->pf.vf_req_fwd));
+	}
+
+	/*
+	 * The following are used for driver cleanup. If we disallow these,
+	 * VF drivers can't clean up cleanly.
+	 */
+	ALLOW_FUNC(HWRM_FUNC_DRV_UNRGTR);
+	ALLOW_FUNC(HWRM_VNIC_FREE);
+	ALLOW_FUNC(HWRM_RING_FREE);
+	ALLOW_FUNC(HWRM_RING_GRP_FREE);
+	ALLOW_FUNC(HWRM_VNIC_RSS_COS_LB_CTX_FREE);
+	ALLOW_FUNC(HWRM_CFA_L2_FILTER_FREE);
+	ALLOW_FUNC(HWRM_STAT_CTX_FREE);
+	ALLOW_FUNC(HWRM_PORT_PHY_QCFG);
+	ALLOW_FUNC(HWRM_VNIC_TPA_CFG);
+}
+
+static int bnxt_init_fw(struct bnxt *bp)
+{
+	uint16_t mtu;
+	int rc = 0;
+
+	rc = bnxt_hwrm_ver_get(bp);
+	if (rc)
+		return rc;
+
+	rc = bnxt_hwrm_func_reset(bp);
+	if (rc)
+		return -EIO;
+
+	rc = bnxt_hwrm_queue_qportcfg(bp);
+	if (rc)
+		return rc;
+
+	/* Get the MAX capabilities for this function */
+	rc = bnxt_hwrm_func_qcaps(bp);
+	if (rc)
+		return rc;
+
+	rc = bnxt_hwrm_func_qcfg(bp, &mtu);
+	if (rc)
+		return rc;
+
+	if (mtu >= RTE_ETHER_MIN_MTU && mtu <= BNXT_MAX_MTU &&
+	    mtu != bp->eth_dev->data->mtu)
+		bp->eth_dev->data->mtu = mtu;
+
+	bnxt_hwrm_port_led_qcaps(bp);
+
+	return 0;
+}
+
+static int bnxt_init_resources(struct bnxt *bp)
+{
+	int rc;
+
+	rc = bnxt_init_fw(bp);
+	if (rc)
+		return rc;
+
+	rc = bnxt_setup_mac_addr(bp->eth_dev);
+	if (rc)
+		return rc;
+
+	bnxt_config_vf_req_fwd(bp);
+
+	rc = bnxt_hwrm_func_driver_register(bp);
+	if (rc) {
+		PMD_DRV_LOG(ERR, "Failed to register driver");
+		return -EBUSY;
+	}
+
+	if (BNXT_PF(bp)) {
+		if (bp->pdev->max_vfs) {
+			rc = bnxt_hwrm_allocate_vfs(bp, bp->pdev->max_vfs);
+			if (rc) {
+				PMD_DRV_LOG(ERR, "Failed to allocate VFs\n");
+				return rc;
+			}
+		} else {
+			rc = bnxt_hwrm_allocate_pf_only(bp);
+			if (rc) {
+				PMD_DRV_LOG(ERR,
+					    "Failed to allocate PF resources");
+				return rc;
+			}
+		}
+	}
+
+	rc = bnxt_alloc_mem(bp);
+	if (rc)
+		return rc;
+
+	rc = bnxt_setup_int(bp);
+	if (rc)
+		return rc;
+
+	bnxt_init_nic(bp);
+
+	rc = bnxt_request_int(bp);
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
 static int
 bnxt_dev_init(struct rte_eth_dev *eth_dev)
 {
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 	static int version_printed;
 	struct bnxt *bp;
-	uint16_t mtu;
 	int rc;
 
 	if (version_printed++ == 0)
@@ -3856,166 +4139,50 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
 	rc = bnxt_init_board(eth_dev);
 	if (rc) {
 		PMD_DRV_LOG(ERR,
-			"Board initialization failed rc: %x\n", rc);
-		goto error;
+			    "Failed to initialize board rc: %x\n", rc);
+		return rc;
 	}
 
 	rc = bnxt_alloc_hwrm_resources(bp);
 	if (rc) {
 		PMD_DRV_LOG(ERR,
-			"hwrm resource allocation failure rc: %x\n", rc);
+			    "Failed to allocate hwrm resource rc: %x\n", rc);
 		goto error_free;
 	}
-	rc = bnxt_hwrm_ver_get(bp);
+	rc = bnxt_init_resources(bp);
 	if (rc)
 		goto error_free;
 
-	rc = bnxt_hwrm_func_reset(bp);
-	if (rc) {
-		PMD_DRV_LOG(ERR, "hwrm chip reset failure rc: %x\n", rc);
-		rc = -EIO;
-		goto error_free;
-	}
-
-	rc = bnxt_hwrm_queue_qportcfg(bp);
-	if (rc) {
-		PMD_DRV_LOG(ERR, "hwrm queue qportcfg failed\n");
-		goto error_free;
-	}
-	/* Get the MAX capabilities for this function */
-	rc = bnxt_hwrm_func_qcaps(bp);
-	if (rc) {
-		PMD_DRV_LOG(ERR, "hwrm query capability failure rc: %x\n", rc);
-		goto error_free;
-	}
-
 	rc = bnxt_alloc_stats_mem(bp);
 	if (rc)
 		goto error_free;
 
-	if (bp->max_tx_rings == 0) {
-		PMD_DRV_LOG(ERR, "No TX rings available!\n");
-		rc = -EBUSY;
-		goto error_free;
-	}
-
-	rc = bnxt_setup_mac_addr(eth_dev);
-	if (rc)
-		goto error_free;
-
-	/* THOR does not support ring groups.
-	 * But we will use the array to save RSS context IDs.
-	 */
-	if (BNXT_CHIP_THOR(bp)) {
-		bp->max_ring_grps = BNXT_MAX_RSS_CTXTS_THOR;
-	} else if (bp->max_ring_grps < bp->rx_cp_nr_rings) {
-		/* 1 ring is for default completion ring */
-		PMD_DRV_LOG(ERR, "Insufficient resource: Ring Group\n");
-		rc = -ENOSPC;
-		goto error_free;
-	}
-
-	if (BNXT_HAS_RING_GRPS(bp)) {
-		bp->grp_info = rte_zmalloc("bnxt_grp_info",
-					sizeof(*bp->grp_info) *
-						bp->max_ring_grps, 0);
-		if (!bp->grp_info) {
-			PMD_DRV_LOG(ERR,
-				"Failed to alloc %zu bytes for grp info tbl.\n",
-				sizeof(*bp->grp_info) * bp->max_ring_grps);
-			rc = -ENOMEM;
-			goto error_free;
-		}
-	}
-
-	/* Forward all requests if firmware is new enough */
-	if (((bp->fw_ver >= ((20 << 24) | (6 << 16) | (100 << 8))) &&
-	    (bp->fw_ver < ((20 << 24) | (7 << 16)))) ||
-	    ((bp->fw_ver >= ((20 << 24) | (8 << 16))))) {
-		memset(bp->pf.vf_req_fwd, 0xff, sizeof(bp->pf.vf_req_fwd));
-	} else {
-		PMD_DRV_LOG(WARNING,
-			"Firmware too old for VF mailbox functionality\n");
-		memset(bp->pf.vf_req_fwd, 0, sizeof(bp->pf.vf_req_fwd));
-	}
-
-	/*
-	 * The following are used for driver cleanup.  If we disallow these,
-	 * VF drivers can't clean up cleanly.
-	 */
-	ALLOW_FUNC(HWRM_FUNC_DRV_UNRGTR);
-	ALLOW_FUNC(HWRM_VNIC_FREE);
-	ALLOW_FUNC(HWRM_RING_FREE);
-	ALLOW_FUNC(HWRM_RING_GRP_FREE);
-	ALLOW_FUNC(HWRM_VNIC_RSS_COS_LB_CTX_FREE);
-	ALLOW_FUNC(HWRM_CFA_L2_FILTER_FREE);
-	ALLOW_FUNC(HWRM_STAT_CTX_FREE);
-	ALLOW_FUNC(HWRM_PORT_PHY_QCFG);
-	ALLOW_FUNC(HWRM_VNIC_TPA_CFG);
-	rc = bnxt_hwrm_func_driver_register(bp);
-	if (rc) {
-		PMD_DRV_LOG(ERR,
-			"Failed to register driver");
-		rc = -EBUSY;
-		goto error_free;
-	}
-
 	PMD_DRV_LOG(INFO,
-		DRV_MODULE_NAME " found at mem %" PRIx64 ", node addr %pM\n",
-		pci_dev->mem_resource[0].phys_addr,
-		pci_dev->mem_resource[0].addr);
-
-	rc = bnxt_hwrm_func_qcfg(bp, &mtu);
-	if (rc) {
-		PMD_DRV_LOG(ERR, "hwrm func qcfg failed\n");
-		goto error_free;
-	}
-
-	if (mtu >= RTE_ETHER_MIN_MTU && mtu <= BNXT_MAX_MTU &&
-	    mtu != eth_dev->data->mtu)
-		eth_dev->data->mtu = mtu;
-
-	if (BNXT_PF(bp)) {
-		//if (bp->pf.active_vfs) {
-			// TODO: Deallocate VF resources?
-		//}
-		if (bp->pdev->max_vfs) {
-			rc = bnxt_hwrm_allocate_vfs(bp, bp->pdev->max_vfs);
-			if (rc) {
-				PMD_DRV_LOG(ERR, "Failed to allocate VFs\n");
-				goto error_free;
-			}
-		} else {
-			rc = bnxt_hwrm_allocate_pf_only(bp);
-			if (rc) {
-				PMD_DRV_LOG(ERR,
-					"Failed to allocate PF resources\n");
-				goto error_free;
-			}
-		}
-	}
-
-	bnxt_hwrm_port_led_qcaps(bp);
-
-	rc = bnxt_setup_int(bp);
-	if (rc)
-		goto error_free;
-
-	rc = bnxt_alloc_mem(bp);
-	if (rc)
-		goto error_free;
-
-	bnxt_init_nic(bp);
-
-	rc = bnxt_request_int(bp);
-	if (rc)
-		goto error_free;
+		    DRV_MODULE_NAME "found at mem %" PRIX64 ", node addr %pM\n",
+		    pci_dev->mem_resource[0].phys_addr,
+		    pci_dev->mem_resource[0].addr);
 
 	return 0;
 
 error_free:
 	bnxt_dev_uninit(eth_dev);
-error:
+	return rc;
+}
+
+static int
+bnxt_uninit_resources(struct bnxt *bp)
+{
+	int rc;
+
+	bnxt_disable_int(bp);
+	bnxt_free_int(bp);
+	bnxt_free_mem(bp);
+	bnxt_hwrm_func_buf_unrgtr(bp);
+	rc = bnxt_hwrm_func_driver_unregister(bp, 0);
+	bp->flags &= ~BNXT_FLAG_REGISTERED;
+	bnxt_free_ctx_mem(bp);
+	bnxt_free_hwrm_resources(bp);
+
 	return rc;
 }
 
@@ -4029,18 +4196,13 @@ bnxt_dev_uninit(struct rte_eth_dev *eth_dev)
 		return -EPERM;
 
 	PMD_DRV_LOG(DEBUG, "Calling Device uninit\n");
-	bnxt_disable_int(bp);
-	bnxt_free_int(bp);
-	bnxt_free_mem(bp);
 
-	bnxt_hwrm_func_buf_unrgtr(bp);
+	rc = bnxt_uninit_resources(bp);
 
 	if (bp->grp_info != NULL) {
 		rte_free(bp->grp_info);
 		bp->grp_info = NULL;
 	}
-	rc = bnxt_hwrm_func_driver_unregister(bp, 0);
-	bnxt_free_hwrm_resources(bp);
 
 	if (bp->tx_mem_zone) {
 		rte_memzone_free((const struct rte_memzone *)bp->tx_mem_zone);
@@ -4056,7 +4218,6 @@ bnxt_dev_uninit(struct rte_eth_dev *eth_dev)
 		bnxt_dev_close_op(eth_dev);
 	if (bp->pf.vf_info)
 		rte_free(bp->pf.vf_info);
-	bnxt_free_ctx_mem(bp);
 	eth_dev->dev_ops = NULL;
 	eth_dev->rx_pkt_burst = NULL;
 	eth_dev->tx_pkt_burst = NULL;
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 9883fb5063..24a5a09147 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -964,8 +964,6 @@ int bnxt_hwrm_func_driver_unregister(struct bnxt *bp, uint32_t flags)
 	HWRM_CHECK_RESULT();
 	HWRM_UNLOCK();
 
-	bp->flags &= ~BNXT_FLAG_REGISTERED;
-
 	return rc;
 }
 
diff --git a/drivers/net/bnxt/bnxt_ring.c b/drivers/net/bnxt/bnxt_ring.c
index be15b4bd14..f19865c832 100644
--- a/drivers/net/bnxt/bnxt_ring.c
+++ b/drivers/net/bnxt/bnxt_ring.c
@@ -50,6 +50,38 @@ int bnxt_init_ring_grps(struct bnxt *bp)
 	return 0;
 }
 
+int bnxt_alloc_ring_grps(struct bnxt *bp)
+{
+	if (bp->max_tx_rings == 0) {
+		PMD_DRV_LOG(ERR, "No TX rings available!\n");
+		return -EBUSY;
+	}
+
+	/* THOR does not support ring groups.
+	 * But we will use the array to save RSS context IDs.
+	 */
+	if (BNXT_CHIP_THOR(bp)) {
+		bp->max_ring_grps = BNXT_MAX_RSS_CTXTS_THOR;
+	} else if (bp->max_ring_grps < bp->rx_cp_nr_rings) {
+		/* 1 ring is for default completion ring */
+		PMD_DRV_LOG(ERR, "Insufficient resource: Ring Group\n");
+		return -ENOSPC;
+	}
+
+	if (BNXT_HAS_RING_GRPS(bp)) {
+		bp->grp_info = rte_zmalloc("bnxt_grp_info",
+					   sizeof(*bp->grp_info) *
+					   bp->max_ring_grps, 0);
+		if (!bp->grp_info) {
+			PMD_DRV_LOG(ERR,
+				    "Failed to alloc grp info tbl.\n");
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * Allocates a completion ring with vmem and stats optionally also allocating
  * a TX and/or RX ring.  Passing NULL as tx_ring_info and/or rx_ring_info
diff --git a/drivers/net/bnxt/bnxt_ring.h b/drivers/net/bnxt/bnxt_ring.h
index 04c7b04b82..a31d59ea39 100644
--- a/drivers/net/bnxt/bnxt_ring.h
+++ b/drivers/net/bnxt/bnxt_ring.h
@@ -67,6 +67,7 @@ struct bnxt_rx_ring_info;
 struct bnxt_cp_ring_info;
 void bnxt_free_ring(struct bnxt_ring *ring);
 int bnxt_init_ring_grps(struct bnxt *bp);
+int bnxt_alloc_ring_grps(struct bnxt *bp);
 int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
 			    struct bnxt_tx_queue *txq,
 			    struct bnxt_rx_queue *rxq,
diff --git a/drivers/net/bnxt/bnxt_rxq.c b/drivers/net/bnxt/bnxt_rxq.c
index 1d95f11394..d5fc5268db 100644
--- a/drivers/net/bnxt/bnxt_rxq.c
+++ b/drivers/net/bnxt/bnxt_rxq.c
@@ -263,6 +263,9 @@ void bnxt_rx_queue_release_op(void *rx_queue)
 	struct bnxt_rx_queue *rxq = (struct bnxt_rx_queue *)rx_queue;
 
 	if (rxq) {
+		if (is_bnxt_in_error(rxq->bp))
+			return;
+
 		bnxt_rx_queue_release_mbufs(rxq);
 
 		/* Free RX ring hardware descriptors */
@@ -294,6 +297,10 @@ int bnxt_rx_queue_setup_op(struct rte_eth_dev *eth_dev,
 	int rc = 0;
 	uint8_t queue_state;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (queue_idx >= bp->max_rx_rings) {
 		PMD_DRV_LOG(ERR,
 			"Cannot create Rx ring %d. Only %d rings available\n",
@@ -363,10 +370,15 @@ int bnxt_rx_queue_setup_op(struct rte_eth_dev *eth_dev,
 int
 bnxt_rx_queue_intr_enable_op(struct rte_eth_dev *eth_dev, uint16_t queue_id)
 {
+	struct bnxt *bp = eth_dev->data->dev_private;
 	struct bnxt_rx_queue *rxq;
 	struct bnxt_cp_ring_info *cpr;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (eth_dev->data->rx_queues) {
 		rxq = eth_dev->data->rx_queues[queue_id];
 		if (!rxq) {
@@ -382,10 +394,15 @@ bnxt_rx_queue_intr_enable_op(struct rte_eth_dev *eth_dev, uint16_t queue_id)
 int
 bnxt_rx_queue_intr_disable_op(struct rte_eth_dev *eth_dev, uint16_t queue_id)
 {
+	struct bnxt *bp = eth_dev->data->dev_private;
 	struct bnxt_rx_queue *rxq;
 	struct bnxt_cp_ring_info *cpr;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (eth_dev->data->rx_queues) {
 		rxq = eth_dev->data->rx_queues[queue_id];
 		if (!rxq) {
@@ -406,6 +423,10 @@ int bnxt_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 	struct bnxt_vnic_info *vnic = NULL;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (rxq == NULL) {
 		PMD_DRV_LOG(ERR, "Invalid Rx queue %d\n", rx_queue_id);
 		return -EINVAL;
@@ -458,6 +479,10 @@ int bnxt_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
 	struct bnxt_rx_queue *rxq = NULL;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	/* For the stingray platform and other platforms needing tighter
 	 * control of resource utilization, Rx CQ 0 also works as
 	 * Default CQ for async notifications
diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c
index 185a0e376b..12313dd53c 100644
--- a/drivers/net/bnxt/bnxt_rxr.c
+++ b/drivers/net/bnxt/bnxt_rxr.c
@@ -539,6 +539,9 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	int rc = 0;
 	bool evt = false;
 
+	if (unlikely(is_bnxt_in_error(rxq->bp)))
+		return 0;
+
 	/* If Rx Q was stopped return. RxQ0 cannot be stopped. */
 	if (unlikely(((rxq->rx_deferred_start ||
 		       !rte_spinlock_trylock(&rxq->lock)) &&
@@ -625,6 +628,20 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	return nb_rx_pkts;
 }
 
+/*
+ * Dummy DPDK callback for RX.
+ *
+ * This function is used to temporarily replace the real callback during
+ * unsafe control operations on the queue, or in case of error.
+ */
+uint16_t
+bnxt_dummy_recv_pkts(void *rx_queue __rte_unused,
+		     struct rte_mbuf **rx_pkts __rte_unused,
+		     uint16_t nb_pkts __rte_unused)
+{
+	return 0;
+}
+
 void bnxt_free_rx_rings(struct bnxt *bp)
 {
 	int i;
diff --git a/drivers/net/bnxt/bnxt_rxr.h b/drivers/net/bnxt/bnxt_rxr.h
index 6a80c37c81..493b754066 100644
--- a/drivers/net/bnxt/bnxt_rxr.h
+++ b/drivers/net/bnxt/bnxt_rxr.h
@@ -185,6 +185,8 @@ struct bnxt_rx_ring_info {
 
 uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 			       uint16_t nb_pkts);
+uint16_t bnxt_dummy_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+			      uint16_t nb_pkts);
 void bnxt_free_rx_rings(struct bnxt *bp);
 int bnxt_init_rx_ring_struct(struct bnxt_rx_queue *rxq, unsigned int socket_id);
 int bnxt_init_one_rx_ring(struct bnxt_rx_queue *rxq);
diff --git a/drivers/net/bnxt/bnxt_stats.c b/drivers/net/bnxt/bnxt_stats.c
index 049ad9e398..21012e1fee 100644
--- a/drivers/net/bnxt/bnxt_stats.c
+++ b/drivers/net/bnxt/bnxt_stats.c
@@ -353,6 +353,10 @@ int bnxt_stats_get_op(struct rte_eth_dev *eth_dev,
 	struct bnxt *bp = eth_dev->data->dev_private;
 	unsigned int num_q_stats;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	memset(bnxt_stats, 0, sizeof(*bnxt_stats));
 	if (!(bp->flags & BNXT_FLAG_INIT_DONE)) {
 		PMD_DRV_LOG(ERR, "Device Initialization not complete!\n");
@@ -398,6 +402,10 @@ int bnxt_stats_reset_op(struct rte_eth_dev *eth_dev)
 	unsigned int i;
 	int ret;
 
+	ret = is_bnxt_in_error(bp);
+	if (ret)
+		return ret;
+
 	if (!(bp->flags & BNXT_FLAG_INIT_DONE)) {
 		PMD_DRV_LOG(ERR, "Device Initialization not complete!\n");
 		return -EINVAL;
@@ -417,13 +425,17 @@ int bnxt_dev_xstats_get_op(struct rte_eth_dev *eth_dev,
 			   struct rte_eth_xstat *xstats, unsigned int n)
 {
 	struct bnxt *bp = eth_dev->data->dev_private;
-
 	unsigned int count, i;
 	uint64_t tx_drop_pkts;
 	unsigned int rx_port_stats_ext_cnt;
 	unsigned int tx_port_stats_ext_cnt;
 	unsigned int stat_size = sizeof(uint64_t);
 	unsigned int stat_count;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	memset(xstats, 0, sizeof(*xstats));
 
@@ -502,7 +514,13 @@ int bnxt_dev_xstats_get_names_op(__rte_unused struct rte_eth_dev *eth_dev,
 				RTE_DIM(bnxt_tx_stats_strings) + 1 +
 				RTE_DIM(bnxt_rx_ext_stats_strings) +
 				RTE_DIM(bnxt_tx_ext_stats_strings);
+	struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
 	unsigned int i, count;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	if (xstats_names != NULL) {
 		count = 0;
@@ -551,6 +569,10 @@ int bnxt_dev_xstats_reset_op(struct rte_eth_dev *eth_dev)
 	struct bnxt *bp = eth_dev->data->dev_private;
 	int ret;
 
+	ret = is_bnxt_in_error(bp);
+	if (ret)
+		return ret;
+
 	if (bp->flags & BNXT_FLAG_PORT_STATS && BNXT_SINGLE_PF(bp)) {
 		ret = bnxt_hwrm_port_clr_stats(bp);
 		if (ret != 0) {
@@ -586,9 +608,15 @@ int bnxt_dev_xstats_get_by_id_op(struct rte_eth_dev *dev, const uint64_t *ids,
 				RTE_DIM(bnxt_tx_stats_strings) + 1 +
 				RTE_DIM(bnxt_rx_ext_stats_strings) +
 				RTE_DIM(bnxt_tx_ext_stats_strings);
+	struct bnxt *bp = dev->data->dev_private;
 	struct rte_eth_xstat xstats[stat_cnt];
 	uint64_t values_copy[stat_cnt];
 	uint16_t i;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	if (!ids)
 		return bnxt_dev_xstats_get_op(dev, xstats, stat_cnt);
@@ -614,7 +642,13 @@ int bnxt_dev_xstats_get_names_by_id_op(struct rte_eth_dev *dev,
 				RTE_DIM(bnxt_rx_ext_stats_strings) +
 				RTE_DIM(bnxt_tx_ext_stats_strings);
 	struct rte_eth_xstat_name xstats_names_copy[stat_cnt];
+	struct bnxt *bp = dev->data->dev_private;
 	uint16_t i;
+	int rc;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	if (!ids)
 		return bnxt_dev_xstats_get_names_op(dev, xstats_names,
diff --git a/drivers/net/bnxt/bnxt_txq.c b/drivers/net/bnxt/bnxt_txq.c
index 43b3496c1e..0901324793 100644
--- a/drivers/net/bnxt/bnxt_txq.c
+++ b/drivers/net/bnxt/bnxt_txq.c
@@ -58,6 +58,9 @@ void bnxt_tx_queue_release_op(void *tx_queue)
 	struct bnxt_tx_queue *txq = (struct bnxt_tx_queue *)tx_queue;
 
 	if (txq) {
+		if (is_bnxt_in_error(txq->bp))
+			return;
+
 		/* Free TX ring hardware descriptors */
 		bnxt_tx_queue_release_mbufs(txq);
 		bnxt_free_ring(txq->tx_ring->tx_ring_struct);
@@ -84,6 +87,10 @@ int bnxt_tx_queue_setup_op(struct rte_eth_dev *eth_dev,
 	struct bnxt_tx_queue *txq;
 	int rc = 0;
 
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
+
 	if (queue_idx >= bp->max_tx_rings) {
 		PMD_DRV_LOG(ERR,
 			"Cannot create Tx ring %d. Only %d rings available\n",
diff --git a/drivers/net/bnxt/bnxt_txr.c b/drivers/net/bnxt/bnxt_txr.c
index c71e6f1892..35e7166bed 100644
--- a/drivers/net/bnxt/bnxt_txr.c
+++ b/drivers/net/bnxt/bnxt_txr.c
@@ -148,6 +148,9 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 		TX_BD_LONG_FLAGS_LHINT_LT2K
 	};
 
+	if (unlikely(is_bnxt_in_error(txq->bp)))
+		return -EIO;
+
 	if (tx_pkt->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM |
 				PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM |
 				PKT_TX_VLAN_PKT | PKT_TX_OUTER_IP_CKSUM |
@@ -485,10 +488,29 @@ uint16_t bnxt_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	return nb_tx_pkts;
 }
 
+/*
+ * Dummy DPDK callback for TX.
+ *
+ * This function is used to temporarily replace the real callback during
+ * unsafe control operations on the queue, or in case of error.
+ */
+uint16_t
+bnxt_dummy_xmit_pkts(void *tx_queue __rte_unused,
+		     struct rte_mbuf **tx_pkts __rte_unused,
+		     uint16_t nb_pkts __rte_unused)
+{
+	return 0;
+}
+
 int bnxt_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 {
 	struct bnxt *bp = dev->data->dev_private;
 	struct bnxt_tx_queue *txq = bp->tx_queues[tx_queue_id];
+	int rc = 0;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
 	txq->tx_deferred_start = false;
@@ -501,6 +523,11 @@ int bnxt_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 {
 	struct bnxt *bp = dev->data->dev_private;
 	struct bnxt_tx_queue *txq = bp->tx_queues[tx_queue_id];
+	int rc = 0;
+
+	rc = is_bnxt_in_error(bp);
+	if (rc)
+		return rc;
 
 	/* Handle TX completions */
 	bnxt_handle_tx_cp(txq);
diff --git a/drivers/net/bnxt/bnxt_txr.h b/drivers/net/bnxt/bnxt_txr.h
index 08fd2e0142..e7f43f9d1d 100644
--- a/drivers/net/bnxt/bnxt_txr.h
+++ b/drivers/net/bnxt/bnxt_txr.h
@@ -57,6 +57,8 @@ int bnxt_init_one_tx_ring(struct bnxt_tx_queue *txq);
 int bnxt_init_tx_ring_struct(struct bnxt_tx_queue *txq, unsigned int socket_id);
 uint16_t bnxt_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			       uint16_t nb_pkts);
+uint16_t bnxt_dummy_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+			      uint16_t nb_pkts);
 #ifdef RTE_ARCH_X86
 uint16_t bnxt_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 			    uint16_t nb_pkts);
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v3 03/15] net/bnxt: handle reset notify async event from FW
  2019-10-02  1:23         ` [dpdk-dev] [PATCH v3 00/15] " Ajit Khaparde
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 01/15] net/bnxt: add FW reset HWRM command Ajit Khaparde
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 02/15] net/bnxt: prevent device access when device is in reset Ajit Khaparde
@ 2019-10-02  1:23           ` Ajit Khaparde
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 04/15] net/bnxt: inform firmware about IF state changes Ajit Khaparde
                             ` (12 subsequent siblings)
  15 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-10-02  1:23 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

When a FW upgrade is initiated, the currently running instance
of FW issues a HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY
async notification to the driver. On receiving this notification,
the PMD shall quiesce itself and poll the FW with the HWRM_VER_GET
command at regular intervals.

Once the VER_GET command succeeds, the driver should go through
the rediscovery process and re-initialize the device.

Also register with FW for the reset notify async event.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
---
 drivers/net/bnxt/bnxt.h                |  13 +++
 drivers/net/bnxt/bnxt_cpr.c            |  16 +++
 drivers/net/bnxt/bnxt_cpr.h            |   1 +
 drivers/net/bnxt/bnxt_ethdev.c         | 147 ++++++++++++++++++++++---
 drivers/net/bnxt/bnxt_hwrm.c           |  47 +++++++-
 drivers/net/bnxt/bnxt_hwrm.h           |   2 +
 drivers/net/bnxt/hsi_struct_def_dpdk.h |  11 ++
 7 files changed, 215 insertions(+), 22 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 37b4c717d6..8797b032ee 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -333,6 +333,16 @@ struct bnxt_ctx_mem_info {
 	struct bnxt_ctx_pg_info *tqm_mem[BNXT_MAX_TC_Q];
 };
 
+/* Maximum Firmware Reset bail out value in milliseconds */
+#define BNXT_MAX_FW_RESET_TIMEOUT	6000
+/* Minimum time required for the firmware readiness in milliseconds */
+#define BNXT_MIN_FW_READY_TIMEOUT	2000
+/* Frequency for the firmware readiness check in milliseconds */
+#define BNXT_FW_READY_WAIT_INTERVAL	100
+
+#define US_PER_MS			1000
+#define NS_PER_US			1000
+
 #define BNXT_HWRM_SHORT_REQ_LEN		sizeof(struct hwrm_short_input)
 struct bnxt {
 	void				*bar0;
@@ -463,6 +473,9 @@ struct bnxt {
 	struct bnxt_ptp_cfg     *ptp_cfg;
 	uint16_t		vf_resv_strategy;
 	struct bnxt_ctx_mem_info        *ctx;
+
+	uint16_t		fw_reset_min_msecs;
+	uint16_t		fw_reset_max_msecs;
 };
 
 int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete);
diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index bbcdb42f10..1a23649b05 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -4,6 +4,7 @@
  */
 
 #include <rte_malloc.h>
+#include <rte_alarm.h>
 
 #include "bnxt.h"
 #include "bnxt_cpr.h"
@@ -40,6 +41,21 @@ void bnxt_handle_async_event(struct bnxt *bp,
 	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED:
 		PMD_DRV_LOG(INFO, "Port conn async event\n");
 		break;
+	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY:
+		/* timestamp_lo/hi values are in units of 100ms */
+		bp->fw_reset_max_msecs = async_cmp->timestamp_hi ?
+			rte_le_to_cpu_16(async_cmp->timestamp_hi) * 100 :
+			BNXT_MAX_FW_RESET_TIMEOUT;
+		bp->fw_reset_min_msecs = async_cmp->timestamp_lo ?
+			async_cmp->timestamp_lo * 100 :
+			BNXT_MIN_FW_READY_TIMEOUT;
+		PMD_DRV_LOG(INFO,
+			    "Firmware non-fatal reset event received\n");
+
+		bp->flags |= BNXT_FLAG_FW_RESET;
+		rte_eal_alarm_set(US_PER_MS, bnxt_dev_reset_and_resume,
+				  (void *)bp);
+		break;
 	default:
 		PMD_DRV_LOG(INFO, "handle_async_event id = 0x%x\n", event_id);
 		break;
diff --git a/drivers/net/bnxt/bnxt_cpr.h b/drivers/net/bnxt/bnxt_cpr.h
index 8c6a34b611..f48293b963 100644
--- a/drivers/net/bnxt/bnxt_cpr.h
+++ b/drivers/net/bnxt/bnxt_cpr.h
@@ -106,5 +106,6 @@ struct bnxt;
 void bnxt_handle_async_event(struct bnxt *bp, struct cmpl_base *cmp);
 void bnxt_handle_fwd_req(struct bnxt *bp, struct cmpl_base *cmp);
 int bnxt_event_hwrm_resp_handler(struct bnxt *bp, struct cmpl_base *cmp);
+void bnxt_dev_reset_and_resume(void *arg);
 
 #endif
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index d90a6e4202..6bc006a719 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -11,6 +11,7 @@
 #include <rte_ethdev_pci.h>
 #include <rte_malloc.h>
 #include <rte_cycles.h>
+#include <rte_alarm.h>
 
 #include "bnxt.h"
 #include "bnxt_cpr.h"
@@ -166,6 +167,8 @@ static int bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask);
 static void bnxt_print_link_info(struct rte_eth_dev *eth_dev);
 static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu);
 static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev);
+static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev);
+static int bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev);
 
 int is_bnxt_in_error(struct bnxt *bp)
 {
@@ -201,19 +204,25 @@ static uint16_t  bnxt_rss_hash_tbl_size(const struct bnxt *bp)
 	return bnxt_rss_ctxts(bp) * BNXT_RSS_ENTRIES_PER_CTX_THOR;
 }
 
-static void bnxt_free_mem(struct bnxt *bp)
+static void bnxt_free_mem(struct bnxt *bp, bool reconfig)
 {
 	bnxt_free_filter_mem(bp);
 	bnxt_free_vnic_attributes(bp);
 	bnxt_free_vnic_mem(bp);
 
-	bnxt_free_stats(bp);
-	bnxt_free_tx_rings(bp);
-	bnxt_free_rx_rings(bp);
+	/* tx/rx rings are configured as part of *_queue_setup callbacks.
+	 * If the number of rings change across fw update,
+	 * we don't have much choice except to warn the user.
+	 */
+	if (!reconfig) {
+		bnxt_free_stats(bp);
+		bnxt_free_tx_rings(bp);
+		bnxt_free_rx_rings(bp);
+	}
 	bnxt_free_async_cp_ring(bp);
 }
 
-static int bnxt_alloc_mem(struct bnxt *bp)
+static int bnxt_alloc_mem(struct bnxt *bp, bool reconfig)
 {
 	int rc;
 
@@ -244,7 +253,7 @@ static int bnxt_alloc_mem(struct bnxt *bp)
 	return 0;
 
 alloc_mem_err:
-	bnxt_free_mem(bp);
+	bnxt_free_mem(bp, reconfig);
 	return rc;
 }
 
@@ -3523,6 +3532,89 @@ static const struct eth_dev_ops bnxt_dev_ops = {
 	.timesync_read_tx_timestamp = bnxt_timesync_read_tx_timestamp,
 };
 
+static void bnxt_dev_cleanup(struct bnxt *bp)
+{
+	bnxt_set_hwrm_link_config(bp, false);
+	bp->link_info.link_up = 0;
+	if (bp->dev_stopped == 0)
+		bnxt_dev_stop_op(bp->eth_dev);
+
+	bnxt_uninit_resources(bp, true);
+}
+
+static int bnxt_restore_filters(struct bnxt *bp)
+{
+	struct rte_eth_dev *dev = bp->eth_dev;
+	int ret = 0;
+
+	if (dev->data->all_multicast)
+		ret = bnxt_allmulticast_enable_op(dev);
+	if (dev->data->promiscuous)
+		ret = bnxt_promiscuous_enable_op(dev);
+
+	/* TODO restore other filters as well */
+	return ret;
+}
+
+static void bnxt_dev_recover(void *arg)
+{
+	struct bnxt *bp = arg;
+	int timeout = bp->fw_reset_max_msecs;
+	int rc = 0;
+
+	do {
+		rc = bnxt_hwrm_ver_get(bp);
+		if (rc == 0)
+			break;
+		rte_delay_ms(BNXT_FW_READY_WAIT_INTERVAL);
+		timeout -= BNXT_FW_READY_WAIT_INTERVAL;
+	} while (rc && timeout);
+
+	if (rc) {
+		PMD_DRV_LOG(ERR, "FW is not Ready after reset\n");
+		goto err;
+	}
+
+	rc = bnxt_init_resources(bp, true);
+	if (rc) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to initialize resources after reset\n");
+		goto err;
+	}
+	/* clear reset flag as the device is initialized now */
+	bp->flags &= ~BNXT_FLAG_FW_RESET;
+
+	rc = bnxt_dev_start_op(bp->eth_dev);
+	if (rc) {
+		PMD_DRV_LOG(ERR, "Failed to start port after reset\n");
+		goto err;
+	}
+
+	rc = bnxt_restore_filters(bp);
+	if (rc)
+		goto err;
+
+	PMD_DRV_LOG(INFO, "Recovered from FW reset\n");
+	return;
+err:
+	bp->flags |= BNXT_FLAG_FATAL_ERROR;
+	bnxt_uninit_resources(bp, false);
+	PMD_DRV_LOG(ERR, "Failed to recover from FW reset\n");
+}
+
+void bnxt_dev_reset_and_resume(void *arg)
+{
+	struct bnxt *bp = arg;
+	int rc;
+
+	bnxt_dev_cleanup(bp);
+
+	rc = rte_eal_alarm_set(US_PER_MS * bp->fw_reset_min_msecs,
+			       bnxt_dev_recover, (void *)bp);
+	if (rc)
+		PMD_DRV_LOG(ERR, "Error setting recovery alarm");
+}
+
 static bool bnxt_vf_pciid(uint16_t id)
 {
 	if (id == BROADCOM_DEV_ID_57304_VF ||
@@ -3965,6 +4057,22 @@ static int bnxt_setup_mac_addr(struct rte_eth_dev *eth_dev)
 	return rc;
 }
 
+static int bnxt_restore_dflt_mac(struct bnxt *bp)
+{
+	int rc = 0;
+
+	/* MAC is already configured in FW */
+	if (!bnxt_check_zero_bytes(bp->dflt_mac_addr, RTE_ETHER_ADDR_LEN))
+		return 0;
+
+	/* Restore the old MAC configured */
+	rc = bnxt_hwrm_set_mac(bp);
+	if (rc)
+		PMD_DRV_LOG(ERR, "Failed to restore MAC address\n");
+
+	return rc;
+}
+
 static void bnxt_config_vf_req_fwd(struct bnxt *bp)
 {
 	if (!BNXT_PF(bp))
@@ -4038,7 +4146,7 @@ static int bnxt_init_fw(struct bnxt *bp)
 	return 0;
 }
 
-static int bnxt_init_resources(struct bnxt *bp)
+static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev)
 {
 	int rc;
 
@@ -4046,9 +4154,15 @@ static int bnxt_init_resources(struct bnxt *bp)
 	if (rc)
 		return rc;
 
-	rc = bnxt_setup_mac_addr(bp->eth_dev);
-	if (rc)
-		return rc;
+	if (!reconfig_dev) {
+		rc = bnxt_setup_mac_addr(bp->eth_dev);
+		if (rc)
+			return rc;
+	} else {
+		rc = bnxt_restore_dflt_mac(bp);
+		if (rc)
+			return rc;
+	}
 
 	bnxt_config_vf_req_fwd(bp);
 
@@ -4075,7 +4189,7 @@ static int bnxt_init_resources(struct bnxt *bp)
 		}
 	}
 
-	rc = bnxt_alloc_mem(bp);
+	rc = bnxt_alloc_mem(bp, reconfig_dev);
 	if (rc)
 		return rc;
 
@@ -4149,7 +4263,7 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
 			    "Failed to allocate hwrm resource rc: %x\n", rc);
 		goto error_free;
 	}
-	rc = bnxt_init_resources(bp);
+	rc = bnxt_init_resources(bp, false);
 	if (rc)
 		goto error_free;
 
@@ -4170,18 +4284,19 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
 }
 
 static int
-bnxt_uninit_resources(struct bnxt *bp)
+bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev)
 {
 	int rc;
 
 	bnxt_disable_int(bp);
 	bnxt_free_int(bp);
-	bnxt_free_mem(bp);
+	bnxt_free_mem(bp, reconfig_dev);
 	bnxt_hwrm_func_buf_unrgtr(bp);
 	rc = bnxt_hwrm_func_driver_unregister(bp, 0);
 	bp->flags &= ~BNXT_FLAG_REGISTERED;
 	bnxt_free_ctx_mem(bp);
-	bnxt_free_hwrm_resources(bp);
+	if (!reconfig_dev)
+		bnxt_free_hwrm_resources(bp);
 
 	return rc;
 }
@@ -4197,7 +4312,7 @@ bnxt_dev_uninit(struct rte_eth_dev *eth_dev)
 
 	PMD_DRV_LOG(DEBUG, "Calling Device uninit\n");
 
-	rc = bnxt_uninit_resources(bp);
+	rc = bnxt_uninit_resources(bp, false);
 
 	if (bp->grp_info != NULL) {
 		rte_free(bp->grp_info);
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 24a5a09147..abbad0152c 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -27,6 +27,7 @@
 #include <rte_io.h>
 
 #define HWRM_CMD_TIMEOUT		6000000
+#define HWRM_SHORT_CMD_TIMEOUT		50000
 #define HWRM_SPEC_CODE_1_8_3		0x10803
 #define HWRM_VERSION_1_9_1		0x10901
 #define HWRM_VERSION_1_9_2		0x10903
@@ -97,6 +98,17 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg,
 		GRCPF_REG_KONG_CHANNEL_OFFSET : GRCPF_REG_CHIMP_CHANNEL_OFFSET;
 	uint16_t mb_trigger_offset = use_kong_mb ?
 		GRCPF_REG_KONG_COMM_TRIGGER : GRCPF_REG_CHIMP_COMM_TRIGGER;
+	uint32_t timeout;
+
+	/* Do not send HWRM commands to firmware in error state */
+	if (bp->flags & BNXT_FLAG_FATAL_ERROR)
+		return 0;
+
+	/* For VER_GET command, set timeout as 50ms */
+	if (rte_cpu_to_le_16(req->req_type) == HWRM_VER_GET)
+		timeout = HWRM_SHORT_CMD_TIMEOUT;
+	else
+		timeout = HWRM_CMD_TIMEOUT;
 
 	if (bp->flags & BNXT_FLAG_SHORT_CMD ||
 	    msg_len > bp->max_req_len) {
@@ -139,7 +151,7 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg,
 	rte_write32(1, bar);
 
 	/* Poll for the valid bit */
-	for (i = 0; i < HWRM_CMD_TIMEOUT; i++) {
+	for (i = 0; i < timeout; i++) {
 		/* Sanity check on the resp->resp_len */
 		rte_rmb();
 		if (resp->resp_len && resp->resp_len <= bp->max_resp_len) {
@@ -151,7 +163,12 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg,
 		rte_delay_us(1);
 	}
 
-	if (i >= HWRM_CMD_TIMEOUT) {
+	if (i >= timeout) {
+		/* Suppress VER_GET timeout messages during reset recovery */
+		if (bp->flags & BNXT_FLAG_FW_RESET &&
+		    rte_cpu_to_le_16(req->req_type) == HWRM_VER_GET)
+			return -ETIMEDOUT;
+
 		PMD_DRV_LOG(ERR, "Error(timeout) sending msg 0x%04x\n",
 			    req->req_type);
 		return -ETIMEDOUT;
@@ -657,12 +674,21 @@ int bnxt_hwrm_func_reset(struct bnxt *bp)
 int bnxt_hwrm_func_driver_register(struct bnxt *bp)
 {
 	int rc;
+	uint32_t flags = 0;
 	struct hwrm_func_drv_rgtr_input req = {.req_type = 0 };
 	struct hwrm_func_drv_rgtr_output *resp = bp->hwrm_cmd_resp_addr;
 
 	if (bp->flags & BNXT_FLAG_REGISTERED)
 		return 0;
 
+	flags = HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_HOT_RESET_SUPPORT;
+
+	/* PFs and trusted VFs should indicate the support of the
+	 * Master capability on non Stingray platform
+	 */
+	if ((BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp)) && !BNXT_STINGRAY(bp))
+		flags |= HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_MASTER_SUPPORT;
+
 	HWRM_PREP(req, FUNC_DRV_RGTR, BNXT_USE_CHIMP_MB);
 	req.enables = rte_cpu_to_le_32(HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_VER |
 			HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_ASYNC_EVENT_FWD);
@@ -683,14 +709,16 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp)
 		 * this HWRM sniffer list in FW because DPDK PF driver does
 		 * not support this.
 		 */
-		req.flags =
-		rte_cpu_to_le_32(HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_FWD_NONE_MODE);
+		flags |= HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_FWD_NONE_MODE;
 	}
 
+	req.flags = rte_cpu_to_le_32(flags);
+
 	req.async_event_fwd[0] |=
 		rte_cpu_to_le_32(ASYNC_CMPL_EVENT_ID_LINK_STATUS_CHANGE |
 				 ASYNC_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED |
-				 ASYNC_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE);
+				 ASYNC_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE |
+				 ASYNC_CMPL_EVENT_ID_RESET_NOTIFY);
 	req.async_event_fwd[1] |=
 		rte_cpu_to_le_32(ASYNC_CMPL_EVENT_ID_PF_DRVR_UNLOAD |
 				 ASYNC_CMPL_EVENT_ID_VF_CFG_CHANGE);
@@ -837,7 +865,10 @@ int bnxt_hwrm_ver_get(struct bnxt *bp)
 
 	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
-	HWRM_CHECK_RESULT();
+	if (bp->flags & BNXT_FLAG_FW_RESET)
+		HWRM_CHECK_RESULT_SILENT();
+	else
+		HWRM_CHECK_RESULT();
 
 	PMD_DRV_LOG(INFO, "%d.%d.%d:%d.%d.%d\n",
 		resp->hwrm_intf_maj_8b, resp->hwrm_intf_min_8b,
@@ -2685,6 +2716,10 @@ int bnxt_hwrm_func_qcfg(struct bnxt *bp, uint16_t *mtu)
 	if (BNXT_VF(bp) && (flags & HWRM_FUNC_QCFG_OUTPUT_FLAGS_TRUSTED_VF)) {
 		bp->flags |= BNXT_FLAG_TRUSTED_VF_EN;
 		PMD_DRV_LOG(INFO, "Trusted VF cap enabled\n");
+	} else if (BNXT_VF(bp) &&
+		   !(flags & HWRM_FUNC_QCFG_OUTPUT_FLAGS_TRUSTED_VF)) {
+		bp->flags &= ~BNXT_FLAG_TRUSTED_VF_EN;
+		PMD_DRV_LOG(INFO, "Trusted VF cap disabled\n");
 	}
 
 	if (mtu)
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index c882fc2a15..a036205329 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -21,6 +21,8 @@ struct bnxt_cp_ring_info;
 	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED)
 #define ASYNC_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE	\
 	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE)
+#define ASYNC_CMPL_EVENT_ID_RESET_NOTIFY \
+	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY)
 #define ASYNC_CMPL_EVENT_ID_PF_DRVR_UNLOAD	\
 	(1 << (HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD - 32))
 #define ASYNC_CMPL_EVENT_ID_VF_CFG_CHANGE	\
diff --git a/drivers/net/bnxt/hsi_struct_def_dpdk.h b/drivers/net/bnxt/hsi_struct_def_dpdk.h
index 0095717254..809ea48736 100644
--- a/drivers/net/bnxt/hsi_struct_def_dpdk.h
+++ b/drivers/net/bnxt/hsi_struct_def_dpdk.h
@@ -8918,6 +8918,17 @@ struct hwrm_func_drv_rgtr_input {
 	 */
 	#define HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_ERROR_RECOVERY_SUPPORT \
 		UINT32_C(0x20)
+	/*
+	 * When this bit is 1, the function is indicating the support of the
+	 * Master capability. The Firmware will use this capability to select
+	 * the Master function. The master function will be used to initiate
+	 * designated functionality like error recovery etc. If none of the
+	 * registered PFs or trusted VFs indicate this support, then
+	 * firmware will select the 1st registered PF as Master capable
+	 * instance.
+	 */
+	#define HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_MASTER_SUPPORT \
+		UINT32_C(0x40)
 	uint32_t	enables;
 	/*
 	 * This bit must be '1' for the os_type field to be
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v3 04/15] net/bnxt: inform firmware about IF state changes
  2019-10-02  1:23         ` [dpdk-dev] [PATCH v3 00/15] " Ajit Khaparde
                             ` (2 preceding siblings ...)
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 03/15] net/bnxt: handle reset notify async event from FW Ajit Khaparde
@ 2019-10-02  1:23           ` Ajit Khaparde
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 05/15] net/bnxt: handle fatal event from FW under error conditions Ajit Khaparde
                             ` (11 subsequent siblings)
  15 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-10-02  1:23 UTC (permalink / raw)
  To: dev
  Cc: ferruh.yigit, Kalesh AP, Santoshkumar Karanappa Rastapur, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

Use latest firmware API to inform firmware about IF state changes.
Firmware has the option to clean up resources during IF down and
to require the driver to reserve resources again during IF up.

During port start, the HWRM_FUNC_DRV_IF_CHANGE command response flags
may indicate that the firmware has been reset. Add logic to re-probe
the firmware and set up resources again in that case.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Santoshkumar Karanappa Rastapur <santosh.rastapur@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        |  2 ++
 drivers/net/bnxt/bnxt_ethdev.c | 30 ++++++++++++++++++++++++
 drivers/net/bnxt/bnxt_hwrm.c   | 42 ++++++++++++++++++++++++++++++++++
 drivers/net/bnxt/bnxt_hwrm.h   |  1 +
 4 files changed, 75 insertions(+)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 8797b032ee..040cae3b19 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -370,6 +370,8 @@ struct bnxt {
 #define BNXT_FLAG_STINGRAY	(1 << 14)
 #define BNXT_FLAG_FW_RESET	(1 << 15)
 #define BNXT_FLAG_FATAL_ERROR	(1 << 16)
+#define BNXT_FLAG_FW_CAP_IF_CHANGE	(1 << 17)
+#define BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE	(1 << 18)
 #define BNXT_FLAG_EXT_STATS_SUPPORTED	(1 << 29)
 #define BNXT_FLAG_NEW_RM	(1 << 30)
 #define BNXT_FLAG_INIT_DONE	(1U << 31)
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 6bc006a719..1bb84eacd9 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -794,6 +794,25 @@ bnxt_transmit_function(__rte_unused struct rte_eth_dev *eth_dev)
 	return bnxt_xmit_pkts;
 }
 
+static int bnxt_handle_if_change_status(struct bnxt *bp)
+{
+	int rc;
+
+	/* Since fw has undergone a reset and lost all contexts,
+	 * set fatal flag to not issue hwrm during cleanup
+	 */
+	bp->flags |= BNXT_FLAG_FATAL_ERROR;
+	bnxt_uninit_resources(bp, true);
+
+	/* clear fatal flag so that re-init happens */
+	bp->flags &= ~BNXT_FLAG_FATAL_ERROR;
+	rc = bnxt_init_resources(bp, true);
+
+	bp->flags &= ~BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE;
+
+	return rc;
+}
+
 static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
 {
 	struct bnxt *bp = eth_dev->data->dev_private;
@@ -807,6 +826,15 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
 			bp->rx_cp_nr_rings, RTE_ETHDEV_QUEUE_STAT_CNTRS);
 	}
 
+	rc = bnxt_hwrm_if_change(bp, 1);
+	if (!rc) {
+		if (bp->flags & BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE) {
+			rc = bnxt_handle_if_change_status(bp);
+			if (rc)
+				return rc;
+		}
+	}
+
 	rc = bnxt_init_chip(bp);
 	if (rc)
 		goto error;
@@ -833,6 +861,7 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
 	return 0;
 
 error:
+	bnxt_hwrm_if_change(bp, 0);
 	bnxt_shutdown_nic(bp);
 	bnxt_free_tx_mbufs(bp);
 	bnxt_free_rx_mbufs(bp);
@@ -899,6 +928,7 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
 	bnxt_free_tx_mbufs(bp);
 	bnxt_free_rx_mbufs(bp);
 	bnxt_shutdown_nic(bp);
+	bnxt_hwrm_if_change(bp, 0);
 	bp->dev_stopped = 1;
 }
 
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index abbad0152c..1bdbb1433d 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -726,6 +726,11 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp)
 	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
 	HWRM_CHECK_RESULT();
+
+	flags = rte_le_to_cpu_32(resp->flags);
+	if (flags & HWRM_FUNC_DRV_RGTR_OUTPUT_FLAGS_IF_CHANGE_SUPPORTED)
+		bp->flags |= BNXT_FLAG_FW_CAP_IF_CHANGE;
+
 	HWRM_UNLOCK();
 
 	bp->flags |= BNXT_FLAG_REGISTERED;
@@ -4659,3 +4664,40 @@ int bnxt_hwrm_set_mac(struct bnxt *bp)
 
 	return rc;
 }
+
+int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
+{
+	struct hwrm_func_drv_if_change_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_func_drv_if_change_input req = {0};
+	uint32_t flags;
+	int rc;
+
+	if (!(bp->flags & BNXT_FLAG_FW_CAP_IF_CHANGE))
+		return 0;
+
+	/* Do not issue FUNC_DRV_IF_CHANGE during reset recovery.
+	 * If we issue FUNC_DRV_IF_CHANGE with flags down before
+	 * FUNC_DRV_UNRGTR, FW resets before FUNC_DRV_UNRGTR
+	 */
+	if (!up && (bp->flags & BNXT_FLAG_FW_RESET))
+		return 0;
+
+	HWRM_PREP(req, FUNC_DRV_IF_CHANGE, BNXT_USE_CHIMP_MB);
+
+	if (up)
+		req.flags =
+		rte_cpu_to_le_32(HWRM_FUNC_DRV_IF_CHANGE_INPUT_FLAGS_UP);
+
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
+
+	HWRM_CHECK_RESULT();
+	flags = rte_le_to_cpu_32(resp->flags);
+	HWRM_UNLOCK();
+
+	if (flags & HWRM_FUNC_DRV_IF_CHANGE_OUTPUT_FLAGS_HOT_FW_RESET_DONE) {
+		PMD_DRV_LOG(INFO, "FW reset happened while port was down\n");
+		bp->flags |= BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE;
+	}
+
+	return 0;
+}
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index a036205329..2f57e950bb 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -201,4 +201,5 @@ int bnxt_hwrm_tunnel_redirect_query(struct bnxt *bp, uint32_t *type);
 int bnxt_hwrm_tunnel_redirect_info(struct bnxt *bp, uint8_t tun_type,
 				   uint16_t *dst_fid);
 int bnxt_hwrm_set_mac(struct bnxt *bp);
+int bnxt_hwrm_if_change(struct bnxt *bp, bool state);
 #endif
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v3 05/15] net/bnxt: handle fatal event from FW under error conditions
  2019-10-02  1:23         ` [dpdk-dev] [PATCH v3 00/15] " Ajit Khaparde
                             ` (3 preceding siblings ...)
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 04/15] net/bnxt: inform firmware about IF state changes Ajit Khaparde
@ 2019-10-02  1:23           ` Ajit Khaparde
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 06/15] net/bnxt: query firmware error recovery capabilities Ajit Khaparde
                             ` (10 subsequent siblings)
  15 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-10-02  1:23 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

When the firmware hits an unrecoverable error condition, it initiates
recovery by sending the async event EVENT_CMPL_EVENT_ID_RESET_NOTIFY, with
data1 set to RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL, to
all host drivers, and will reset the chip.

The recovery procedure follows the same sequence as the one for hot FW upgrade.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt_cpr.c    | 13 +++++++++++--
 drivers/net/bnxt/bnxt_cpr.h    |  5 +++++
 drivers/net/bnxt/bnxt_ethdev.c |  3 +++
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index 1a23649b05..3afb9902b0 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -21,6 +21,7 @@ void bnxt_handle_async_event(struct bnxt *bp,
 	struct hwrm_async_event_cmpl *async_cmp =
 				(struct hwrm_async_event_cmpl *)cmp;
 	uint16_t event_id = rte_le_to_cpu_16(async_cmp->event_id);
+	uint32_t event_data;
 
 	/* TODO: HWRM async events are not defined yet */
 	/* Needs to handle: link events, error events, etc. */
@@ -42,6 +43,7 @@ void bnxt_handle_async_event(struct bnxt *bp,
 		PMD_DRV_LOG(INFO, "Port conn async event\n");
 		break;
 	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY:
+		event_data = rte_le_to_cpu_32(async_cmp->event_data1);
 		/* timestamp_lo/hi values are in units of 100ms */
 		bp->fw_reset_max_msecs = async_cmp->timestamp_hi ?
 			rte_le_to_cpu_16(async_cmp->timestamp_hi) * 100 :
@@ -49,8 +51,15 @@ void bnxt_handle_async_event(struct bnxt *bp,
 		bp->fw_reset_min_msecs = async_cmp->timestamp_lo ?
 			async_cmp->timestamp_lo * 100 :
 			BNXT_MIN_FW_READY_TIMEOUT;
-		PMD_DRV_LOG(INFO,
-			    "Firmware non-fatal reset event received\n");
+		if ((event_data & EVENT_DATA1_REASON_CODE_MASK) ==
+		    EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL) {
+			PMD_DRV_LOG(INFO,
+				    "Firmware fatal reset event received\n");
+			bp->flags |= BNXT_FLAG_FATAL_ERROR;
+		} else {
+			PMD_DRV_LOG(INFO,
+				    "Firmware non-fatal reset event received\n");
+		}
 
 		bp->flags |= BNXT_FLAG_FW_RESET;
 		rte_eal_alarm_set(US_PER_MS, bnxt_dev_reset_and_resume,
diff --git a/drivers/net/bnxt/bnxt_cpr.h b/drivers/net/bnxt/bnxt_cpr.h
index f48293b963..b61bafa0e8 100644
--- a/drivers/net/bnxt/bnxt_cpr.h
+++ b/drivers/net/bnxt/bnxt_cpr.h
@@ -108,4 +108,9 @@ void bnxt_handle_fwd_req(struct bnxt *bp, struct cmpl_base *cmp);
 int bnxt_event_hwrm_resp_handler(struct bnxt *bp, struct cmpl_base *cmp);
 void bnxt_dev_reset_and_resume(void *arg);
 
+#define EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL     \
+	HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL
+#define EVENT_DATA1_REASON_CODE_MASK                   \
+	HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MASK
+
 #endif
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 1bb84eacd9..768b6cdc74 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -3592,6 +3592,9 @@ static void bnxt_dev_recover(void *arg)
 	int timeout = bp->fw_reset_max_msecs;
 	int rc = 0;
 
+	/* Clear Error flag so that device re-init should happen */
+	bp->flags &= ~BNXT_FLAG_FATAL_ERROR;
+
 	do {
 		rc = bnxt_hwrm_ver_get(bp);
 		if (rc == 0)
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v3 06/15] net/bnxt: query firmware error recovery capabilities
  2019-10-02  1:23         ` [dpdk-dev] [PATCH v3 00/15] " Ajit Khaparde
                             ` (4 preceding siblings ...)
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 05/15] net/bnxt: handle fatal event from FW under error conditions Ajit Khaparde
@ 2019-10-02  1:23           ` Ajit Khaparde
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 07/15] net/bnxt: map status registers for FW health monitoring Ajit Khaparde
                             ` (9 subsequent siblings)
  15 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-10-02  1:23 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

In the driver-initiated error recovery process, the driver has to know
the register offsets and values needed to initiate a FW reset. The HWRM
command HWRM_ERROR_RECOVERY_QCFG is used to obtain all the registers and
values required to initiate a FW reset. The command response includes the
FW heartbeat register, the health status register, the error counter
register, and the register offsets and values used to reset the chip if
the firmware crashes and becomes unresponsive.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        | 27 ++++++++++
 drivers/net/bnxt/bnxt_ethdev.c | 13 ++++-
 drivers/net/bnxt/bnxt_hwrm.c   | 93 ++++++++++++++++++++++++++++++++++
 drivers/net/bnxt/bnxt_hwrm.h   |  1 +
 4 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 040cae3b19..bfe5cb0df2 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -343,6 +343,29 @@ struct bnxt_ctx_mem_info {
 #define US_PER_MS			1000
 #define NS_PER_US			1000
 
+struct bnxt_error_recovery_info {
+	/* All units in milliseconds */
+	uint32_t	driver_polling_freq;
+	uint32_t	master_func_wait_period;
+	uint32_t	normal_func_wait_period;
+	uint32_t	master_func_wait_period_after_reset;
+	uint32_t	max_bailout_time_after_reset;
+#define BNXT_FW_STATUS_REG		0
+#define BNXT_FW_HEARTBEAT_CNT_REG	1
+#define BNXT_FW_RECOVERY_CNT_REG	2
+#define BNXT_FW_RESET_INPROG_REG	3
+	uint32_t	status_regs[4];
+	uint32_t	reset_inprogress_reg_mask;
+#define BNXT_NUM_RESET_REG	16
+	uint8_t		reg_array_cnt;
+	uint32_t	reset_reg[BNXT_NUM_RESET_REG];
+	uint32_t	reset_reg_val[BNXT_NUM_RESET_REG];
+	uint8_t		delay_after_reset[BNXT_NUM_RESET_REG];
+#define BNXT_FLAG_ERROR_RECOVERY_HOST	(1 << 0)
+#define BNXT_FLAG_ERROR_RECOVERY_CO_CPU	(1 << 1)
+	uint32_t	flags;
+};
+
 #define BNXT_HWRM_SHORT_REQ_LEN		sizeof(struct hwrm_short_input)
 struct bnxt {
 	void				*bar0;
@@ -372,6 +395,7 @@ struct bnxt {
 #define BNXT_FLAG_FATAL_ERROR	(1 << 16)
 #define BNXT_FLAG_FW_CAP_IF_CHANGE	(1 << 17)
 #define BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE	(1 << 18)
+#define BNXT_FLAG_FW_CAP_ERROR_RECOVERY		(1 << 19)
 #define BNXT_FLAG_EXT_STATS_SUPPORTED	(1 << 29)
 #define BNXT_FLAG_NEW_RM	(1 << 30)
 #define BNXT_FLAG_INIT_DONE	(1U << 31)
@@ -478,6 +502,9 @@ struct bnxt {
 
 	uint16_t		fw_reset_min_msecs;
 	uint16_t		fw_reset_max_msecs;
+
+	/* Struct to hold adapter error recovery related info */
+	struct bnxt_error_recovery_info *recovery_info;
 };
 
 int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete);
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 768b6cdc74..95d63435bb 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -4170,6 +4170,11 @@ static int bnxt_init_fw(struct bnxt *bp)
 	if (rc)
 		return rc;
 
+	/* Get the adapter error recovery support info */
+	rc = bnxt_hwrm_error_recovery_qcfg(bp);
+	if (rc)
+		bp->flags &= ~BNXT_FLAG_FW_CAP_ERROR_RECOVERY;
+
 	if (mtu >= RTE_ETHER_MIN_MTU && mtu <= BNXT_MAX_MTU &&
 	    mtu != bp->eth_dev->data->mtu)
 		bp->eth_dev->data->mtu = mtu;
@@ -4328,9 +4333,15 @@ bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev)
 	rc = bnxt_hwrm_func_driver_unregister(bp, 0);
 	bp->flags &= ~BNXT_FLAG_REGISTERED;
 	bnxt_free_ctx_mem(bp);
-	if (!reconfig_dev)
+	if (!reconfig_dev) {
 		bnxt_free_hwrm_resources(bp);
 
+		if (bp->recovery_info != NULL) {
+			rte_free(bp->recovery_info);
+			bp->recovery_info = NULL;
+		}
+	}
+
 	return rc;
 }
 
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 1bdbb1433d..f197997b89 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -630,6 +630,13 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
 	if (flags & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT_STATS_SUPPORTED)
 		bp->flags |= BNXT_FLAG_EXT_STATS_SUPPORTED;
 
+	if (flags & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_ERROR_RECOVERY_CAPABLE) {
+		bp->flags |= BNXT_FLAG_FW_CAP_ERROR_RECOVERY;
+		PMD_DRV_LOG(DEBUG, "Adapter Error recovery SUPPORTED\n");
+	} else {
+		bp->flags &= ~BNXT_FLAG_FW_CAP_ERROR_RECOVERY;
+	}
+
 	HWRM_UNLOCK();
 
 	return rc;
@@ -4701,3 +4708,89 @@ int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 
 	return 0;
 }
+
+int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp)
+{
+	struct hwrm_error_recovery_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+	struct bnxt_error_recovery_info *info = bp->recovery_info;
+	struct hwrm_error_recovery_qcfg_input req = {0};
+	uint32_t flags = 0;
+	unsigned int i;
+	int rc;
+
+	/* Older FW does not have error recovery support */
+	if (!(bp->flags & BNXT_FLAG_FW_CAP_ERROR_RECOVERY))
+		return 0;
+
+	if (!info) {
+		info = rte_zmalloc("bnxt_hwrm_error_recovery_qcfg",
+				   sizeof(*info), 0);
+		bp->recovery_info = info;
+		if (info == NULL)
+			return -ENOMEM;
+	} else {
+		memset(info, 0, sizeof(*info));
+	}
+
+	HWRM_PREP(req, ERROR_RECOVERY_QCFG, BNXT_USE_CHIMP_MB);
+
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
+
+	HWRM_CHECK_RESULT();
+
+	flags = rte_le_to_cpu_32(resp->flags);
+	if (flags & HWRM_ERROR_RECOVERY_QCFG_OUTPUT_FLAGS_HOST)
+		info->flags |= BNXT_FLAG_ERROR_RECOVERY_HOST;
+	else if (flags & HWRM_ERROR_RECOVERY_QCFG_OUTPUT_FLAGS_CO_CPU)
+		info->flags |= BNXT_FLAG_ERROR_RECOVERY_CO_CPU;
+
+	if ((info->flags & BNXT_FLAG_ERROR_RECOVERY_CO_CPU) &&
+	    !(bp->flags & BNXT_FLAG_KONG_MB_EN)) {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	/* FW returned values are in units of 100msec */
+	info->driver_polling_freq =
+		rte_le_to_cpu_32(resp->driver_polling_freq) * 100;
+	info->master_func_wait_period =
+		rte_le_to_cpu_32(resp->master_func_wait_period) * 100;
+	info->normal_func_wait_period =
+		rte_le_to_cpu_32(resp->normal_func_wait_period) * 100;
+	info->master_func_wait_period_after_reset =
+		rte_le_to_cpu_32(resp->master_func_wait_period_after_reset) * 100;
+	info->max_bailout_time_after_reset =
+		rte_le_to_cpu_32(resp->max_bailout_time_after_reset) * 100;
+	info->status_regs[BNXT_FW_STATUS_REG] =
+		rte_le_to_cpu_32(resp->fw_health_status_reg);
+	info->status_regs[BNXT_FW_HEARTBEAT_CNT_REG] =
+		rte_le_to_cpu_32(resp->fw_heartbeat_reg);
+	info->status_regs[BNXT_FW_RECOVERY_CNT_REG] =
+		rte_le_to_cpu_32(resp->fw_reset_cnt_reg);
+	info->status_regs[BNXT_FW_RESET_INPROG_REG] =
+		rte_le_to_cpu_32(resp->reset_inprogress_reg);
+	info->reg_array_cnt =
+		rte_le_to_cpu_32(resp->reg_array_cnt);
+
+	if (info->reg_array_cnt >= BNXT_NUM_RESET_REG) {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	for (i = 0; i < info->reg_array_cnt; i++) {
+		info->reset_reg[i] =
+			rte_le_to_cpu_32(resp->reset_reg[i]);
+		info->reset_reg_val[i] =
+			rte_le_to_cpu_32(resp->reset_reg_val[i]);
+		info->delay_after_reset[i] =
+			resp->delay_after_reset[i];
+	}
+err:
+	HWRM_UNLOCK();
+
+	if (rc) {
+		rte_free(bp->recovery_info);
+		bp->recovery_info = NULL;
+	}
+	return rc;
+}
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index 2f57e950bb..c332c129dd 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -202,4 +202,5 @@ int bnxt_hwrm_tunnel_redirect_info(struct bnxt *bp, uint8_t tun_type,
 				   uint16_t *dst_fid);
 int bnxt_hwrm_set_mac(struct bnxt *bp);
 int bnxt_hwrm_if_change(struct bnxt *bp, bool state);
+int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp);
 #endif
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v3 07/15] net/bnxt: map status registers for FW health monitoring
  2019-10-02  1:23         ` [dpdk-dev] [PATCH v3 00/15] " Ajit Khaparde
                             ` (5 preceding siblings ...)
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 06/15] net/bnxt: query firmware error recovery capabilities Ajit Khaparde
@ 2019-10-02  1:23           ` Ajit Khaparde
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 08/15] net/bnxt: advertise error recovery capability and handle async event Ajit Khaparde
                             ` (8 subsequent siblings)
  15 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-10-02  1:23 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

The HWRM_ERROR_RECOVERY_QCFG command returns the offsets of the FW status
registers used for periodic firmware health monitoring. Map them to GRC
window 2.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        | 22 +++++++++++++++++++++-
 drivers/net/bnxt/bnxt_ethdev.c | 34 ++++++++++++++++++++++++++++++++++
 drivers/net/bnxt/bnxt_hwrm.c   |  4 ++++
 3 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index bfe5cb0df2..ac2bf158dd 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -354,7 +354,9 @@ struct bnxt_error_recovery_info {
 #define BNXT_FW_HEARTBEAT_CNT_REG	1
 #define BNXT_FW_RECOVERY_CNT_REG	2
 #define BNXT_FW_RESET_INPROG_REG	3
-	uint32_t	status_regs[4];
+#define BNXT_FW_STATUS_REG_CNT		4
+	uint32_t	status_regs[BNXT_FW_STATUS_REG_CNT];
+	uint32_t	mapped_status_regs[BNXT_FW_STATUS_REG_CNT];
 	uint32_t	reset_inprogress_reg_mask;
 #define BNXT_NUM_RESET_REG	16
 	uint8_t		reg_array_cnt;
@@ -366,6 +368,22 @@ struct bnxt_error_recovery_info {
 	uint32_t	flags;
 };
 
+/* address space location of register */
+#define BNXT_FW_STATUS_REG_TYPE_MASK	3
+/* register is located in PCIe config space */
+#define BNXT_FW_STATUS_REG_TYPE_CFG	0
+/* register is located in GRC address space */
+#define BNXT_FW_STATUS_REG_TYPE_GRC	1
+/* register is located in BAR0  */
+#define BNXT_FW_STATUS_REG_TYPE_BAR0	2
+/* register is located in BAR1  */
+#define BNXT_FW_STATUS_REG_TYPE_BAR1	3
+
+#define BNXT_FW_STATUS_REG_TYPE(reg)	((reg) & BNXT_FW_STATUS_REG_TYPE_MASK)
+#define BNXT_FW_STATUS_REG_OFF(reg)	((reg) & ~BNXT_FW_STATUS_REG_TYPE_MASK)
+
+#define BNXT_GRCP_WINDOW_2_BASE		0x2000
+
 #define BNXT_HWRM_SHORT_REQ_LEN		sizeof(struct hwrm_short_input)
 struct bnxt {
 	void				*bar0;
@@ -511,6 +529,8 @@ int bnxt_link_update_op(struct rte_eth_dev *eth_dev, int wait_to_complete);
 int bnxt_rcv_msg_from_vf(struct bnxt *bp, uint16_t vf_id, void *msg);
 int is_bnxt_in_error(struct bnxt *bp);
 
+int bnxt_map_fw_health_status_regs(struct bnxt *bp);
+
 bool is_bnxt_supported(struct rte_eth_dev *dev);
 bool bnxt_stratus_device(struct bnxt *bp);
 extern const struct rte_flow_ops bnxt_flow_ops;
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 95d63435bb..a23ca2b53d 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -3562,6 +3562,40 @@ static const struct eth_dev_ops bnxt_dev_ops = {
 	.timesync_read_tx_timestamp = bnxt_timesync_read_tx_timestamp,
 };
 
+int bnxt_map_fw_health_status_regs(struct bnxt *bp)
+{
+	struct bnxt_error_recovery_info *info = bp->recovery_info;
+	uint32_t reg_base = 0xffffffff;
+	int i;
+
+	/* Only pre-map the monitoring GRC registers using window 2 */
+	for (i = 0; i < BNXT_FW_STATUS_REG_CNT; i++) {
+		uint32_t reg = info->status_regs[i];
+
+		if (BNXT_FW_STATUS_REG_TYPE(reg) != BNXT_FW_STATUS_REG_TYPE_GRC)
+			continue;
+
+		if (reg_base == 0xffffffff)
+			reg_base = reg & 0xfffff000;
+		if ((reg & 0xfffff000) != reg_base)
+			return -ERANGE;
+
+		/* Use mask 0xffc as the Lower 2 bits indicates
+		 * address space location
+		 */
+		info->mapped_status_regs[i] = BNXT_GRCP_WINDOW_2_BASE +
+						(reg & 0xffc);
+	}
+
+	if (reg_base == 0xffffffff)
+		return 0;
+
+	rte_write32(reg_base, (uint8_t *)bp->bar0 +
+		    BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4);
+
+	return 0;
+}
+
 static void bnxt_dev_cleanup(struct bnxt *bp)
 {
 	bnxt_set_hwrm_link_config(bp, false);
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index f197997b89..34672dfb00 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -4788,6 +4788,10 @@ int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp)
 err:
 	HWRM_UNLOCK();
 
+	/* Map the FW status registers */
+	if (!rc)
+		rc = bnxt_map_fw_health_status_regs(bp);
+
 	if (rc) {
 		rte_free(bp->recovery_info);
 		bp->recovery_info = NULL;
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v3 08/15] net/bnxt: advertise error recovery capability and handle async event
  2019-10-02  1:23         ` [dpdk-dev] [PATCH v3 00/15] " Ajit Khaparde
                             ` (6 preceding siblings ...)
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 07/15] net/bnxt: map status registers for FW health monitoring Ajit Khaparde
@ 2019-10-02  1:23           ` Ajit Khaparde
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 09/15] net/bnxt: add code for periodic FW health monitoring Ajit Khaparde
                             ` (7 subsequent siblings)
  15 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-10-02  1:23 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

1. Advertise the HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_ERROR_RECOVERY_SUPPORT flag
   in the FUNC_DRV_RGTR command.
2. Request the async event ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY
   in the FUNC_DRV_RGTR command.
3. Handle the async event EVENT_ID_ERROR_RECOVERY from FW.

Firmware will use error recovery only if all driver instances support
the error recovery process.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h      |  2 ++
 drivers/net/bnxt/bnxt_cpr.c  | 45 ++++++++++++++++++++++++++++++++++++
 drivers/net/bnxt/bnxt_cpr.h  | 12 ++++++++++
 drivers/net/bnxt/bnxt_hwrm.c |  5 ++++
 drivers/net/bnxt/bnxt_hwrm.h |  2 ++
 5 files changed, 66 insertions(+)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index ac2bf158dd..ea556d0792 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -365,6 +365,8 @@ struct bnxt_error_recovery_info {
 	uint8_t		delay_after_reset[BNXT_NUM_RESET_REG];
 #define BNXT_FLAG_ERROR_RECOVERY_HOST	(1 << 0)
 #define BNXT_FLAG_ERROR_RECOVERY_CO_CPU	(1 << 1)
+#define BNXT_FLAG_MASTER_FUNC		(1 << 2)
+#define BNXT_FLAG_RECOVERY_ENABLED	(1 << 3)
 	uint32_t	flags;
 };
 
diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index 3afb9902b0..1b046bbfac 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -21,6 +21,7 @@ void bnxt_handle_async_event(struct bnxt *bp,
 	struct hwrm_async_event_cmpl *async_cmp =
 				(struct hwrm_async_event_cmpl *)cmp;
 	uint16_t event_id = rte_le_to_cpu_16(async_cmp->event_id);
+	struct bnxt_error_recovery_info *info;
 	uint32_t event_data;
 
 	/* TODO: HWRM async events are not defined yet */
@@ -65,6 +66,31 @@ void bnxt_handle_async_event(struct bnxt *bp,
 		rte_eal_alarm_set(US_PER_MS, bnxt_dev_reset_and_resume,
 				  (void *)bp);
 		break;
+	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY:
+		info = bp->recovery_info;
+
+		if (!info)
+			return;
+
+		PMD_DRV_LOG(INFO, "Error recovery async event received\n");
+
+		event_data = rte_le_to_cpu_32(async_cmp->event_data1) &
+				EVENT_DATA1_FLAGS_MASK;
+
+		if (event_data & EVENT_DATA1_FLAGS_MASTER_FUNC)
+			info->flags |= BNXT_FLAG_MASTER_FUNC;
+		else
+			info->flags &= ~BNXT_FLAG_MASTER_FUNC;
+
+		if (event_data & EVENT_DATA1_FLAGS_RECOVERY_ENABLED)
+			info->flags |= BNXT_FLAG_RECOVERY_ENABLED;
+		else
+			info->flags &= ~BNXT_FLAG_RECOVERY_ENABLED;
+
+		PMD_DRV_LOG(INFO, "recovery enabled(%d), master function(%d)\n",
+			    bnxt_is_recovery_enabled(bp),
+			    bnxt_is_master_func(bp));
+		break;
 	default:
 		PMD_DRV_LOG(INFO, "handle_async_event id = 0x%x\n", event_id);
 		break;
@@ -189,3 +215,22 @@ int bnxt_event_hwrm_resp_handler(struct bnxt *bp, struct cmpl_base *cmp)
 
 	return evt;
 }
+
+bool bnxt_is_master_func(struct bnxt *bp)
+{
+	if (bp->recovery_info->flags & BNXT_FLAG_MASTER_FUNC)
+		return true;
+
+	return false;
+}
+
+bool bnxt_is_recovery_enabled(struct bnxt *bp)
+{
+	struct bnxt_error_recovery_info *info;
+
+	info = bp->recovery_info;
+	if (info && (info->flags & BNXT_FLAG_RECOVERY_ENABLED))
+		return true;
+
+	return false;
+}
diff --git a/drivers/net/bnxt/bnxt_cpr.h b/drivers/net/bnxt/bnxt_cpr.h
index b61bafa0e8..f118bda36e 100644
--- a/drivers/net/bnxt/bnxt_cpr.h
+++ b/drivers/net/bnxt/bnxt_cpr.h
@@ -113,4 +113,16 @@ void bnxt_dev_reset_and_resume(void *arg);
 #define EVENT_DATA1_REASON_CODE_MASK                   \
 	HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MASK
 
+#define EVENT_DATA1_FLAGS_MASK                         \
+	HWRM_ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_MASK
+
+#define EVENT_DATA1_FLAGS_MASTER_FUNC                  \
+	HWRM_ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_MASTER_FUNC
+
+#define EVENT_DATA1_FLAGS_RECOVERY_ENABLED             \
+	HWRM_ASYNC_EVENT_CMPL_ERROR_RECOVERY_EVENT_DATA1_FLAGS_RECOVERY_ENABLED
+
+bool bnxt_is_recovery_enabled(struct bnxt *bp);
+bool bnxt_is_master_func(struct bnxt *bp);
+
 #endif
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 34672dfb00..c7d6f9fdc4 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -689,6 +689,8 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp)
 		return 0;
 
 	flags = HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_HOT_RESET_SUPPORT;
+	if (bp->flags & BNXT_FLAG_FW_CAP_ERROR_RECOVERY)
+		flags |= HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_ERROR_RECOVERY_SUPPORT;
 
 	/* PFs and trusted VFs should indicate the support of the
 	 * Master capability on non Stingray platform
@@ -726,6 +728,9 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp)
 				 ASYNC_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED |
 				 ASYNC_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE |
 				 ASYNC_CMPL_EVENT_ID_RESET_NOTIFY);
+	if (bp->flags & BNXT_FLAG_FW_CAP_ERROR_RECOVERY)
+		req.async_event_fwd[0] |=
+			rte_cpu_to_le_32(ASYNC_CMPL_EVENT_ID_ERROR_RECOVERY);
 	req.async_event_fwd[1] |=
 		rte_cpu_to_le_32(ASYNC_CMPL_EVENT_ID_PF_DRVR_UNLOAD |
 				 ASYNC_CMPL_EVENT_ID_VF_CFG_CHANGE);
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index c332c129dd..44e3355075 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -23,6 +23,8 @@ struct bnxt_cp_ring_info;
 	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE)
 #define ASYNC_CMPL_EVENT_ID_RESET_NOTIFY \
 	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY)
+#define ASYNC_CMPL_EVENT_ID_ERROR_RECOVERY \
+	(1 << HWRM_ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY)
 #define ASYNC_CMPL_EVENT_ID_PF_DRVR_UNLOAD	\
 	(1 << (HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD - 32))
 #define ASYNC_CMPL_EVENT_ID_VF_CFG_CHANGE	\
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v3 09/15] net/bnxt: add code for periodic FW health monitoring
  2019-10-02  1:23         ` [dpdk-dev] [PATCH v3 00/15] " Ajit Khaparde
                             ` (7 preceding siblings ...)
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 08/15] net/bnxt: advertise error recovery capability and handle async event Ajit Khaparde
@ 2019-10-02  1:23           ` Ajit Khaparde
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 10/15] net/bnxt: add support for FW reset Ajit Khaparde
                             ` (6 subsequent siblings)
  15 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-10-02  1:23 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

Periodically poll the FW heartbeat and FW recovery counter registers
to check the FW health. The polling frequency is advertised by the FW
in the HWRM_ERROR_RECOVERY_QCFG response.
Schedule the task upon receiving the async event from FW.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        |  6 +++
 drivers/net/bnxt/bnxt_cpr.c    | 10 ++++
 drivers/net/bnxt/bnxt_ethdev.c | 97 ++++++++++++++++++++++++++++++++++
 3 files changed, 113 insertions(+)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index ea556d0792..50b9b38565 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -368,6 +368,9 @@ struct bnxt_error_recovery_info {
 #define BNXT_FLAG_MASTER_FUNC		(1 << 2)
 #define BNXT_FLAG_RECOVERY_ENABLED	(1 << 3)
 	uint32_t	flags;
+
+	uint32_t        last_heart_beat;
+	uint32_t        last_reset_counter;
 };
 
 /* address space location of register */
@@ -416,6 +419,7 @@ struct bnxt {
 #define BNXT_FLAG_FW_CAP_IF_CHANGE	(1 << 17)
 #define BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE	(1 << 18)
 #define BNXT_FLAG_FW_CAP_ERROR_RECOVERY		(1 << 19)
+#define BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED	(1 << 20)
 #define BNXT_FLAG_EXT_STATS_SUPPORTED	(1 << 29)
 #define BNXT_FLAG_NEW_RM	(1 << 30)
 #define BNXT_FLAG_INIT_DONE	(1U << 31)
@@ -532,6 +536,8 @@ int bnxt_rcv_msg_from_vf(struct bnxt *bp, uint16_t vf_id, void *msg);
 int is_bnxt_in_error(struct bnxt *bp);
 
 int bnxt_map_fw_health_status_regs(struct bnxt *bp);
+uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index);
+void bnxt_schedule_fw_health_check(struct bnxt *bp);
 
 bool is_bnxt_supported(struct rte_eth_dev *dev);
 bool bnxt_stratus_device(struct bnxt *bp);
diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index 1b046bbfac..38931799eb 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -90,6 +90,16 @@ void bnxt_handle_async_event(struct bnxt *bp,
 		PMD_DRV_LOG(INFO, "recovery enabled(%d), master function(%d)\n",
 			    bnxt_is_recovery_enabled(bp),
 			    bnxt_is_master_func(bp));
+
+		if (bp->flags & BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED)
+			return;
+
+		info->last_heart_beat =
+			bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG);
+		info->last_reset_counter =
+			bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG);
+
+		bnxt_schedule_fw_health_check(bp);
 		break;
 	default:
 		PMD_DRV_LOG(INFO, "handle_async_event id = 0x%x\n", event_id);
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index a23ca2b53d..d28f1bd8f9 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -169,6 +169,7 @@ static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu);
 static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev);
 static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev);
 static int bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev);
+static void bnxt_cancel_fw_health_check(struct bnxt *bp);
 
 int is_bnxt_in_error(struct bnxt *bp)
 {
@@ -858,6 +859,7 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
 	bp->flags |= BNXT_FLAG_INIT_DONE;
 	eth_dev->data->dev_started = 1;
 	bp->dev_stopped = 0;
+	bnxt_schedule_fw_health_check(bp);
 	return 0;
 
 error:
@@ -910,6 +912,8 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
 	/* disable uio/vfio intr/eventfd mapping */
 	rte_intr_disable(intr_handle);
 
+	bnxt_cancel_fw_health_check(bp);
+
 	bp->flags &= ~BNXT_FLAG_INIT_DONE;
 	if (bp->eth_dev->data->dev_started) {
 		/* TBD: STOP HW queues DMA */
@@ -3682,6 +3686,99 @@ void bnxt_dev_reset_and_resume(void *arg)
 		PMD_DRV_LOG(ERR, "Error setting recovery alarm");
 }
 
+uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index)
+{
+	struct bnxt_error_recovery_info *info = bp->recovery_info;
+	uint32_t reg = info->status_regs[index];
+	uint32_t type, offset, val = 0;
+
+	type = BNXT_FW_STATUS_REG_TYPE(reg);
+	offset = BNXT_FW_STATUS_REG_OFF(reg);
+
+	switch (type) {
+	case BNXT_FW_STATUS_REG_TYPE_CFG:
+		rte_pci_read_config(bp->pdev, &val, sizeof(val), offset);
+		break;
+	case BNXT_FW_STATUS_REG_TYPE_GRC:
+		offset = info->mapped_status_regs[index];
+		/* FALLTHROUGH */
+	case BNXT_FW_STATUS_REG_TYPE_BAR0:
+		val = rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 +
+				       offset));
+		break;
+	}
+
+	return val;
+}
+
+/* Driver should poll FW heartbeat, reset_counter with the frequency
+ * advertised by FW in HWRM_ERROR_RECOVERY_QCFG.
+ * When the driver detects heartbeat stop or change in reset_counter,
+ * it has to trigger a reset to recover from the error condition.
+ * A “master PF” is the function who will have the privilege to
+ * initiate the chimp reset. The master PF will be elected by the
+ * firmware and will be notified through async message.
+ */
+static void bnxt_check_fw_health(void *arg)
+{
+	struct bnxt *bp = arg;
+	struct bnxt_error_recovery_info *info = bp->recovery_info;
+	uint32_t val = 0;
+
+	if (!info || !bnxt_is_recovery_enabled(bp) ||
+	    is_bnxt_in_error(bp))
+		return;
+
+	val = bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG);
+	if (val == info->last_heart_beat)
+		goto reset;
+
+	info->last_heart_beat = val;
+
+	val = bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG);
+	if (val != info->last_reset_counter)
+		goto reset;
+
+	info->last_reset_counter = val;
+
+	rte_eal_alarm_set(US_PER_MS * info->driver_polling_freq,
+			  bnxt_check_fw_health, (void *)bp);
+
+	return;
+reset:
+	/* Stop DMA to/from device */
+	bp->flags |= BNXT_FLAG_FATAL_ERROR;
+	bp->flags |= BNXT_FLAG_FW_RESET;
+
+	PMD_DRV_LOG(ERR, "Detected FW dead condition\n");
+}
+
+void bnxt_schedule_fw_health_check(struct bnxt *bp)
+{
+	uint32_t polling_freq;
+
+	if (!bnxt_is_recovery_enabled(bp))
+		return;
+
+	if (bp->flags & BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED)
+		return;
+
+	polling_freq = bp->recovery_info->driver_polling_freq;
+
+	rte_eal_alarm_set(US_PER_MS * polling_freq,
+			  bnxt_check_fw_health, (void *)bp);
+	bp->flags |= BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED;
+}
+
+static void bnxt_cancel_fw_health_check(struct bnxt *bp)
+{
+	if (!bnxt_is_recovery_enabled(bp))
+		return;
+
+	rte_eal_alarm_cancel(bnxt_check_fw_health, (void *)bp);
+	bp->flags &= ~BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED;
+}
+
 static bool bnxt_vf_pciid(uint16_t id)
 {
 	if (id == BROADCOM_DEV_ID_57304_VF ||
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v3 10/15] net/bnxt: add support for FW reset
  2019-10-02  1:23         ` [dpdk-dev] [PATCH v3 00/15] " Ajit Khaparde
                             ` (8 preceding siblings ...)
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 09/15] net/bnxt: add code for periodic FW health monitoring Ajit Khaparde
@ 2019-10-02  1:23           ` Ajit Khaparde
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 11/15] net/bnxt: add hot firmware upgrade support for Stingray Ajit Khaparde
                             ` (5 subsequent siblings)
  15 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-10-02  1:23 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

Add code to perform FW_RESET. When the driver detects an error in FW,
it has to initiate the recovery by resetting the cores. FW advertises
the method to do a core reset, along with the reset register offsets and
values needed to perform the reset, in the response to the
HWRM_ERROR_RECOVERY_QCFG command.

There are two ways to recover from the error:
1. The master function issues core resets to recover from the error.
2. The master function detects the chimp dead condition and notifies the
   Kong processor about it through the FW_RESET HWRM command.
   The Kong processor then sends a RESET_NOTIFY async event with
   REASON_CODE_FW_EXCEPTION_FATAL to all the PFs/VFs, informing them
   that the chimp is dead and that it is going to reset the chimp.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h        |   1 +
 drivers/net/bnxt/bnxt_ethdev.c | 104 ++++++++++++++++++++++++++++++++-
 drivers/net/bnxt/bnxt_hwrm.c   |  26 +++++++++
 drivers/net/bnxt/bnxt_hwrm.h   |   1 +
 4 files changed, 131 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 50b9b38565..637ee9a0f7 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -388,6 +388,7 @@ struct bnxt_error_recovery_info {
 #define BNXT_FW_STATUS_REG_OFF(reg)	((reg) & ~BNXT_FW_STATUS_REG_TYPE_MASK)
 
 #define BNXT_GRCP_WINDOW_2_BASE		0x2000
+#define BNXT_GRCP_WINDOW_3_BASE		0x3000
 
 #define BNXT_HWRM_SHORT_REQ_LEN		sizeof(struct hwrm_short_input)
 struct bnxt {
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index d28f1bd8f9..40973c37b9 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -3566,6 +3566,19 @@ static const struct eth_dev_ops bnxt_dev_ops = {
 	.timesync_read_tx_timestamp = bnxt_timesync_read_tx_timestamp,
 };
 
+static uint32_t bnxt_map_reset_regs(struct bnxt *bp, uint32_t reg)
+{
+	uint32_t offset;
+
+	/* Only pre-map the reset GRC registers using window 3 */
+	rte_write32(reg & 0xfffff000, (uint8_t *)bp->bar0 +
+		    BNXT_GRCPF_REG_WINDOW_BASE_OUT + 8);
+
+	offset = BNXT_GRCP_WINDOW_3_BASE + (reg & 0xffc);
+
+	return offset;
+}
+
 int bnxt_map_fw_health_status_regs(struct bnxt *bp)
 {
 	struct bnxt_error_recovery_info *info = bp->recovery_info;
@@ -3600,6 +3613,34 @@ int bnxt_map_fw_health_status_regs(struct bnxt *bp)
 	return 0;
 }
 
+static void bnxt_write_fw_reset_reg(struct bnxt *bp, uint32_t index)
+{
+	struct bnxt_error_recovery_info *info = bp->recovery_info;
+	uint32_t delay = info->delay_after_reset[index];
+	uint32_t val = info->reset_reg_val[index];
+	uint32_t reg = info->reset_reg[index];
+	uint32_t type, offset;
+
+	type = BNXT_FW_STATUS_REG_TYPE(reg);
+	offset = BNXT_FW_STATUS_REG_OFF(reg);
+
+	switch (type) {
+	case BNXT_FW_STATUS_REG_TYPE_CFG:
+		rte_pci_write_config(bp->pdev, &val, sizeof(val), offset);
+		break;
+	case BNXT_FW_STATUS_REG_TYPE_GRC:
+		offset = bnxt_map_reset_regs(bp, offset);
+		rte_write32(val, (uint8_t *)bp->bar0 + offset);
+		break;
+	case BNXT_FW_STATUS_REG_TYPE_BAR0:
+		rte_write32(val, (uint8_t *)bp->bar0 + offset);
+		break;
+	}
+	/* wait on a specific interval of time until core reset is complete */
+	if (delay)
+		rte_delay_ms(delay);
+}
+
 static void bnxt_dev_cleanup(struct bnxt *bp)
 {
 	bnxt_set_hwrm_link_config(bp, false);
@@ -3711,6 +3752,59 @@ uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index)
 	return val;
 }
 
+static int bnxt_fw_reset_all(struct bnxt *bp)
+{
+	struct bnxt_error_recovery_info *info = bp->recovery_info;
+	uint32_t i;
+	int rc = 0;
+
+	if (info->flags & BNXT_FLAG_ERROR_RECOVERY_HOST) {
+		/* Reset through master function driver */
+		for (i = 0; i < info->reg_array_cnt; i++)
+			bnxt_write_fw_reset_reg(bp, i);
+		/* Wait for time specified by FW after triggering reset */
+		rte_delay_ms(info->master_func_wait_period_after_reset);
+	} else if (info->flags & BNXT_FLAG_ERROR_RECOVERY_CO_CPU) {
+		/* Reset with the help of Kong processor */
+		rc = bnxt_hwrm_fw_reset(bp);
+		if (rc)
+			PMD_DRV_LOG(ERR, "Failed to reset FW\n");
+	}
+
+	return rc;
+}
+
+static void bnxt_fw_reset_cb(void *arg)
+{
+	struct bnxt *bp = arg;
+	struct bnxt_error_recovery_info *info = bp->recovery_info;
+	int rc = 0;
+
+	/* Only Master function can do FW reset */
+	if (bnxt_is_master_func(bp) &&
+	    bnxt_is_recovery_enabled(bp)) {
+		rc = bnxt_fw_reset_all(bp);
+		if (rc) {
+			PMD_DRV_LOG(ERR, "Adapter recovery failed\n");
+			return;
+		}
+	}
+
+	/* if recovery method is ERROR_RECOVERY_CO_CPU, KONG will send
+	 * EXCEPTION_FATAL_ASYNC event to all the functions
+	 * (including MASTER FUNC). After receiving this Async, all the active
+	 * drivers should treat this case as FW initiated recovery
+	 */
+	if (info->flags & BNXT_FLAG_ERROR_RECOVERY_HOST) {
+		bp->fw_reset_min_msecs = BNXT_MIN_FW_READY_TIMEOUT;
+		bp->fw_reset_max_msecs = BNXT_MAX_FW_RESET_TIMEOUT;
+
+		/* To recover from error */
+		rte_eal_alarm_set(US_PER_MS, bnxt_dev_reset_and_resume,
+				  (void *)bp);
+	}
+}
+
 /* Driver should poll FW heartbeat, reset_counter with the frequency
  * advertised by FW in HWRM_ERROR_RECOVERY_QCFG.
  * When the driver detects heartbeat stop or change in reset_counter,
@@ -3723,7 +3817,7 @@ static void bnxt_check_fw_health(void *arg)
 {
 	struct bnxt *bp = arg;
 	struct bnxt_error_recovery_info *info = bp->recovery_info;
-	uint32_t val = 0;
+	uint32_t val = 0, wait_msec;
 
 	if (!info || !bnxt_is_recovery_enabled(bp) ||
 	    is_bnxt_in_error(bp))
@@ -3751,6 +3845,14 @@ static void bnxt_check_fw_health(void *arg)
 	bp->flags |= BNXT_FLAG_FW_RESET;
 
 	PMD_DRV_LOG(ERR, "Detected FW dead condition\n");
+
+	if (bnxt_is_master_func(bp))
+		wait_msec = info->master_func_wait_period;
+	else
+		wait_msec = info->normal_func_wait_period;
+
+	rte_eal_alarm_set(US_PER_MS * wait_msec,
+			  bnxt_fw_reset_cb, (void *)bp);
 }
 
 void bnxt_schedule_fw_health_check(struct bnxt *bp)
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index c7d6f9fdc4..e96e6cef91 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -4803,3 +4803,29 @@ int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp)
 	}
 	return rc;
 }
+
+int bnxt_hwrm_fw_reset(struct bnxt *bp)
+{
+	struct hwrm_fw_reset_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_fw_reset_input req = {0};
+	int rc;
+
+	if (!BNXT_PF(bp))
+		return -EOPNOTSUPP;
+
+	HWRM_PREP(req, FW_RESET, BNXT_USE_KONG(bp));
+
+	req.embedded_proc_type =
+		HWRM_FW_RESET_INPUT_EMBEDDED_PROC_TYPE_CHIP;
+	req.selfrst_status =
+		HWRM_FW_RESET_INPUT_SELFRST_STATUS_SELFRSTASAP;
+	req.flags = HWRM_FW_RESET_INPUT_FLAGS_RESET_GRACEFUL;
+
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req),
+				    BNXT_USE_KONG(bp));
+
+	HWRM_CHECK_RESULT();
+	HWRM_UNLOCK();
+
+	return rc;
+}
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index 44e3355075..db25ad5919 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -205,4 +205,5 @@ int bnxt_hwrm_tunnel_redirect_info(struct bnxt *bp, uint8_t tun_type,
 int bnxt_hwrm_set_mac(struct bnxt *bp);
 int bnxt_hwrm_if_change(struct bnxt *bp, bool state);
 int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp);
+int bnxt_hwrm_fw_reset(struct bnxt *bp);
 #endif
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v3 11/15] net/bnxt: add hot firmware upgrade support for Stingray
  2019-10-02  1:23         ` [dpdk-dev] [PATCH v3 00/15] " Ajit Khaparde
                             ` (9 preceding siblings ...)
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 10/15] net/bnxt: add support for FW reset Ajit Khaparde
@ 2019-10-02  1:23           ` Ajit Khaparde
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 12/15] net/bnxt: reduce verbosity of logs Ajit Khaparde
                             ` (4 subsequent siblings)
  15 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-10-02  1:23 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Rahul Gupta, Ajit Kumar Khaparde

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

FW sets HWRM_FUNC_QCAPS_OUTPUT_FLAGS_ERR_RECOVER_RELOAD
in the response to the HWRM_FUNC_QCAPS command if the device
requires the fastboot FW to be invoked during FW reset.

The driver has to poll for the shutdown bit in the fw_status register:
1. In case of a hot FW upgrade, this bit will be set after all
   function drivers have unregistered with the FW.
2. In case of FW-initiated error recovery, this bit will be
   set after the FW has collected the core dump.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Signed-off-by: Rahul Gupta <rahul.gupta@broadcom.com>
Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h                |  3 +++
 drivers/net/bnxt/bnxt_cpr.c            | 35 ++++++++++++++++++++++++++
 drivers/net/bnxt/bnxt_cpr.h            |  1 +
 drivers/net/bnxt/bnxt_ethdev.c         |  2 ++
 drivers/net/bnxt/bnxt_hwrm.c           |  5 ++++
 drivers/net/bnxt/hsi_struct_def_dpdk.h |  8 ++++++
 6 files changed, 54 insertions(+)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 637ee9a0f7..f47874882b 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -390,6 +390,8 @@ struct bnxt_error_recovery_info {
 #define BNXT_GRCP_WINDOW_2_BASE		0x2000
 #define BNXT_GRCP_WINDOW_3_BASE		0x3000
 
+#define BNXT_FW_STATUS_SHUTDOWN		0x100000
+
 #define BNXT_HWRM_SHORT_REQ_LEN		sizeof(struct hwrm_short_input)
 struct bnxt {
 	void				*bar0;
@@ -421,6 +423,7 @@ struct bnxt {
 #define BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE	(1 << 18)
 #define BNXT_FLAG_FW_CAP_ERROR_RECOVERY		(1 << 19)
 #define BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED	(1 << 20)
+#define BNXT_FLAG_FW_CAP_ERR_RECOVER_RELOAD	(1 << 21)
 #define BNXT_FLAG_EXT_STATS_SUPPORTED	(1 << 29)
 #define BNXT_FLAG_NEW_RM	(1 << 30)
 #define BNXT_FLAG_INIT_DONE	(1U << 31)
diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index 38931799eb..00ca6bbd1f 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -5,6 +5,7 @@
 
 #include <rte_malloc.h>
 #include <rte_alarm.h>
+#include <rte_cycles.h>
 
 #include "bnxt.h"
 #include "bnxt_cpr.h"
@@ -12,6 +13,40 @@
 #include "bnxt_ring.h"
 #include "hsi_struct_def_dpdk.h"
 
+void bnxt_wait_for_device_shutdown(struct bnxt *bp)
+{
+	uint32_t val, timeout;
+
+	/* if HWRM_FUNC_QCAPS_OUTPUT_FLAGS_ERR_RECOVER_RELOAD is set
+	 * in HWRM_FUNC_QCAPS command, wait for FW_STATUS to set
+	 * the SHUTDOWN bit in health register
+	 */
+	if (!(bp->recovery_info &&
+	      (bp->flags & BNXT_FLAG_FW_CAP_ERR_RECOVER_RELOAD)))
+		return;
+
+	/* Driver has to wait for fw_reset_max_msecs or shutdown bit which comes
+	 * first for FW to collect crash dump.
+	 */
+	timeout = bp->fw_reset_max_msecs;
+
+	/* Driver has to poll for shutdown bit in fw_status register
+	 *
+	 * 1. in case of hot fw upgrade, this bit will be set after all
+	 *    function drivers unregistered with fw.
+	 * 2. in case of fw initiated error recovery, this bit will be
+	 *    set after fw has collected the core dump
+	 */
+	do {
+		val = bnxt_read_fw_status_reg(bp, BNXT_FW_STATUS_REG);
+		if (val & BNXT_FW_STATUS_SHUTDOWN)
+			return;
+
+		rte_delay_ms(100);
+		timeout -= 100;
+	} while (timeout);
+}
+
 /*
  * Async event handling
  */
diff --git a/drivers/net/bnxt/bnxt_cpr.h b/drivers/net/bnxt/bnxt_cpr.h
index f118bda36e..c2880783f6 100644
--- a/drivers/net/bnxt/bnxt_cpr.h
+++ b/drivers/net/bnxt/bnxt_cpr.h
@@ -107,6 +107,7 @@ void bnxt_handle_async_event(struct bnxt *bp, struct cmpl_base *cmp);
 void bnxt_handle_fwd_req(struct bnxt *bp, struct cmpl_base *cmp);
 int bnxt_event_hwrm_resp_handler(struct bnxt *bp, struct cmpl_base *cmp);
 void bnxt_dev_reset_and_resume(void *arg);
+void bnxt_wait_for_device_shutdown(struct bnxt *bp);
 
 #define EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL     \
 	HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 40973c37b9..fbd00d1c72 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -3721,6 +3721,8 @@ void bnxt_dev_reset_and_resume(void *arg)
 
 	bnxt_dev_cleanup(bp);
 
+	bnxt_wait_for_device_shutdown(bp);
+
 	rc = rte_eal_alarm_set(US_PER_MS * bp->fw_reset_min_msecs,
 			       bnxt_dev_recover, (void *)bp);
 	if (rc)
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index e96e6cef91..7304cbf72c 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -637,6 +637,11 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
 		bp->flags &= ~BNXT_FLAG_FW_CAP_ERROR_RECOVERY;
 	}
 
+	if (flags & HWRM_FUNC_QCAPS_OUTPUT_FLAGS_ERR_RECOVER_RELOAD)
+		bp->flags |= BNXT_FLAG_FW_CAP_ERR_RECOVER_RELOAD;
+	else
+		bp->flags &= ~BNXT_FLAG_FW_CAP_ERR_RECOVER_RELOAD;
+
 	HWRM_UNLOCK();
 
 	return rc;
diff --git a/drivers/net/bnxt/hsi_struct_def_dpdk.h b/drivers/net/bnxt/hsi_struct_def_dpdk.h
index 809ea48736..bd04fe4838 100644
--- a/drivers/net/bnxt/hsi_struct_def_dpdk.h
+++ b/drivers/net/bnxt/hsi_struct_def_dpdk.h
@@ -7327,6 +7327,14 @@ struct hwrm_func_qcaps_output {
 	 */
 	#define HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT_STATS_SUPPORTED \
 		UINT32_C(0x1000000)
+	/*
+	 * If the query is for a VF, then this flag shall be ignored.
+	 * If this query is for a PF and this flag is set to 1, then host
+	 * must initiate reset or reload (or fastboot) the firmware image
+	 * upon detection of device shutdown state.
+	 */
+	#define HWRM_FUNC_QCAPS_OUTPUT_FLAGS_ERR_RECOVER_RELOAD \
+		UINT32_C(0x2000000)
 	/*
 	 * This value is current MAC address configured for this
 	 * function. A value of 00-00-00-00-00-00 indicates no
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v3 12/15] net/bnxt: reduce verbosity of logs
  2019-10-02  1:23         ` [dpdk-dev] [PATCH v3 00/15] " Ajit Khaparde
                             ` (10 preceding siblings ...)
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 11/15] net/bnxt: add hot firmware upgrade support for Stingray Ajit Khaparde
@ 2019-10-02  1:23           ` Ajit Khaparde
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 13/15] net/bnxt: avoid null pointer dereference Ajit Khaparde
                             ` (3 subsequent siblings)
  15 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-10-02  1:23 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Lance Richardson, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

When IOMMU is available, EAL picks IOVA as VA as the default IOVA mode.
This causes the bnxt driver to log warning messages saying
"Memzone physical address same as virtual." and "Using rte_mem_virt2iova()"
during load.

Reduce the level of these logs to DEBUG. Also reduce the level of a
couple of other logs to DEBUG.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Lance Richardson <lance.richardson@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt_cpr.c    |  4 ++--
 drivers/net/bnxt/bnxt_ethdev.c | 21 +++++++++------------
 drivers/net/bnxt/bnxt_filter.c |  2 +-
 drivers/net/bnxt/bnxt_ring.c   |  7 +++----
 drivers/net/bnxt/bnxt_vnic.c   |  7 +++----
 5 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index 00ca6bbd1f..4817672ef0 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -137,7 +137,7 @@ void bnxt_handle_async_event(struct bnxt *bp,
 		bnxt_schedule_fw_health_check(bp);
 		break;
 	default:
-		PMD_DRV_LOG(INFO, "handle_async_event id = 0x%x\n", event_id);
+		PMD_DRV_LOG(DEBUG, "handle_async_event id = 0x%x\n", event_id);
 		break;
 	}
 }
@@ -254,7 +254,7 @@ int bnxt_event_hwrm_resp_handler(struct bnxt *bp, struct cmpl_base *cmp)
 		break;
 	default:
 		/* Ignore any other events */
-		PMD_DRV_LOG(INFO, "Ignoring %02x completion\n", CMP_TYPE(cmp));
+		PMD_DRV_LOG(DEBUG, "Ignoring %02x completion\n", CMP_TYPE(cmp));
 		break;
 	}
 
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index fbd00d1c72..7c3ef93253 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -3976,10 +3976,9 @@ static int bnxt_alloc_ctx_mem_blk(__rte_unused struct bnxt *bp,
 		memset(mz->addr, 0, mz->len);
 		mz_phys_addr = mz->iova;
 		if ((unsigned long)mz->addr == mz_phys_addr) {
-			PMD_DRV_LOG(WARNING,
-				"Memzone physical address same as virtual.\n");
-			PMD_DRV_LOG(WARNING,
-				    "Using rte_mem_virt2iova()\n");
+			PMD_DRV_LOG(DEBUG,
+				    "physical address same as virtual\n");
+			PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
 			mz_phys_addr = rte_mem_virt2iova(mz->addr);
 			if (mz_phys_addr == RTE_BAD_IOVA) {
 				PMD_DRV_LOG(ERR,
@@ -4012,10 +4011,9 @@ static int bnxt_alloc_ctx_mem_blk(__rte_unused struct bnxt *bp,
 	memset(mz->addr, 0, mz->len);
 	mz_phys_addr = mz->iova;
 	if ((unsigned long)mz->addr == mz_phys_addr) {
-		PMD_DRV_LOG(WARNING,
+		PMD_DRV_LOG(DEBUG,
 			    "Memzone physical address same as virtual.\n");
-		PMD_DRV_LOG(WARNING,
-			    "Using rte_mem_virt2iova()\n");
+		PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
 		for (sz = 0; sz < mem_size; sz += BNXT_PAGE_SIZE)
 			rte_mem_lock_page(((char *)mz->addr) + sz);
 		mz_phys_addr = rte_mem_virt2iova(mz->addr);
@@ -4203,9 +4201,9 @@ static int bnxt_alloc_stats_mem(struct bnxt *bp)
 	memset(mz->addr, 0, mz->len);
 	mz_phys_addr = mz->iova;
 	if ((unsigned long)mz->addr == mz_phys_addr) {
-		PMD_DRV_LOG(WARNING,
+		PMD_DRV_LOG(DEBUG,
 			    "Memzone physical address same as virtual.\n");
-		PMD_DRV_LOG(WARNING,
+		PMD_DRV_LOG(DEBUG,
 			    "Using rte_mem_virt2iova()\n");
 		mz_phys_addr = rte_mem_virt2iova(mz->addr);
 		if (mz_phys_addr == RTE_BAD_IOVA) {
@@ -4241,10 +4239,9 @@ static int bnxt_alloc_stats_mem(struct bnxt *bp)
 	memset(mz->addr, 0, mz->len);
 	mz_phys_addr = mz->iova;
 	if ((unsigned long)mz->addr == mz_phys_addr) {
-		PMD_DRV_LOG(WARNING,
+		PMD_DRV_LOG(DEBUG,
 			    "Memzone physical address same as virtual\n");
-		PMD_DRV_LOG(WARNING,
-			    "Using rte_mem_virt2iova()\n");
+		PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
 		mz_phys_addr = rte_mem_virt2iova(mz->addr);
 		if (mz_phys_addr == RTE_BAD_IOVA) {
 			PMD_DRV_LOG(ERR,
diff --git a/drivers/net/bnxt/bnxt_filter.c b/drivers/net/bnxt/bnxt_filter.c
index 0aed29fb08..34db988181 100644
--- a/drivers/net/bnxt/bnxt_filter.c
+++ b/drivers/net/bnxt/bnxt_filter.c
@@ -119,7 +119,7 @@ void bnxt_free_filter_mem(struct bnxt *bp)
 		filter = &bp->filter_info[i];
 		if (filter->fw_l2_filter_id != ((uint64_t)-1) &&
 		    filter->filter_type == HWRM_CFA_L2_FILTER) {
-			PMD_DRV_LOG(ERR, "L2 filter is not free\n");
+			PMD_DRV_LOG(DEBUG, "L2 filter is not free\n");
 			/* Call HWRM to try to free filter again */
 			rc = bnxt_hwrm_clear_l2_filter(bp, filter);
 			if (rc)
diff --git a/drivers/net/bnxt/bnxt_ring.c b/drivers/net/bnxt/bnxt_ring.c
index f19865c832..2f57e038a0 100644
--- a/drivers/net/bnxt/bnxt_ring.c
+++ b/drivers/net/bnxt/bnxt_ring.c
@@ -212,10 +212,9 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
 	mz_phys_addr_base = mz->iova;
 	mz_phys_addr = mz->iova;
 	if ((unsigned long)mz->addr == mz_phys_addr_base) {
-		PMD_DRV_LOG(WARNING,
-			"Memzone physical address same as virtual.\n");
-		PMD_DRV_LOG(WARNING,
-			"Using rte_mem_virt2iova()\n");
+		PMD_DRV_LOG(DEBUG,
+			    "Memzone physical address same as virtual.\n");
+		PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
 		for (sz = 0; sz < total_alloc_len; sz += getpagesize())
 			rte_mem_lock_page(((char *)mz->addr) + sz);
 		mz_phys_addr_base = rte_mem_virt2iova(mz->addr);
diff --git a/drivers/net/bnxt/bnxt_vnic.c b/drivers/net/bnxt/bnxt_vnic.c
index 98415633e4..9ea99388b7 100644
--- a/drivers/net/bnxt/bnxt_vnic.c
+++ b/drivers/net/bnxt/bnxt_vnic.c
@@ -150,10 +150,9 @@ int bnxt_alloc_vnic_attributes(struct bnxt *bp)
 	}
 	mz_phys_addr = mz->iova;
 	if ((unsigned long)mz->addr == mz_phys_addr) {
-		PMD_DRV_LOG(WARNING,
-			"Memzone physical address same as virtual.\n");
-		PMD_DRV_LOG(WARNING,
-			"Using rte_mem_virt2iova()\n");
+		PMD_DRV_LOG(DEBUG,
+			    "Memzone physical address same as virtual.\n");
+		PMD_DRV_LOG(DEBUG, "Using rte_mem_virt2iova()\n");
 		mz_phys_addr = rte_mem_virt2iova(mz->addr);
 		if (mz_phys_addr == RTE_BAD_IOVA) {
 			PMD_DRV_LOG(ERR,
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v3 13/15] net/bnxt: avoid null pointer dereference
  2019-10-02  1:23         ` [dpdk-dev] [PATCH v3 00/15] " Ajit Khaparde
                             ` (11 preceding siblings ...)
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 12/15] net/bnxt: reduce verbosity of logs Ajit Khaparde
@ 2019-10-02  1:23           ` Ajit Khaparde
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 14/15] net/bnxt: use BIT macro instead of bit fields Ajit Khaparde
                             ` (2 subsequent siblings)
  15 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-10-02  1:23 UTC (permalink / raw)
  To: dev
  Cc: ferruh.yigit, Kalesh AP, stable, Ajit Kumar Khaparde,
	Rahul Gupta, Lance Richardson

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

Commit "bd0a14c99f65" enables the creation of a dedicated completion
ring for asynchronous event handling instead of handling these
events on a receive completion ring on non Stingray Platforms.

This causes a segfault due to NULL pointer dereference in
bnxt_alloc_async_cp_ring() on Stingray. Fix this by checking the
pointer validity before accessing it.

Fixes: bd0a14c99f65 ("net/bnxt: use dedicated CPR for async events")
Cc: stable@dpdk.org

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Signed-off-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Rahul Gupta <rahul.gupta@broadcom.com>
Reviewed-by: Lance Richardson <lance.richardson@broadcom.com>
---
 drivers/net/bnxt/bnxt_ring.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_ring.c b/drivers/net/bnxt/bnxt_ring.c
index 2f57e038a0..ec17783cf8 100644
--- a/drivers/net/bnxt/bnxt_ring.c
+++ b/drivers/net/bnxt/bnxt_ring.c
@@ -694,13 +694,15 @@ int bnxt_alloc_hwrm_rings(struct bnxt *bp)
 int bnxt_alloc_async_cp_ring(struct bnxt *bp)
 {
 	struct bnxt_cp_ring_info *cpr = bp->async_cp_ring;
-	struct bnxt_ring *cp_ring = cpr->cp_ring_struct;
+	struct bnxt_ring *cp_ring;
 	uint8_t ring_type;
 	int rc;
 
-	if (BNXT_NUM_ASYNC_CPR(bp) == 0)
+	if (BNXT_NUM_ASYNC_CPR(bp) == 0 || cpr == NULL)
 		return 0;
 
+	cp_ring = cpr->cp_ring_struct;
+
 	if (BNXT_HAS_NQ(bp))
 		ring_type = HWRM_RING_ALLOC_INPUT_RING_TYPE_NQ;
 	else
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v3 14/15] net/bnxt: use BIT macro instead of bit fields
  2019-10-02  1:23         ` [dpdk-dev] [PATCH v3 00/15] " Ajit Khaparde
                             ` (12 preceding siblings ...)
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 13/15] net/bnxt: avoid null pointer dereference Ajit Khaparde
@ 2019-10-02  1:23           ` Ajit Khaparde
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 15/15] net/bnxt: add PTP support for Thor Ajit Khaparde
  2019-10-02 17:02           ` [dpdk-dev] [PATCH v3 00/15] bnxt patchset to support device error recovery Ferruh Yigit
  15 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-10-02  1:23 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

Use the BIT() macro instead of open-coded bit shifts when defining flags.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h      | 79 ++++++++++++++++++------------------
 drivers/net/bnxt/bnxt_util.h |  4 ++
 2 files changed, 44 insertions(+), 39 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index f47874882b..310b730e68 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -19,6 +19,7 @@
 #include <rte_time.h>
 
 #include "bnxt_cpr.h"
+#include "bnxt_util.h"
 
 #define BNXT_MAX_MTU		9574
 #define VLAN_TAG_SIZE		4
@@ -198,16 +199,16 @@ struct bnxt_ptp_cfg {
 	struct bnxt		*bp;
 #define BNXT_MAX_TX_TS	1
 	uint16_t			rxctl;
-#define BNXT_PTP_MSG_SYNC			(1 << 0)
-#define BNXT_PTP_MSG_DELAY_REQ			(1 << 1)
-#define BNXT_PTP_MSG_PDELAY_REQ			(1 << 2)
-#define BNXT_PTP_MSG_PDELAY_RESP		(1 << 3)
-#define BNXT_PTP_MSG_FOLLOW_UP			(1 << 8)
-#define BNXT_PTP_MSG_DELAY_RESP			(1 << 9)
-#define BNXT_PTP_MSG_PDELAY_RESP_FOLLOW_UP	(1 << 10)
-#define BNXT_PTP_MSG_ANNOUNCE			(1 << 11)
-#define BNXT_PTP_MSG_SIGNALING			(1 << 12)
-#define BNXT_PTP_MSG_MANAGEMENT			(1 << 13)
+#define BNXT_PTP_MSG_SYNC			BIT(0)
+#define BNXT_PTP_MSG_DELAY_REQ			BIT(1)
+#define BNXT_PTP_MSG_PDELAY_REQ			BIT(2)
+#define BNXT_PTP_MSG_PDELAY_RESP		BIT(3)
+#define BNXT_PTP_MSG_FOLLOW_UP			BIT(8)
+#define BNXT_PTP_MSG_DELAY_RESP			BIT(9)
+#define BNXT_PTP_MSG_PDELAY_RESP_FOLLOW_UP	BIT(10)
+#define BNXT_PTP_MSG_ANNOUNCE			BIT(11)
+#define BNXT_PTP_MSG_SIGNALING			BIT(12)
+#define BNXT_PTP_MSG_MANAGEMENT			BIT(13)
 #define BNXT_PTP_MSG_EVENTS		(BNXT_PTP_MSG_SYNC |		\
 					 BNXT_PTP_MSG_DELAY_REQ |	\
 					 BNXT_PTP_MSG_PDELAY_REQ |	\
@@ -363,10 +364,10 @@ struct bnxt_error_recovery_info {
 	uint32_t	reset_reg[BNXT_NUM_RESET_REG];
 	uint32_t	reset_reg_val[BNXT_NUM_RESET_REG];
 	uint8_t		delay_after_reset[BNXT_NUM_RESET_REG];
-#define BNXT_FLAG_ERROR_RECOVERY_HOST	(1 << 0)
-#define BNXT_FLAG_ERROR_RECOVERY_CO_CPU	(1 << 1)
-#define BNXT_FLAG_MASTER_FUNC		(1 << 2)
-#define BNXT_FLAG_RECOVERY_ENABLED	(1 << 3)
+#define BNXT_FLAG_ERROR_RECOVERY_HOST	BIT(0)
+#define BNXT_FLAG_ERROR_RECOVERY_CO_CPU	BIT(1)
+#define BNXT_FLAG_MASTER_FUNC		BIT(2)
+#define BNXT_FLAG_RECOVERY_ENABLED	BIT(3)
 	uint32_t	flags;
 
 	uint32_t        last_heart_beat;
@@ -402,31 +403,31 @@ struct bnxt {
 	void				*doorbell_base;
 
 	uint32_t		flags;
-#define BNXT_FLAG_REGISTERED	(1 << 0)
-#define BNXT_FLAG_VF		(1 << 1)
-#define BNXT_FLAG_PORT_STATS	(1 << 2)
-#define BNXT_FLAG_JUMBO		(1 << 3)
-#define BNXT_FLAG_SHORT_CMD	(1 << 4)
-#define BNXT_FLAG_UPDATE_HASH	(1 << 5)
-#define BNXT_FLAG_PTP_SUPPORTED	(1 << 6)
-#define BNXT_FLAG_MULTI_HOST    (1 << 7)
-#define BNXT_FLAG_EXT_RX_PORT_STATS	(1 << 8)
-#define BNXT_FLAG_EXT_TX_PORT_STATS	(1 << 9)
-#define BNXT_FLAG_KONG_MB_EN	(1 << 10)
-#define BNXT_FLAG_TRUSTED_VF_EN	(1 << 11)
-#define BNXT_FLAG_DFLT_VNIC_SET	(1 << 12)
-#define BNXT_FLAG_THOR_CHIP	(1 << 13)
-#define BNXT_FLAG_STINGRAY	(1 << 14)
-#define BNXT_FLAG_FW_RESET	(1 << 15)
-#define BNXT_FLAG_FATAL_ERROR	(1 << 16)
-#define BNXT_FLAG_FW_CAP_IF_CHANGE	(1 << 17)
-#define BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE	(1 << 18)
-#define BNXT_FLAG_FW_CAP_ERROR_RECOVERY		(1 << 19)
-#define BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED	(1 << 20)
-#define BNXT_FLAG_FW_CAP_ERR_RECOVER_RELOAD	(1 << 21)
-#define BNXT_FLAG_EXT_STATS_SUPPORTED	(1 << 29)
-#define BNXT_FLAG_NEW_RM	(1 << 30)
-#define BNXT_FLAG_INIT_DONE	(1U << 31)
+#define BNXT_FLAG_REGISTERED		BIT(0)
+#define BNXT_FLAG_VF			BIT(1)
+#define BNXT_FLAG_PORT_STATS		BIT(2)
+#define BNXT_FLAG_JUMBO			BIT(3)
+#define BNXT_FLAG_SHORT_CMD		BIT(4)
+#define BNXT_FLAG_UPDATE_HASH		BIT(5)
+#define BNXT_FLAG_PTP_SUPPORTED		BIT(6)
+#define BNXT_FLAG_MULTI_HOST    	BIT(7)
+#define BNXT_FLAG_EXT_RX_PORT_STATS	BIT(8)
+#define BNXT_FLAG_EXT_TX_PORT_STATS	BIT(9)
+#define BNXT_FLAG_KONG_MB_EN		BIT(10)
+#define BNXT_FLAG_TRUSTED_VF_EN		BIT(11)
+#define BNXT_FLAG_DFLT_VNIC_SET		BIT(12)
+#define BNXT_FLAG_THOR_CHIP		BIT(13)
+#define BNXT_FLAG_STINGRAY		BIT(14)
+#define BNXT_FLAG_FW_RESET		BIT(15)
+#define BNXT_FLAG_FATAL_ERROR		BIT(16)
+#define BNXT_FLAG_FW_CAP_IF_CHANGE		BIT(17)
+#define BNXT_FLAG_IF_CHANGE_HOT_FW_RESET_DONE	BIT(18)
+#define BNXT_FLAG_FW_CAP_ERROR_RECOVERY		BIT(19)
+#define BNXT_FLAG_FW_HEALTH_CHECK_SCHEDULED	BIT(20)
+#define BNXT_FLAG_FW_CAP_ERR_RECOVER_RELOAD	BIT(21)
+#define BNXT_FLAG_EXT_STATS_SUPPORTED		BIT(22)
+#define BNXT_FLAG_NEW_RM			BIT(23)
+#define BNXT_FLAG_INIT_DONE			BIT(24)
 #define BNXT_PF(bp)		(!((bp)->flags & BNXT_FLAG_VF))
 #define BNXT_VF(bp)		((bp)->flags & BNXT_FLAG_VF)
 #define BNXT_NPAR(bp)		((bp)->port_partition_type)
diff --git a/drivers/net/bnxt/bnxt_util.h b/drivers/net/bnxt/bnxt_util.h
index 9f1868a78f..a15b3a1a95 100644
--- a/drivers/net/bnxt/bnxt_util.h
+++ b/drivers/net/bnxt/bnxt_util.h
@@ -6,6 +6,10 @@
 #ifndef _BNXT_UTIL_H_
 #define _BNXT_UTIL_H_
 
+#ifndef BIT
+#define BIT(n)	(1UL << (n))
+#endif /* BIT */
+
 int bnxt_check_zero_bytes(const uint8_t *bytes, int len);
 void bnxt_eth_hw_addr_random(uint8_t *mac_addr);
 
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* [dpdk-dev] [PATCH v3 15/15] net/bnxt: add PTP support for Thor
  2019-10-02  1:23         ` [dpdk-dev] [PATCH v3 00/15] " Ajit Khaparde
                             ` (13 preceding siblings ...)
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 14/15] net/bnxt: use BIT macro instead of bit fields Ajit Khaparde
@ 2019-10-02  1:23           ` Ajit Khaparde
  2019-10-02 17:02           ` [dpdk-dev] [PATCH v3 00/15] bnxt patchset to support device error recovery Ferruh Yigit
  15 siblings, 0 replies; 48+ messages in thread
From: Ajit Khaparde @ 2019-10-02  1:23 UTC (permalink / raw)
  To: dev; +Cc: ferruh.yigit, Kalesh AP, Somnath Kotur, Ajit Kumar Khaparde

From: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>

On Thor, direct access to PTP registers (via GRC) is not supported.
Driver must use HWRM to access the timestamp information.

Vectorized Rx/Tx cannot be enabled if RTE_LIBRTE_IEEE1588=y.
Remove the PTP flags handling code from the vector Rx path.

Add support to read tx timestamp value and the time from the
timesync clock.

On Thor, Rx timestamps are provided directly in the Rx completion
records to the driver. Only 32 bits of the timestamp are present in
the completion. The driver needs to read the current 48 bit free running
timer using the HWRM_PORT_TS_QUERY command and combine the upper
16 bits from the HWRM response with the lower 32 bits in the
Rx completion to produce the 48 bit timestamp for the Rx packet.

Signed-off-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h                |  8 +++
 drivers/net/bnxt/bnxt_ethdev.c         | 46 +++++++++++---
 drivers/net/bnxt/bnxt_hwrm.c           | 86 ++++++++++++++++++++------
 drivers/net/bnxt/bnxt_hwrm.h           |  2 +
 drivers/net/bnxt/bnxt_rxr.c            | 42 +++++++++++--
 drivers/net/bnxt/bnxt_txr.c            |  7 ++-
 drivers/net/bnxt/hsi_struct_def_dpdk.h | 86 ++++++++++++++++++++++++++
 7 files changed, 243 insertions(+), 34 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 310b730e68..818a49f461 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -189,6 +189,10 @@ struct rte_flow {
 	struct bnxt_vnic_info	*vnic;
 };
 
+#define BNXT_PTP_FLAGS_PATH_TX		0x0
+#define BNXT_PTP_FLAGS_PATH_RX		0x1
+#define BNXT_PTP_FLAGS_CURRENT_TIME	0x2
+
 struct bnxt_ptp_cfg {
 #define BNXT_GRCPF_REG_WINDOW_BASE_OUT  0x400
 #define BNXT_GRCPF_REG_SYNC_TIME        0x480
@@ -234,6 +238,9 @@ struct bnxt_ptp_cfg {
 	uint32_t			rx_mapped_regs[BNXT_PTP_RX_REGS];
 	uint32_t			tx_regs[BNXT_PTP_TX_REGS];
 	uint32_t			tx_mapped_regs[BNXT_PTP_TX_REGS];
+
+	/* On Thor, the Rx timestamp is present in the Rx completion record */
+	uint64_t			rx_timestamp;
 };
 
 struct bnxt_coal {
@@ -428,6 +435,7 @@ struct bnxt {
 #define BNXT_FLAG_EXT_STATS_SUPPORTED		BIT(22)
 #define BNXT_FLAG_NEW_RM			BIT(23)
 #define BNXT_FLAG_INIT_DONE			BIT(24)
+#define BNXT_FLAG_FW_CAP_ONE_STEP_TX_TS		BIT(25)
 #define BNXT_PF(bp)		(!((bp)->flags & BNXT_FLAG_VF))
 #define BNXT_VF(bp)		((bp)->flags & BNXT_FLAG_VF)
 #define BNXT_NPAR(bp)		((bp)->port_partition_type)
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 7c3ef93253..0083ba6e83 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -740,6 +740,7 @@ static eth_rx_burst_t
 bnxt_receive_function(__rte_unused struct rte_eth_dev *eth_dev)
 {
 #ifdef RTE_ARCH_X86
+#ifndef RTE_LIBRTE_IEEE1588
 	/*
 	 * Vector mode receive can be enabled only if scatter rx is not
 	 * in use and rx offloads are limited to VLAN stripping and
@@ -766,6 +767,7 @@ bnxt_receive_function(__rte_unused struct rte_eth_dev *eth_dev)
 		    eth_dev->data->port_id,
 		    eth_dev->data->scattered_rx,
 		    eth_dev->data->dev_conf.rxmode.offloads);
+#endif
 #endif
 	return bnxt_recv_pkts;
 }
@@ -774,6 +776,7 @@ static eth_tx_burst_t
 bnxt_transmit_function(__rte_unused struct rte_eth_dev *eth_dev)
 {
 #ifdef RTE_ARCH_X86
+#ifndef RTE_LIBRTE_IEEE1588
 	/*
 	 * Vector mode transmit can be enabled only if not using scatter rx
 	 * or tx offloads.
@@ -791,6 +794,7 @@ bnxt_transmit_function(__rte_unused struct rte_eth_dev *eth_dev)
 		    eth_dev->data->port_id,
 		    eth_dev->data->scattered_rx,
 		    eth_dev->data->dev_conf.txmode.offloads);
+#endif
 #endif
 	return bnxt_xmit_pkts;
 }
@@ -3223,18 +3227,24 @@ bnxt_timesync_write_time(struct rte_eth_dev *dev, const struct timespec *ts)
 static int
 bnxt_timesync_read_time(struct rte_eth_dev *dev, struct timespec *ts)
 {
-	uint64_t ns, systime_cycles;
 	struct bnxt *bp = dev->data->dev_private;
 	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+	uint64_t ns, systime_cycles = 0;
+	int rc = 0;
 
 	if (!ptp)
 		return 0;
 
-	systime_cycles = bnxt_cc_read(bp);
+	if (BNXT_CHIP_THOR(bp))
+		rc = bnxt_hwrm_port_ts_query(bp, BNXT_PTP_FLAGS_CURRENT_TIME,
+					     &systime_cycles);
+	else
+		systime_cycles = bnxt_cc_read(bp);
+
 	ns = rte_timecounter_update(&ptp->tc, systime_cycles);
 	*ts = rte_ns_to_timespec(ns);
 
-	return 0;
+	return rc;
 }
 static int
 bnxt_timesync_enable(struct rte_eth_dev *dev)
@@ -3242,6 +3252,7 @@ bnxt_timesync_enable(struct rte_eth_dev *dev)
 	struct bnxt *bp = dev->data->dev_private;
 	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
 	uint32_t shift = 0;
+	int rc;
 
 	if (!ptp)
 		return 0;
@@ -3250,8 +3261,9 @@ bnxt_timesync_enable(struct rte_eth_dev *dev)
 	ptp->tx_tstamp_en = 1;
 	ptp->rxctl = BNXT_PTP_MSG_EVENTS;
 
-	if (!bnxt_hwrm_ptp_cfg(bp))
-		bnxt_map_ptp_regs(bp);
+	rc = bnxt_hwrm_ptp_cfg(bp);
+	if (rc)
+		return rc;
 
 	memset(&ptp->tc, 0, sizeof(struct rte_timecounter));
 	memset(&ptp->rx_tstamp_tc, 0, sizeof(struct rte_timecounter));
@@ -3269,6 +3281,9 @@ bnxt_timesync_enable(struct rte_eth_dev *dev)
 	ptp->tx_tstamp_tc.cc_shift = shift;
 	ptp->tx_tstamp_tc.nsec_mask = (1ULL << shift) - 1;
 
+	if (!BNXT_CHIP_THOR(bp))
+		bnxt_map_ptp_regs(bp);
+
 	return 0;
 }
 
@@ -3287,7 +3302,8 @@ bnxt_timesync_disable(struct rte_eth_dev *dev)
 
 	bnxt_hwrm_ptp_cfg(bp);
 
-	bnxt_unmap_ptp_regs(bp);
+	if (!BNXT_CHIP_THOR(bp))
+		bnxt_unmap_ptp_regs(bp);
 
 	return 0;
 }
@@ -3305,7 +3321,11 @@ bnxt_timesync_read_rx_timestamp(struct rte_eth_dev *dev,
 	if (!ptp)
 		return 0;
 
-	bnxt_get_rx_ts(bp, &rx_tstamp_cycles);
+	if (BNXT_CHIP_THOR(bp))
+		rx_tstamp_cycles = ptp->rx_timestamp;
+	else
+		bnxt_get_rx_ts(bp, &rx_tstamp_cycles);
+
 	ns = rte_timecounter_update(&ptp->rx_tstamp_tc, rx_tstamp_cycles);
 	*timestamp = rte_ns_to_timespec(ns);
 	return  0;
@@ -3319,15 +3339,21 @@ bnxt_timesync_read_tx_timestamp(struct rte_eth_dev *dev,
 	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
 	uint64_t tx_tstamp_cycles = 0;
 	uint64_t ns;
+	int rc = 0;
 
 	if (!ptp)
 		return 0;
 
-	bnxt_get_tx_ts(bp, &tx_tstamp_cycles);
+	if (BNXT_CHIP_THOR(bp))
+		rc = bnxt_hwrm_port_ts_query(bp, BNXT_PTP_FLAGS_PATH_TX,
+					     &tx_tstamp_cycles);
+	else
+		rc = bnxt_get_tx_ts(bp, &tx_tstamp_cycles);
+
 	ns = rte_timecounter_update(&ptp->tx_tstamp_tc, tx_tstamp_cycles);
 	*timestamp = rte_ns_to_timespec(ns);
 
-	return 0;
+	return rc;
 }
 
 static int
@@ -4574,6 +4600,8 @@ bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev)
 		}
 	}
 
+	rte_free(bp->ptp_cfg);
+	bp->ptp_cfg = NULL;
 	return rc;
 }
 
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 7304cbf72c..174dc75d54 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -506,31 +506,37 @@ static int bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
 
 	HWRM_CHECK_RESULT();
 
-	if (!(resp->flags & HWRM_PORT_MAC_PTP_QCFG_OUTPUT_FLAGS_DIRECT_ACCESS))
+	if (!BNXT_CHIP_THOR(bp) &&
+	    !(resp->flags & HWRM_PORT_MAC_PTP_QCFG_OUTPUT_FLAGS_DIRECT_ACCESS))
 		return 0;
 
+	if (resp->flags & HWRM_PORT_MAC_PTP_QCFG_OUTPUT_FLAGS_ONE_STEP_TX_TS)
+		bp->flags |= BNXT_FLAG_FW_CAP_ONE_STEP_TX_TS;
+
 	ptp = rte_zmalloc("ptp_cfg", sizeof(*ptp), 0);
 	if (!ptp)
 		return -ENOMEM;
 
-	ptp->rx_regs[BNXT_PTP_RX_TS_L] =
-		rte_le_to_cpu_32(resp->rx_ts_reg_off_lower);
-	ptp->rx_regs[BNXT_PTP_RX_TS_H] =
-		rte_le_to_cpu_32(resp->rx_ts_reg_off_upper);
-	ptp->rx_regs[BNXT_PTP_RX_SEQ] =
-		rte_le_to_cpu_32(resp->rx_ts_reg_off_seq_id);
-	ptp->rx_regs[BNXT_PTP_RX_FIFO] =
-		rte_le_to_cpu_32(resp->rx_ts_reg_off_fifo);
-	ptp->rx_regs[BNXT_PTP_RX_FIFO_ADV] =
-		rte_le_to_cpu_32(resp->rx_ts_reg_off_fifo_adv);
-	ptp->tx_regs[BNXT_PTP_TX_TS_L] =
-		rte_le_to_cpu_32(resp->tx_ts_reg_off_lower);
-	ptp->tx_regs[BNXT_PTP_TX_TS_H] =
-		rte_le_to_cpu_32(resp->tx_ts_reg_off_upper);
-	ptp->tx_regs[BNXT_PTP_TX_SEQ] =
-		rte_le_to_cpu_32(resp->tx_ts_reg_off_seq_id);
-	ptp->tx_regs[BNXT_PTP_TX_FIFO] =
-		rte_le_to_cpu_32(resp->tx_ts_reg_off_fifo);
+	if (!BNXT_CHIP_THOR(bp)) {
+		ptp->rx_regs[BNXT_PTP_RX_TS_L] =
+			rte_le_to_cpu_32(resp->rx_ts_reg_off_lower);
+		ptp->rx_regs[BNXT_PTP_RX_TS_H] =
+			rte_le_to_cpu_32(resp->rx_ts_reg_off_upper);
+		ptp->rx_regs[BNXT_PTP_RX_SEQ] =
+			rte_le_to_cpu_32(resp->rx_ts_reg_off_seq_id);
+		ptp->rx_regs[BNXT_PTP_RX_FIFO] =
+			rte_le_to_cpu_32(resp->rx_ts_reg_off_fifo);
+		ptp->rx_regs[BNXT_PTP_RX_FIFO_ADV] =
+			rte_le_to_cpu_32(resp->rx_ts_reg_off_fifo_adv);
+		ptp->tx_regs[BNXT_PTP_TX_TS_L] =
+			rte_le_to_cpu_32(resp->tx_ts_reg_off_lower);
+		ptp->tx_regs[BNXT_PTP_TX_TS_H] =
+			rte_le_to_cpu_32(resp->tx_ts_reg_off_upper);
+		ptp->tx_regs[BNXT_PTP_TX_SEQ] =
+			rte_le_to_cpu_32(resp->tx_ts_reg_off_seq_id);
+		ptp->tx_regs[BNXT_PTP_TX_FIFO] =
+			rte_le_to_cpu_32(resp->tx_ts_reg_off_fifo);
+	}
 
 	ptp->bp = bp;
 	bp->ptp_cfg = ptp;
@@ -4834,3 +4840,45 @@ int bnxt_hwrm_fw_reset(struct bnxt *bp)
 
 	return rc;
 }
+
+int bnxt_hwrm_port_ts_query(struct bnxt *bp, uint8_t path, uint64_t *timestamp)
+{
+	struct hwrm_port_ts_query_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_port_ts_query_input req = {0};
+	struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+	uint32_t flags = 0;
+	int rc;
+
+	if (!ptp)
+		return 0;
+
+	HWRM_PREP(req, PORT_TS_QUERY, BNXT_USE_CHIMP_MB);
+
+	switch (path) {
+	case BNXT_PTP_FLAGS_PATH_TX:
+		flags |= HWRM_PORT_TS_QUERY_INPUT_FLAGS_PATH_TX;
+		break;
+	case BNXT_PTP_FLAGS_PATH_RX:
+		flags |= HWRM_PORT_TS_QUERY_INPUT_FLAGS_PATH_RX;
+		break;
+	case BNXT_PTP_FLAGS_CURRENT_TIME:
+		flags |= HWRM_PORT_TS_QUERY_INPUT_FLAGS_CURRENT_TIME;
+		break;
+	}
+
+	req.flags = rte_cpu_to_le_32(flags);
+	req.port_id = rte_cpu_to_le_16(bp->pf.port_id);
+
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
+
+	HWRM_CHECK_RESULT();
+
+	if (timestamp) {
+		*timestamp = rte_le_to_cpu_32(resp->ptp_msg_ts[0]);
+		*timestamp |=
+			(uint64_t)(rte_le_to_cpu_32(resp->ptp_msg_ts[1])) << 32;
+	}
+	HWRM_UNLOCK();
+
+	return rc;
+}
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index db25ad5919..0d386952b6 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -206,4 +206,6 @@ int bnxt_hwrm_set_mac(struct bnxt *bp);
 int bnxt_hwrm_if_change(struct bnxt *bp, bool state);
 int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp);
 int bnxt_hwrm_fw_reset(struct bnxt *bp);
+int bnxt_hwrm_port_ts_query(struct bnxt *bp, uint8_t path,
+			    uint64_t *timestamp);
 #endif
diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c
index 12313dd53c..28487fb38e 100644
--- a/drivers/net/bnxt/bnxt_rxr.c
+++ b/drivers/net/bnxt/bnxt_rxr.c
@@ -17,6 +17,9 @@
 #include "bnxt_rxr.h"
 #include "bnxt_rxq.h"
 #include "hsi_struct_def_dpdk.h"
+#ifdef RTE_LIBRTE_IEEE1588
+#include "bnxt_hwrm.h"
+#endif
 
 /*
  * RX Ring handling
@@ -348,6 +351,30 @@ bnxt_parse_pkt_type(struct rx_pkt_cmpl *rxcmp, struct rx_pkt_cmpl_hi *rxcmp1)
 	return pkt_type;
 }
 
+#ifdef RTE_LIBRTE_IEEE1588
+static void
+bnxt_get_rx_ts_thor(struct bnxt *bp, uint32_t rx_ts_cmpl)
+{
+	uint64_t systime_cycles = 0;
+
+	if (!BNXT_CHIP_THOR(bp))
+		return;
+
+	/* On Thor, Rx timestamps are provided directly in the
+	 * Rx completion records to the driver. Only 32 bits of
+	 * the timestamp is present in the completion. Driver needs
+	 * to read the current 48 bit free running timer using the
+	 * HWRM_PORT_TS_QUERY command and combine the upper 16 bits
+	 * from the HWRM response with the lower 32 bits in the
+	 * Rx completion to produce the 48 bit timestamp for the Rx packet
+	 */
+	bnxt_hwrm_port_ts_query(bp, BNXT_PTP_FLAGS_CURRENT_TIME,
+				&systime_cycles);
+	bp->ptp_cfg->rx_timestamp = (systime_cycles & 0xFFFF00000000);
+	bp->ptp_cfg->rx_timestamp |= rx_ts_cmpl;
+}
+#endif
+
 static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
 			    struct bnxt_rx_queue *rxq, uint32_t *raw_cons)
 {
@@ -363,6 +390,7 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
 	uint8_t agg_buf = 0;
 	uint16_t cmp_type;
 	uint32_t flags2_f = 0;
+	uint16_t flags_type;
 
 	rxcmp = (struct rx_pkt_cmpl *)
 	    &cpr->cp_desc_ring[cp_cons];
@@ -418,18 +446,22 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
 	mbuf->data_len = mbuf->pkt_len;
 	mbuf->port = rxq->port_id;
 	mbuf->ol_flags = 0;
-	if (rxcmp->flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID) {
+
+	flags_type = rte_le_to_cpu_16(rxcmp->flags_type);
+	if (flags_type & RX_PKT_CMPL_FLAGS_RSS_VALID) {
 		mbuf->hash.rss = rxcmp->rss_hash;
 		mbuf->ol_flags |= PKT_RX_RSS_HASH;
 	} else {
 		mbuf->hash.fdir.id = rxcmp1->cfa_code;
 		mbuf->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
 	}
-
-	if ((rxcmp->flags_type & rte_cpu_to_le_16(RX_PKT_CMPL_FLAGS_MASK)) ==
-	     RX_PKT_CMPL_FLAGS_ITYPE_PTP_W_TIMESTAMP)
+#ifdef RTE_LIBRTE_IEEE1588
+	if (unlikely((flags_type & RX_PKT_CMPL_FLAGS_MASK) ==
+		     RX_PKT_CMPL_FLAGS_ITYPE_PTP_W_TIMESTAMP)) {
 		mbuf->ol_flags |= PKT_RX_IEEE1588_PTP | PKT_RX_IEEE1588_TMST;
-
+		bnxt_get_rx_ts_thor(rxq->bp, rxcmp1->reorder);
+	}
+#endif
 	if (agg_buf)
 		bnxt_rx_pages(rxq, mbuf, &tmp_raw_cons, agg_buf);
 
diff --git a/drivers/net/bnxt/bnxt_txr.c b/drivers/net/bnxt/bnxt_txr.c
index 35e7166bed..172b480b2e 100644
--- a/drivers/net/bnxt/bnxt_txr.c
+++ b/drivers/net/bnxt/bnxt_txr.c
@@ -155,7 +155,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 				PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM |
 				PKT_TX_VLAN_PKT | PKT_TX_OUTER_IP_CKSUM |
 				PKT_TX_TUNNEL_GRE | PKT_TX_TUNNEL_VXLAN |
-				PKT_TX_TUNNEL_GENEVE))
+				PKT_TX_TUNNEL_GENEVE | PKT_TX_IEEE1588_TMST))
 		long_bd = true;
 
 	nr_bds = long_bd + tx_pkt->nb_segs;
@@ -324,6 +324,11 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 			/* IP CSO */
 			txbd1->lflags |= TX_BD_LONG_LFLAGS_T_IP_CHKSUM;
 			txbd1->mss = 0;
+		} else if ((tx_pkt->ol_flags & PKT_TX_IEEE1588_TMST) ==
+			   PKT_TX_IEEE1588_TMST) {
+			/* PTP */
+			txbd1->lflags |= TX_BD_LONG_LFLAGS_STAMP;
+			txbd1->mss = 0;
 		}
 	} else {
 		txbd->flags_type |= TX_BD_SHORT_TYPE_TX_BD_SHORT;
diff --git a/drivers/net/bnxt/hsi_struct_def_dpdk.h b/drivers/net/bnxt/hsi_struct_def_dpdk.h
index bd04fe4838..26d12cf20a 100644
--- a/drivers/net/bnxt/hsi_struct_def_dpdk.h
+++ b/drivers/net/bnxt/hsi_struct_def_dpdk.h
@@ -33777,4 +33777,90 @@ struct hwrm_fw_reset_output {
 	uint8_t valid;
 } __attribute__((packed));
 
+/**********************
+ * hwrm_port_ts_query *
+ ***********************/
+
+
+/* hwrm_port_ts_query_input (size:192b/24B) */
+struct hwrm_port_ts_query_input {
+	/* The HWRM command request type. */
+	uint16_t	req_type;
+	/*
+	 * The completion ring to send the completion event on. This should
+	 * be the NQ ID returned from the `nq_alloc` HWRM command.
+	 */
+	uint16_t	cmpl_ring;
+	/*
+	 * The sequence ID is used by the driver for tracking multiple
+	 * commands. This ID is treated as opaque data by the firmware and
+	 * the value is returned in the `hwrm_resp_hdr` upon completion.
+	 */
+	uint16_t	seq_id;
+	/*
+	 * The target ID of the command:
+	 * * 0x0-0xFFF8 - The function ID
+	 * * 0xFFF8-0xFFFC, 0xFFFE - Reserved for internal processors
+	 * * 0xFFFD - Reserved for user-space HWRM interface
+	 * * 0xFFFF - HWRM
+	 */
+	uint16_t	target_id;
+	/*
+	 * A physical address pointer pointing to a host buffer that the
+	 * command's response data will be written. This can be either a host
+	 * physical address (HPA) or a guest physical address (GPA) and must
+	 * point to a physically contiguous block of memory.
+	 */
+	uint64_t	resp_addr;
+	uint32_t	flags;
+	/*
+	 * Enumeration denoting the RX, TX type of the resource.
+	 * This enumeration is used for resources that are similar for both
+	 * TX and RX paths of the chip.
+	 */
+	#define HWRM_PORT_TS_QUERY_INPUT_FLAGS_PATH		0x1UL
+	/* tx path */
+	#define HWRM_PORT_TS_QUERY_INPUT_FLAGS_PATH_TX		0x0UL
+	/* rx path */
+	#define HWRM_PORT_TS_QUERY_INPUT_FLAGS_PATH_RX		0x1UL
+	#define HWRM_PORT_TS_QUERY_INPUT_FLAGS_PATH_LAST	\
+		HWRM_PORT_TS_QUERY_INPUT_FLAGS_PATH_RX
+	/*
+	 * If set, the response includes the current value of the free
+	 * running timer.
+	 */
+	#define HWRM_PORT_TS_QUERY_INPUT_FLAGS_CURRENT_TIME	0x2UL
+	/* Port ID of port that is being queried. */
+	uint16_t	port_id;
+	uint8_t		unused_0[2];
+} __attribute__((packed));
+
+/* hwrm_port_ts_query_output (size:192b/24B) */
+struct hwrm_port_ts_query_output {
+	/* The specific error status for the command. */
+	uint16_t	error_code;
+	/* The HWRM command request type. */
+	uint16_t	req_type;
+	/* The sequence ID from the original command. */
+	uint16_t	seq_id;
+	/* The length of the response data in number of bytes. */
+	uint16_t	resp_len;
+	/*
+	 * Timestamp value of PTP message captured, or current value of
+	 * free running timer.
+	 */
+	uint32_t	ptp_msg_ts[2];
+	/* Sequence ID of the PTP message captured. */
+	uint16_t	ptp_msg_seqid;
+	uint8_t		unused_0[5];
+	/*
+	 * This field is used in Output records to indicate that the output
+	 * is completely written to RAM.  This field should be read as '1'
+	 * to indicate that the output has been completely written.
+	 * When writing a command completion or response to an internal processor,
+	 * the order of writes has to be such that this field is written last.
+	 */
+	uint8_t		valid;
+} __attribute__((packed));
+
 #endif /* _HSI_STRUCT_DEF_DPDK_H_ */
-- 
2.20.1 (Apple Git-117)


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [dpdk-dev] [PATCH v3 00/15] bnxt patchset to support device error recovery
  2019-10-02  1:23         ` [dpdk-dev] [PATCH v3 00/15] " Ajit Khaparde
                             ` (14 preceding siblings ...)
  2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 15/15] net/bnxt: add PTP support for Thor Ajit Khaparde
@ 2019-10-02 17:02           ` Ferruh Yigit
  15 siblings, 0 replies; 48+ messages in thread
From: Ferruh Yigit @ 2019-10-02 17:02 UTC (permalink / raw)
  To: Ajit Khaparde, dev

On 10/2/2019 2:23 AM, Ajit Khaparde wrote:
> This patchset adds support to monitor the health of the firmware and the
> underlying device and recover to an operational state in case of error.
> We can also detect if a FW upgrade is in progress and quiesce all
> access to the device and recover once FW indicates everything is ready.
> 
> Patchset against dpdk-next-net. Please apply.
> 
> v2->v3: Some APIs and dev_ops return types have been updated since
> 	v2 was submitted. This version addresses the conflicts on
> 	account of that.
> 
> Kalesh AP (15):
>   net/bnxt: add FW reset HWRM command
>   net/bnxt: prevent device access when device is in reset
>   net/bnxt: handle reset notify async event from FW
>   net/bnxt: inform firmware about IF state changes
>   net/bnxt: handle fatal event from FW under error conditions
>   net/bnxt: query firmware error recovery capabilities
>   net/bnxt: map status registers for FW health monitoring
>   net/bnxt: advertise error recovery capability and handle async event
>   net/bnxt: add code for periodic FW health monitoring
>   net/bnxt: add support for FW reset
>   net/bnxt: add hot firmware upgrade support for Stingray
>   net/bnxt: reduce verbosity of logs
>   net/bnxt: avoid null pointer dereference
>   net/bnxt: use BIT macro instead of bit fields
>   net/bnxt: add PTP support for Thor

Series applied to dpdk-next-net/master, thanks.

^ permalink raw reply	[flat|nested] 48+ messages in thread

end of thread, other threads:[~2019-10-02 17:02 UTC | newest]

Thread overview: 48+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-22  5:53 [dpdk-dev] [PATCH 00/13] bnxt patchset to support device error recovery Ajit Khaparde
2019-08-22  5:53 ` [dpdk-dev] [PATCH 01/13] net/bnxt: hsi version update Ajit Khaparde
2019-08-27 13:51   ` Ferruh Yigit
2019-08-30 16:35     ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ajit Khaparde
2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 01/13] net/bnxt: add FW reset HWRM command Ajit Khaparde
2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 02/13] net/bnxt: prevent device access when device is in reset Ajit Khaparde
2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 03/13] net/bnxt: handle reset notify async event from FW Ajit Khaparde
2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 04/13] net/bnxt: inform firmware about IF state changes Ajit Khaparde
2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 05/13] net/bnxt: handle fatal event from FW under error conditions Ajit Khaparde
2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 06/13] net/bnxt: query firmware error recovery capabilities Ajit Khaparde
2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 07/13] net/bnxt: map status registers for FW health monitoring Ajit Khaparde
2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 08/13] net/bnxt: advertise error recovery capability and handle async event Ajit Khaparde
2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 09/13] net/bnxt: add code for periodic FW health monitoring Ajit Khaparde
2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 10/13] net/bnxt: add support for FW reset Ajit Khaparde
2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 11/13] net/bnxt: reduce verbosity of logs Ajit Khaparde
2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 12/13] net/bnxt: use BIT macro instead of bit fields Ajit Khaparde
2019-08-30 16:35       ` [dpdk-dev] [PATCH v2 13/13] net/bnxt: avoid null pointer dereference Ajit Khaparde
2019-09-30 13:29       ` [dpdk-dev] [PATCH v2 00/13] bnxt patchset to support device error recovery Ferruh Yigit
2019-10-02  1:23         ` [dpdk-dev] [PATCH v3 00/15] " Ajit Khaparde
2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 01/15] net/bnxt: add FW reset HWRM command Ajit Khaparde
2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 02/15] net/bnxt: prevent device access when device is in reset Ajit Khaparde
2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 03/15] net/bnxt: handle reset notify async event from FW Ajit Khaparde
2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 04/15] net/bnxt: inform firmware about IF state changes Ajit Khaparde
2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 05/15] net/bnxt: handle fatal event from FW under error conditions Ajit Khaparde
2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 06/15] net/bnxt: query firmware error recovery capabilities Ajit Khaparde
2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 07/15] net/bnxt: map status registers for FW health monitoring Ajit Khaparde
2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 08/15] net/bnxt: advertise error recovery capability and handle async event Ajit Khaparde
2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 09/15] net/bnxt: add code for periodic FW health monitoring Ajit Khaparde
2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 10/15] net/bnxt: add support for FW reset Ajit Khaparde
2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 11/15] net/bnxt: add hot firmware upgrade support for Stingray Ajit Khaparde
2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 12/15] net/bnxt: reduce verbosity of logs Ajit Khaparde
2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 13/15] net/bnxt: avoid null pointer dereference Ajit Khaparde
2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 14/15] net/bnxt: use BIT macro instead of bit fields Ajit Khaparde
2019-10-02  1:23           ` [dpdk-dev] [PATCH v3 15/15] net/bnxt: add PTP support for Thor Ajit Khaparde
2019-10-02 17:02           ` [dpdk-dev] [PATCH v3 00/15] bnxt patchset to support device error recovery Ferruh Yigit
2019-08-22  5:53 ` [dpdk-dev] [PATCH 02/13] net/bnxt: prevent device access when device is in reset Ajit Khaparde
2019-08-27 15:00   ` Ferruh Yigit
2019-08-22  5:53 ` [dpdk-dev] [PATCH 03/13] net/bnxt: handle reset notify async event from FW Ajit Khaparde
2019-08-22  5:53 ` [dpdk-dev] [PATCH 04/13] net/bnxt: inform firmware about IF state changes Ajit Khaparde
2019-08-22  5:53 ` [dpdk-dev] [PATCH 05/13] net/bnxt: handle fatal event from FW under error conditions Ajit Khaparde
2019-08-22  5:53 ` [dpdk-dev] [PATCH 06/13] net/bnxt: query firmware error recovery capabilities Ajit Khaparde
2019-08-22  5:53 ` [dpdk-dev] [PATCH 07/13] net/bnxt: map status registers for FW health monitoring Ajit Khaparde
2019-08-22  5:53 ` [dpdk-dev] [PATCH 08/13] net/bnxt: advertise error recovery capability and handle async event Ajit Khaparde
2019-08-22  5:53 ` [dpdk-dev] [PATCH 09/13] net/bnxt: add code for periodic FW health monitoring Ajit Khaparde
2019-08-22  5:53 ` [dpdk-dev] [PATCH 10/13] net/bnxt: use BIT macro instead of bit fields Ajit Khaparde
2019-08-22  5:53 ` [dpdk-dev] [PATCH 11/13] net/bnxt: reschedule the health check alarm correctly Ajit Khaparde
2019-08-22  5:53 ` [dpdk-dev] [PATCH 12/13] net/bnxt: add support for FW reset Ajit Khaparde
2019-08-22  5:54 ` [dpdk-dev] [PATCH 13/13] net/bnxt: reduce verbosity of logs Ajit Khaparde

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).