DPDK patches and discussions
 help / color / mirror / Atom feed
* [dpdk-dev] [PATCH] net/bnxt: add flow stats in extended stats
@ 2020-04-17  5:37 Ajit Khaparde
  2020-04-17  5:49 ` [dpdk-dev] [PATCH v2] " Ajit Khaparde
  0 siblings, 1 reply; 4+ messages in thread
From: Ajit Khaparde @ 2020-04-17  5:37 UTC (permalink / raw)
  To: dev; +Cc: Somnath Kotur, Sriharsha Basavapatna

From: Somnath Kotur <somnath.kotur@broadcom.com>

This patch allows flow stats to be displayed in extended stats.
To do this, DMA-able memory is registered with the FW during device
initialization. Then the driver uses an alarm thread to query the
per-flow stats using the HWRM_CFA_COUNTER_QSTATS HWRM command at
regular intervals and stores them locally; the stored values are
displayed when the application queries the xstats.
The DMA-able memory is unregistered during driver cleanup.
This functionality can be enabled using the flow-xstat devarg and
will be disabled by default. The intention behind this is to allow
stats to be displayed for all the flows in one shot instead of
querying one at a time.

Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt.h                |  24 ++-
 drivers/net/bnxt/bnxt_ethdev.c         | 263 +++++++++++++++++++++++
 drivers/net/bnxt/bnxt_filter.h         |   6 +
 drivers/net/bnxt/bnxt_flow.c           |  57 +++++
 drivers/net/bnxt/bnxt_hwrm.c           | 169 ++++++++++++++-
 drivers/net/bnxt/bnxt_hwrm.h           |  22 ++
 drivers/net/bnxt/bnxt_stats.c          | 232 +++++++++++++++++++-
 drivers/net/bnxt/hsi_struct_def_dpdk.h | 284 +++++++++++++++++++++++++
 8 files changed, 1043 insertions(+), 14 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index a70cdff07..00a4d0b3e 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -433,6 +433,13 @@ struct bnxt_ctx_mem_info {
 	struct bnxt_ctx_pg_info *tqm_mem[BNXT_MAX_TC_Q];
 };
 
+struct bnxt_ctx_mem_buf_info {
+	void		*va;
+	rte_iova_t	dma;
+	uint16_t	ctx_id;
+	size_t		size;
+};
+
 /* Maximum Firmware Reset bail out value in milliseconds */
 #define BNXT_MAX_FW_RESET_TIMEOUT	6000
 /* Minimum time required for the firmware readiness in milliseconds */
@@ -530,7 +537,7 @@ struct bnxt {
 #define BNXT_FLAG_NEW_RM			BIT(20)
 #define BNXT_FLAG_NPAR_PF			BIT(21)
 #define BNXT_FLAG_FW_CAP_ONE_STEP_TX_TS		BIT(22)
-#define BNXT_FLAG_ADV_FLOW_MGMT			BIT(23)
+#define BNXT_FLAG_FC_THREAD			BIT(23)
 #define BNXT_FLAG_RX_VECTOR_PKT_MODE		BIT(24)
 #define BNXT_PF(bp)		(!((bp)->flags & BNXT_FLAG_VF))
 #define BNXT_VF(bp)		((bp)->flags & BNXT_FLAG_VF)
@@ -550,6 +557,8 @@ struct bnxt {
 #define BNXT_FW_CAP_IF_CHANGE		BIT(1)
 #define BNXT_FW_CAP_ERROR_RECOVERY	BIT(2)
 #define BNXT_FW_CAP_ERR_RECOVER_RELOAD	BIT(3)
+#define BNXT_FW_CAP_ADV_FLOW_MGMT	BIT(5)
+#define BNXT_FW_CAP_ADV_FLOW_COUNTERS	BIT(6)
 
 	uint32_t		flow_flags;
 #define BNXT_FLOW_FLAG_L2_HDR_SRC_FILTER_EN	BIT(0)
@@ -690,8 +699,17 @@ struct bnxt {
 	struct tf		tfp;
 	struct bnxt_ulp_context	ulp_ctx;
 	uint8_t			truflow;
+	uint16_t                max_fc;
+	struct bnxt_ctx_mem_buf_info rx_fc_in_tbl;
+	struct bnxt_ctx_mem_buf_info rx_fc_out_tbl;
+	struct bnxt_ctx_mem_buf_info tx_fc_in_tbl;
+	struct bnxt_ctx_mem_buf_info tx_fc_out_tbl;
+	uint16_t		flow_count;
+	uint8_t			flow_xstat;
 };
 
+#define BNXT_FC_TIMER	1 /* Flow counter polling interval, in seconds */
+
 int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu);
 int bnxt_link_update(struct rte_eth_dev *eth_dev, int wait_to_complete,
 		     bool exp_link_status);
@@ -738,4 +756,8 @@ void bnxt_ulp_deinit(struct bnxt *bp);
 uint16_t bnxt_get_vnic_id(uint16_t port);
 uint16_t bnxt_get_svif(uint16_t port_id, bool func_svif);
 
+void bnxt_cancel_fc_thread(struct bnxt *bp);
+void bnxt_flow_cnt_alarm_cb(void *arg);
+int bnxt_flow_stats_req(struct bnxt *bp);
+int bnxt_flow_stats_cnt(struct bnxt *bp);
 #endif
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 5d5b8e095..bd2c3fcb6 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -128,8 +128,10 @@ static const struct rte_pci_id bnxt_pci_id_map[] = {
 				     DEV_RX_OFFLOAD_RSS_HASH)
 
 #define BNXT_DEVARG_TRUFLOW	"host-based-truflow"
+#define BNXT_DEVARG_FLOW_XSTAT	"flow-xstat"
 static const char *const bnxt_dev_args[] = {
 	BNXT_DEVARG_TRUFLOW,
+	BNXT_DEVARG_FLOW_XSTAT,
 	NULL
 };
 
@@ -139,6 +141,12 @@ static const char *const bnxt_dev_args[] = {
  */
 #define	BNXT_DEVARG_TRUFLOW_INVALID(truflow)	((truflow) > 1)
 
+/*
+ * flow_xstat == false to disable the feature
+ * flow_xstat == true to enable the feature
+ */
+#define	BNXT_DEVARG_FLOW_XSTAT_INVALID(flow_xstat)	((flow_xstat) > 1)
+
 static int bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask);
 static void bnxt_print_link_info(struct rte_eth_dev *eth_dev);
 static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev);
@@ -333,6 +341,150 @@ static int bnxt_setup_one_vnic(struct bnxt *bp, uint16_t vnic_id)
 	return rc;
 }
 
+static int bnxt_register_fc_ctx_mem(struct bnxt *bp)
+{
+	int rc = 0;
+
+	rc = bnxt_hwrm_ctx_rgtr(bp, bp->rx_fc_in_tbl.dma,
+				&bp->rx_fc_in_tbl.ctx_id);
+	if (rc)
+		return rc;
+
+	PMD_DRV_LOG(DEBUG,
+		    "rx_fc_in_tbl.va = %p rx_fc_in_tbl.dma = %p"
+		    " rx_fc_in_tbl.ctx_id = %d\n",
+		    bp->rx_fc_in_tbl.va, (void *)bp->rx_fc_in_tbl.dma,
+		    bp->rx_fc_in_tbl.ctx_id);
+
+	rc = bnxt_hwrm_ctx_rgtr(bp, bp->rx_fc_out_tbl.dma,
+				&bp->rx_fc_out_tbl.ctx_id);
+	if (rc)
+		return rc;
+
+	PMD_DRV_LOG(DEBUG,
+		    "rx_fc_out_tbl.va = %p rx_fc_out_tbl.dma = %p"
+		    " rx_fc_out_tbl.ctx_id = %d\n",
+		    bp->rx_fc_out_tbl.va, (void *)bp->rx_fc_out_tbl.dma,
+		    bp->rx_fc_out_tbl.ctx_id);
+
+	rc = bnxt_hwrm_ctx_rgtr(bp, bp->tx_fc_in_tbl.dma,
+				&bp->tx_fc_in_tbl.ctx_id);
+	if (rc)
+		return rc;
+
+	PMD_DRV_LOG(DEBUG,
+		    "tx_fc_in_tbl.va = %p tx_fc_in_tbl.dma = %p"
+		    " tx_fc_in_tbl.ctx_id = %d\n",
+		    bp->tx_fc_in_tbl.va, (void *)bp->tx_fc_in_tbl.dma,
+		    bp->tx_fc_in_tbl.ctx_id);
+
+	rc = bnxt_hwrm_ctx_rgtr(bp, bp->tx_fc_out_tbl.dma,
+				&bp->tx_fc_out_tbl.ctx_id);
+	if (rc)
+		return rc;
+
+	PMD_DRV_LOG(DEBUG,
+		    "tx_fc_out_tbl.va = %p tx_fc_out_tbl.dma = %p"
+		    " tx_fc_out_tbl.ctx_id = %d\n",
+		    bp->tx_fc_out_tbl.va, (void *)bp->tx_fc_out_tbl.dma,
+		    bp->tx_fc_out_tbl.ctx_id);
+
+	memset(bp->rx_fc_out_tbl.va, 0, bp->rx_fc_out_tbl.size);
+	rc = bnxt_hwrm_cfa_counter_cfg(bp, BNXT_DIR_RX,
+				       CFA_COUNTER_CFG_IN_COUNTER_TYPE_FC,
+				       bp->rx_fc_out_tbl.ctx_id,
+				       bp->max_fc,
+				       true);
+	if (rc)
+		return rc;
+
+	memset(bp->tx_fc_out_tbl.va, 0, bp->tx_fc_out_tbl.size);
+	rc = bnxt_hwrm_cfa_counter_cfg(bp, BNXT_DIR_TX,
+				       CFA_COUNTER_CFG_IN_COUNTER_TYPE_FC,
+				       bp->tx_fc_out_tbl.ctx_id,
+				       bp->max_fc,
+				       true);
+
+	return rc;
+}
+
+static int bnxt_alloc_ctx_mem_buf(char *type, size_t size,
+				  struct bnxt_ctx_mem_buf_info *ctx)
+{
+	if (!ctx)
+		return -EINVAL;
+
+	ctx->va = rte_zmalloc(type, size, 0);
+	if (ctx->va == NULL)
+		return -ENOMEM;
+	rte_mem_lock_page(ctx->va);
+	ctx->size = size;
+	ctx->dma = rte_mem_virt2iova(ctx->va);
+	if (ctx->dma == RTE_BAD_IOVA)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int bnxt_init_fc_ctx_mem(struct bnxt *bp)
+{
+	struct rte_pci_device *pdev = bp->pdev;
+	char type[RTE_MEMZONE_NAMESIZE];
+	uint16_t max_fc;
+	int rc = 0;
+
+	max_fc = bp->max_fc;
+
+	sprintf(type, "bnxt_rx_fc_in_" PCI_PRI_FMT, pdev->addr.domain,
+		pdev->addr.bus, pdev->addr.devid, pdev->addr.function);
+	/* 4 bytes for each counter-id */
+	rc = bnxt_alloc_ctx_mem_buf(type, max_fc * 4, &bp->rx_fc_in_tbl);
+	if (rc)
+		return rc;
+
+	sprintf(type, "bnxt_rx_fc_out_" PCI_PRI_FMT, pdev->addr.domain,
+		pdev->addr.bus, pdev->addr.devid, pdev->addr.function);
+	/* 16 bytes for each counter - 8 bytes pkt_count, 8 bytes byte_count */
+	rc = bnxt_alloc_ctx_mem_buf(type, max_fc * 16, &bp->rx_fc_out_tbl);
+	if (rc)
+		return rc;
+
+	sprintf(type, "bnxt_tx_fc_in_" PCI_PRI_FMT, pdev->addr.domain,
+		pdev->addr.bus, pdev->addr.devid, pdev->addr.function);
+	/* 4 bytes for each counter-id */
+	rc = bnxt_alloc_ctx_mem_buf(type, max_fc * 4, &bp->tx_fc_in_tbl);
+	if (rc)
+		return rc;
+
+	sprintf(type, "bnxt_tx_fc_out_" PCI_PRI_FMT, pdev->addr.domain,
+		pdev->addr.bus, pdev->addr.devid, pdev->addr.function);
+	/* 16 bytes for each counter - 8 bytes pkt_count, 8 bytes byte_count */
+	rc = bnxt_alloc_ctx_mem_buf(type, max_fc * 16, &bp->tx_fc_out_tbl);
+	if (rc)
+		return rc;
+
+	rc = bnxt_register_fc_ctx_mem(bp);
+
+	return rc;
+}
+
+static int bnxt_init_ctx_mem(struct bnxt *bp)
+{
+	int rc = 0;
+
+	if (!(bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_COUNTERS) ||
+	    !(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp)))
+		return 0;
+
+	rc = bnxt_hwrm_cfa_counter_qcaps(bp, &bp->max_fc);
+	if (rc)
+		return rc;
+
+	rc = bnxt_init_fc_ctx_mem(bp);
+
+	return rc;
+}
+
 static int bnxt_init_chip(struct bnxt *bp)
 {
 	struct rte_eth_link new;
@@ -1005,6 +1157,7 @@ static void bnxt_dev_close_op(struct rte_eth_dev *eth_dev)
 	/* cancel the recovery handler before remove dev */
 	rte_eal_alarm_cancel(bnxt_dev_reset_and_resume, (void *)bp);
 	rte_eal_alarm_cancel(bnxt_dev_recover, (void *)bp);
+	bnxt_cancel_fc_thread(bp);
 
 	if (eth_dev->data->dev_started)
 		bnxt_dev_stop_op(eth_dev);
@@ -4871,6 +5024,12 @@ static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev)
 	if (rc)
 		return rc;
 
+	rc = bnxt_init_ctx_mem(bp);
+	if (rc) {
+		PMD_DRV_LOG(ERR, "Failed to init adv_flow_counters\n");
+		return rc;
+	}
+
 	rc = bnxt_init_locks(bp);
 	if (rc)
 		return rc;
@@ -4913,6 +5072,41 @@ bnxt_parse_devarg_truflow(__rte_unused const char *key,
 	return 0;
 }
 
+static int
+bnxt_parse_devarg_flow_xstat(__rte_unused const char *key,
+			     const char *value, void *opaque_arg)
+{
+	struct bnxt *bp = opaque_arg;
+	unsigned long flow_xstat;
+	char *end = NULL;
+
+	if (!value || !opaque_arg) {
+		PMD_DRV_LOG(ERR,
+			    "Invalid parameter passed to flow_xstat devarg.\n");
+		return -EINVAL;
+	}
+
+	flow_xstat = strtoul(value, &end, 10);
+	if (end == NULL || *end != '\0' ||
+	    (flow_xstat == ULONG_MAX && errno == ERANGE)) {
+		PMD_DRV_LOG(ERR,
+			    "Invalid parameter passed to flow_xstat devarg.\n");
+		return -EINVAL;
+	}
+
+	if (BNXT_DEVARG_FLOW_XSTAT_INVALID(flow_xstat)) {
+		PMD_DRV_LOG(ERR,
+			    "Invalid value passed to flow_xstat devarg.\n");
+		return -EINVAL;
+	}
+
+	bp->flow_xstat = flow_xstat;
+	if (bp->flow_xstat)
+		PMD_DRV_LOG(INFO, "flow_xstat feature enabled.\n");
+
+	return 0;
+}
+
 static void
 bnxt_parse_dev_args(struct bnxt *bp, struct rte_devargs *devargs)
 {
@@ -4932,6 +5126,13 @@ bnxt_parse_dev_args(struct bnxt *bp, struct rte_devargs *devargs)
 	rte_kvargs_process(kvlist, BNXT_DEVARG_TRUFLOW,
 			   bnxt_parse_devarg_truflow, bp);
 
+	/*
+	 * Handler for "flow-xstat" devarg.
+	 * Invoked as for ex: "-w 0000:00:0d.0,flow-xstat=1"
+	 */
+	rte_kvargs_process(kvlist, BNXT_DEVARG_FLOW_XSTAT,
+			   bnxt_parse_devarg_flow_xstat, bp);
+
 	rte_kvargs_free(kvlist);
 }
 
@@ -5016,6 +5217,66 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
 	return rc;
 }
 
+
+static void bnxt_free_ctx_mem_buf(struct bnxt_ctx_mem_buf_info *ctx)
+{
+	if (!ctx)
+		return;
+
+	if (ctx->va)
+		rte_free(ctx->va);
+
+	ctx->va = NULL;
+	ctx->dma = RTE_BAD_IOVA;
+	ctx->ctx_id = BNXT_CTX_VAL_INVAL;
+}
+
+static void bnxt_unregister_fc_ctx_mem(struct bnxt *bp)
+{
+	bnxt_hwrm_cfa_counter_cfg(bp, BNXT_DIR_RX,
+				  CFA_COUNTER_CFG_IN_COUNTER_TYPE_FC,
+				  bp->rx_fc_out_tbl.ctx_id,
+				  bp->max_fc,
+				  false);
+
+	bnxt_hwrm_cfa_counter_cfg(bp, BNXT_DIR_TX,
+				  CFA_COUNTER_CFG_IN_COUNTER_TYPE_FC,
+				  bp->tx_fc_out_tbl.ctx_id,
+				  bp->max_fc,
+				  false);
+
+	if (bp->rx_fc_in_tbl.ctx_id != BNXT_CTX_VAL_INVAL)
+		bnxt_hwrm_ctx_unrgtr(bp, bp->rx_fc_in_tbl.ctx_id);
+	bp->rx_fc_in_tbl.ctx_id = BNXT_CTX_VAL_INVAL;
+
+	if (bp->rx_fc_out_tbl.ctx_id != BNXT_CTX_VAL_INVAL)
+		bnxt_hwrm_ctx_unrgtr(bp, bp->rx_fc_out_tbl.ctx_id);
+	bp->rx_fc_out_tbl.ctx_id = BNXT_CTX_VAL_INVAL;
+
+	if (bp->tx_fc_in_tbl.ctx_id != BNXT_CTX_VAL_INVAL)
+		bnxt_hwrm_ctx_unrgtr(bp, bp->tx_fc_in_tbl.ctx_id);
+	bp->tx_fc_in_tbl.ctx_id = BNXT_CTX_VAL_INVAL;
+
+	if (bp->tx_fc_out_tbl.ctx_id != BNXT_CTX_VAL_INVAL)
+		bnxt_hwrm_ctx_unrgtr(bp, bp->tx_fc_out_tbl.ctx_id);
+	bp->tx_fc_out_tbl.ctx_id = BNXT_CTX_VAL_INVAL;
+}
+
+static void bnxt_uninit_fc_ctx_mem(struct bnxt *bp)
+{
+	bnxt_unregister_fc_ctx_mem(bp);
+
+	bnxt_free_ctx_mem_buf(&bp->rx_fc_in_tbl);
+	bnxt_free_ctx_mem_buf(&bp->rx_fc_out_tbl);
+	bnxt_free_ctx_mem_buf(&bp->tx_fc_in_tbl);
+	bnxt_free_ctx_mem_buf(&bp->tx_fc_out_tbl);
+}
+
+static void bnxt_uninit_ctx_mem(struct bnxt *bp)
+{
+	bnxt_uninit_fc_ctx_mem(bp);
+}
+
 static void
 bnxt_uninit_locks(struct bnxt *bp)
 {
@@ -5043,6 +5304,8 @@ bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev)
 		}
 	}
 
+	bnxt_uninit_ctx_mem(bp);
+
 	bnxt_uninit_locks(bp);
 	rte_free(bp->ptp_cfg);
 	bp->ptp_cfg = NULL;
diff --git a/drivers/net/bnxt/bnxt_filter.h b/drivers/net/bnxt/bnxt_filter.h
index 8f8a4c13b..4b2b3cadc 100644
--- a/drivers/net/bnxt/bnxt_filter.h
+++ b/drivers/net/bnxt/bnxt_filter.h
@@ -25,6 +25,11 @@ struct bnxt;
 #define BNXT_FLOW_PARSE_INNER_FLAG		BIT(6)
 #define BNXT_FLOW_MARK_FLAG			BIT(7)
 
+struct bnxt_flow_stats {
+	uint64_t	packets;
+	uint64_t	bytes;
+};
+
 struct bnxt_filter_info {
 	STAILQ_ENTRY(bnxt_filter_info)	next;
 	uint32_t		flow_id;
@@ -84,6 +89,7 @@ struct bnxt_filter_info {
 	 */
 	struct			bnxt_vnic_info *vnic;
 	uint32_t		mark;
+	struct bnxt_flow_stats	hw_stats;
 };
 
 struct bnxt_filter_info *bnxt_alloc_filter(struct bnxt *bp);
diff --git a/drivers/net/bnxt/bnxt_flow.c b/drivers/net/bnxt/bnxt_flow.c
index 9fb6dbdd9..6c6da84c6 100644
--- a/drivers/net/bnxt/bnxt_flow.c
+++ b/drivers/net/bnxt/bnxt_flow.c
@@ -10,6 +10,7 @@
 #include <rte_flow.h>
 #include <rte_flow_driver.h>
 #include <rte_tailq.h>
+#include <rte_alarm.h>
 
 #include "bnxt.h"
 #include "bnxt_filter.h"
@@ -1627,6 +1628,51 @@ bnxt_match_filter(struct bnxt *bp, struct bnxt_filter_info *nf)
 	return 0;
 }
 
+static void
+bnxt_setup_flow_counter(struct bnxt *bp)
+{
+	if (bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_COUNTERS &&
+	    !(bp->flags & BNXT_FLAG_FC_THREAD)) {
+		rte_eal_alarm_set(US_PER_S * BNXT_FC_TIMER,
+				  bnxt_flow_cnt_alarm_cb,
+				  (void *)bp);
+		bp->flags |= BNXT_FLAG_FC_THREAD;
+	}
+}
+
+void bnxt_flow_cnt_alarm_cb(void *arg)
+{
+	int rc = 0;
+	struct bnxt *bp = arg;
+
+	if (!bp->rx_fc_out_tbl.va) {
+		PMD_DRV_LOG(ERR, "bp->rx_fc_out_tbl.va is NULL?\n");
+		bnxt_cancel_fc_thread(bp);
+		return;
+	}
+
+	if (!bp->flow_count) {
+		bnxt_cancel_fc_thread(bp);
+		return;
+	}
+
+	if (!bp->eth_dev->data->dev_started) {
+		bnxt_cancel_fc_thread(bp);
+		return;
+	}
+
+	rc = bnxt_flow_stats_req(bp);
+	if (rc) {
+		PMD_DRV_LOG(ERR, "Flow stat alarm not rescheduled.\n");
+		return;
+	}
+
+	rte_eal_alarm_set(US_PER_S * BNXT_FC_TIMER,
+			  bnxt_flow_cnt_alarm_cb,
+			  (void *)bp);
+}
+
+
 static struct rte_flow *
 bnxt_flow_create(struct rte_eth_dev *dev,
 		 const struct rte_flow_attr *attr,
@@ -1783,7 +1829,9 @@ bnxt_flow_create(struct rte_eth_dev *dev,
 			bp->mark_table[flow_id].valid = true;
 			bp->mark_table[flow_id].mark_id = filter->mark;
 		}
+		bp->flow_count++;
 		bnxt_release_flow_lock(bp);
+		bnxt_setup_flow_counter(bp);
 		return flow;
 	}
 
@@ -1903,6 +1951,7 @@ _bnxt_flow_destroy(struct bnxt *bp,
 		bnxt_free_filter(bp, filter);
 		STAILQ_REMOVE(&vnic->flow_list, flow, rte_flow, next);
 		rte_free(flow);
+		bp->flow_count--;
 
 		/* If this was the last flow associated with this vnic,
 		 * switch the queue back to RSS pool.
@@ -1955,6 +2004,12 @@ bnxt_flow_destroy(struct rte_eth_dev *dev,
 	return ret;
 }
 
+void bnxt_cancel_fc_thread(struct bnxt *bp)
+{
+	bp->flags &= ~BNXT_FLAG_FC_THREAD;
+	rte_eal_alarm_cancel(bnxt_flow_cnt_alarm_cb, (void *)bp);
+}
+
 static int
 bnxt_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
 {
@@ -1981,6 +2036,8 @@ bnxt_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
 				break;
 		}
 	}
+
+	bnxt_cancel_fc_thread(bp);
 	bnxt_release_flow_lock(bp);
 
 	return ret;
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index d435f6570..09a73286b 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -744,6 +744,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
 	} else {
 		bp->max_vnics = 1;
 	}
+	PMD_DRV_LOG(DEBUG, "Max l2_cntxts is %d vnics is %d\n",
+		    bp->max_l2_ctx, bp->max_vnics);
 	bp->max_stat_ctx = rte_le_to_cpu_16(resp->max_stat_ctx);
 	if (BNXT_PF(bp)) {
 		bp->pf.total_vnics = rte_le_to_cpu_16(resp->max_vnics);
@@ -1169,10 +1171,17 @@ int bnxt_hwrm_ver_get(struct bnxt *bp, uint32_t timeout)
 		PMD_DRV_LOG(DEBUG, "FW supports Trusted VFs\n");
 	if (dev_caps_cfg &
 	    HWRM_VER_GET_OUTPUT_DEV_CAPS_CFG_CFA_ADV_FLOW_MGNT_SUPPORTED) {
-		bp->flags |= BNXT_FLAG_ADV_FLOW_MGMT;
+		bp->fw_cap |= BNXT_FW_CAP_ADV_FLOW_MGMT;
 		PMD_DRV_LOG(DEBUG, "FW supports advanced flow management\n");
 	}
 
+	if (dev_caps_cfg &
+	    HWRM_VER_GET_OUTPUT_DEV_CAPS_CFG_ADV_FLOW_COUNTERS_SUPPORTED) {
+		PMD_DRV_LOG(DEBUG, "FW supports advanced flow counters\n");
+		bp->fw_cap |= BNXT_FW_CAP_ADV_FLOW_COUNTERS;
+	}
+
+
 error:
 	HWRM_UNLOCK();
 	return rc;
@@ -5216,7 +5225,7 @@ int bnxt_hwrm_cfa_adv_flow_mgmt_qcaps(struct bnxt *bp)
 	uint32_t flags = 0;
 	int rc = 0;
 
-	if (!(bp->flags & BNXT_FLAG_ADV_FLOW_MGMT))
+	if (!(bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_MGMT))
 		return rc;
 
 	if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) {
@@ -5239,3 +5248,159 @@ int bnxt_hwrm_cfa_adv_flow_mgmt_qcaps(struct bnxt *bp)
 
 	return rc;
 }
+
+int bnxt_hwrm_cfa_counter_qcaps(struct bnxt *bp, uint16_t *max_fc)
+{
+	int rc = 0;
+
+	struct hwrm_cfa_counter_qcaps_input req = {0};
+	struct hwrm_cfa_counter_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
+
+	if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) {
+		PMD_DRV_LOG(DEBUG,
+			    "Not a PF or trusted VF. Command not supported\n");
+		return 0;
+	}
+
+	HWRM_PREP(&req, HWRM_CFA_COUNTER_QCAPS, BNXT_USE_KONG(bp));
+	req.target_id = rte_cpu_to_le_16(bp->fw_fid);
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_KONG(bp));
+
+	HWRM_CHECK_RESULT();
+	if (max_fc)
+		*max_fc = rte_le_to_cpu_16(resp->max_rx_fc);
+	HWRM_UNLOCK();
+
+	PMD_DRV_LOG(DEBUG, "max_fc = %d\n", *max_fc);
+	return 0;
+}
+
+int bnxt_hwrm_ctx_rgtr(struct bnxt *bp, rte_iova_t dma_addr, uint16_t *ctx_id)
+{
+	int rc = 0;
+	struct hwrm_cfa_ctx_mem_rgtr_input req = {.req_type = 0 };
+	struct hwrm_cfa_ctx_mem_rgtr_output *resp = bp->hwrm_cmd_resp_addr;
+
+	if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) {
+		PMD_DRV_LOG(DEBUG,
+			    "Not a PF or trusted VF. Command not supported\n");
+		return 0;
+	}
+
+	HWRM_PREP(&req, HWRM_CFA_CTX_MEM_RGTR, BNXT_USE_KONG(bp));
+
+	req.page_level = HWRM_CFA_CTX_MEM_RGTR_INPUT_PAGE_LEVEL_LVL_0;
+	req.page_size = HWRM_CFA_CTX_MEM_RGTR_INPUT_PAGE_SIZE_2M;
+	req.page_dir = rte_cpu_to_le_64(dma_addr);
+
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_KONG(bp));
+
+	HWRM_CHECK_RESULT();
+	if (ctx_id) {
+		*ctx_id  = rte_le_to_cpu_16(resp->ctx_id);
+		PMD_DRV_LOG(DEBUG, "ctx_id = %d\n", *ctx_id);
+	}
+	HWRM_UNLOCK();
+
+	return 0;
+}
+
+int bnxt_hwrm_ctx_unrgtr(struct bnxt *bp, uint16_t ctx_id)
+{
+	int rc = 0;
+	struct hwrm_cfa_ctx_mem_unrgtr_input req = {.req_type = 0 };
+	struct hwrm_cfa_ctx_mem_unrgtr_output *resp = bp->hwrm_cmd_resp_addr;
+
+	if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) {
+		PMD_DRV_LOG(DEBUG,
+			    "Not a PF or trusted VF. Command not supported\n");
+		return 0;
+	}
+
+	HWRM_PREP(&req, HWRM_CFA_CTX_MEM_UNRGTR, BNXT_USE_KONG(bp));
+
+	req.ctx_id = rte_cpu_to_le_16(ctx_id);
+
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_KONG(bp));
+
+	HWRM_CHECK_RESULT();
+	HWRM_UNLOCK();
+
+	return rc;
+}
+
+int bnxt_hwrm_cfa_counter_cfg(struct bnxt *bp, enum bnxt_flow_dir dir,
+			      uint16_t cntr, uint16_t ctx_id,
+			      uint32_t num_entries, bool enable)
+{
+	struct hwrm_cfa_counter_cfg_input req = {0};
+	struct hwrm_cfa_counter_cfg_output *resp = bp->hwrm_cmd_resp_addr;
+	uint16_t flags = 0;
+	int rc;
+
+	if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) {
+		PMD_DRV_LOG(DEBUG,
+			    "Not a PF or trusted VF. Command not supported\n");
+		return 0;
+	}
+
+	HWRM_PREP(&req, HWRM_CFA_COUNTER_CFG, BNXT_USE_KONG(bp));
+
+	req.target_id = rte_cpu_to_le_16(bp->fw_fid);
+	req.counter_type = rte_cpu_to_le_16(cntr);
+	flags = enable ? HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE_ENABLE :
+		HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE_DISABLE;
+	flags |= HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_PULL;
+	if (dir == BNXT_DIR_RX)
+		flags |=  HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH_RX;
+	else if (dir == BNXT_DIR_TX)
+		flags |=  HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH_TX;
+	req.flags = rte_cpu_to_le_16(flags);
+	req.ctx_id =  rte_cpu_to_le_16(ctx_id);
+	req.num_entries = rte_cpu_to_le_32(num_entries);
+
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_KONG(bp));
+	HWRM_CHECK_RESULT();
+	HWRM_UNLOCK();
+
+	return 0;
+}
+
+int bnxt_hwrm_cfa_counter_qstats(struct bnxt *bp,
+				 enum bnxt_flow_dir dir,
+				 uint16_t cntr,
+				 uint16_t num_entries)
+{
+	struct hwrm_cfa_counter_qstats_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_cfa_counter_qstats_input req = {0};
+	uint16_t flow_ctx_id = 0;
+	uint16_t flags = 0;
+	int rc = 0;
+
+	if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) {
+		PMD_DRV_LOG(DEBUG,
+			    "Not a PF or trusted VF. Command not supported\n");
+		return 0;
+	}
+
+	if (dir == BNXT_DIR_RX) {
+		flow_ctx_id = bp->rx_fc_in_tbl.ctx_id;
+		flags = HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH_RX;
+	} else if (dir == BNXT_DIR_TX) {
+		flow_ctx_id = bp->tx_fc_in_tbl.ctx_id;
+		flags = HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH_TX;
+	}
+
+	HWRM_PREP(&req, HWRM_CFA_COUNTER_QSTATS, BNXT_USE_KONG(bp));
+	req.target_id = rte_cpu_to_le_16(bp->fw_fid);
+	req.counter_type = rte_cpu_to_le_16(cntr);
+	req.input_flow_ctx_id = rte_cpu_to_le_16(flow_ctx_id);
+	req.num_entries = rte_cpu_to_le_16(num_entries);
+	req.flags = rte_cpu_to_le_16(flags);
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_KONG(bp));
+
+	HWRM_CHECK_RESULT();
+	HWRM_UNLOCK();
+
+	return 0;
+}
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index 1b7e35306..58b414d4f 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -88,6 +88,18 @@ int bnxt_hwrm_tf_message_direct(struct bnxt *bp,
 				void *resp_msg,
 				uint32_t resp_len);
 
+#define CFA_COUNTER_CFG_IN_COUNTER_TYPE_FC \
+	HWRM_CFA_COUNTER_CFG_INPUT_COUNTER_TYPE_FC
+
+enum bnxt_flow_dir {
+	BNXT_DIR_RX = 0,
+	BNXT_DIR_TX,
+	BNXT_DIR_LOOPBACK,
+	BNXT_DIR_MAX
+};
+
+#define BNXT_CTX_VAL_INVAL	0xFFFF
+
 int bnxt_hwrm_cfa_l2_clear_rx_mask(struct bnxt *bp,
 				   struct bnxt_vnic_info *vnic);
 int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp, struct bnxt_vnic_info *vnic,
@@ -248,4 +260,14 @@ int bnxt_hwrm_fw_reset(struct bnxt *bp);
 int bnxt_hwrm_port_ts_query(struct bnxt *bp, uint8_t path,
 			    uint64_t *timestamp);
 int bnxt_hwrm_cfa_adv_flow_mgmt_qcaps(struct bnxt *bp);
+int bnxt_hwrm_cfa_counter_qcaps(struct bnxt *bp, uint16_t *max_fc);
+int bnxt_hwrm_ctx_rgtr(struct bnxt *bp, rte_iova_t dma_addr, uint16_t *ctx_id);
+int bnxt_hwrm_ctx_unrgtr(struct bnxt *bp, uint16_t ctx_id);
+int bnxt_hwrm_cfa_counter_cfg(struct bnxt *bp, enum bnxt_flow_dir dir,
+			      uint16_t cntr, uint16_t ctx_id,
+			      uint32_t num_entries, bool enable);
+int bnxt_hwrm_cfa_counter_qstats(struct bnxt *bp,
+				 enum bnxt_flow_dir dir,
+				 uint16_t cntr,
+				 uint16_t num_entries);
 #endif
diff --git a/drivers/net/bnxt/bnxt_stats.c b/drivers/net/bnxt/bnxt_stats.c
index 6afd11adb..1d3be16f8 100644
--- a/drivers/net/bnxt/bnxt_stats.c
+++ b/drivers/net/bnxt/bnxt_stats.c
@@ -10,10 +10,12 @@
 
 #include "bnxt.h"
 #include "bnxt_cpr.h"
+#include "bnxt_filter.h"
 #include "bnxt_hwrm.h"
 #include "bnxt_rxq.h"
 #include "bnxt_stats.h"
 #include "bnxt_txq.h"
+#include "bnxt_vnic.h"
 #include "hsi_struct_def_dpdk.h"
 
 static const struct bnxt_xstats_name_off bnxt_rx_stats_strings[] = {
@@ -611,7 +613,9 @@ int bnxt_dev_xstats_get_op(struct rte_eth_dev *eth_dev,
 		RTE_DIM(bnxt_tx_stats_strings) +
 		RTE_DIM(bnxt_func_stats_strings) +
 		RTE_DIM(bnxt_rx_ext_stats_strings) +
-		RTE_DIM(bnxt_tx_ext_stats_strings);
+		RTE_DIM(bnxt_tx_ext_stats_strings) +
+		bnxt_flow_stats_cnt(bp);
+
 	stat_count = count;
 
 	if (n < count)
@@ -660,24 +664,77 @@ int bnxt_dev_xstats_get_op(struct rte_eth_dev *eth_dev,
 		xstats[count].value = rte_le_to_cpu_64
 					(*(uint64_t *)((char *)tx_stats_ext +
 					 bnxt_tx_ext_stats_strings[i].offset));
-
 		count++;
 	}
 
+	if (bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_COUNTERS &&
+	    bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_MGMT &&
+	    bp->flow_xstat) {
+		int j;
+
+		i = 0;
+		for (j = 0; j < bp->max_vnics; j++) {
+			struct bnxt_filter_info *filter;
+			struct bnxt_vnic_info *vnic;
+			struct rte_flow *flow;
+
+			vnic = &bp->vnic_info[j];
+			if (vnic && vnic->fw_vnic_id == INVALID_VNIC_ID)
+				continue;
+
+			if (STAILQ_EMPTY(&vnic->flow_list))
+				continue;
+
+			STAILQ_FOREACH(flow, &vnic->flow_list, next) {
+				if (!flow || !flow->filter)
+					continue;
+
+				filter = flow->filter;
+				xstats[count].id = count;
+				xstats[count].value =
+					filter->hw_stats.bytes;
+				count++;
+				xstats[count].id = count;
+				xstats[count].value =
+					filter->hw_stats.packets;
+				count++;
+				if (++i > bp->max_l2_ctx)
+					break;
+			}
+			if (i > bp->max_l2_ctx)
+				break;
+		}
+	}
+
 	return stat_count;
 }
 
+int bnxt_flow_stats_cnt(struct bnxt *bp)
+{
+	if (bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_COUNTERS &&
+	    bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_MGMT &&
+	    bp->flow_xstat) {
+		struct bnxt_xstats_name_off flow_bytes[bp->max_l2_ctx];
+		struct bnxt_xstats_name_off flow_pkts[bp->max_l2_ctx];
+
+		return RTE_DIM(flow_bytes) + RTE_DIM(flow_pkts);
+	}
+
+	return 0;
+}
+
 int bnxt_dev_xstats_get_names_op(struct rte_eth_dev *eth_dev,
-				 struct rte_eth_xstat_name *xstats_names,
-				 __rte_unused unsigned int limit)
+		struct rte_eth_xstat_name *xstats_names,
+		__rte_unused unsigned int limit)
 {
+	struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
 	const unsigned int stat_cnt = RTE_DIM(bnxt_rx_stats_strings) +
 				RTE_DIM(bnxt_tx_stats_strings) +
 				RTE_DIM(bnxt_func_stats_strings) +
 				RTE_DIM(bnxt_rx_ext_stats_strings) +
-				RTE_DIM(bnxt_tx_ext_stats_strings);
-	struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
-	unsigned int i, count;
+				RTE_DIM(bnxt_tx_ext_stats_strings) +
+				bnxt_flow_stats_cnt(bp);
+	unsigned int i, count = 0;
 	int rc;
 
 	rc = is_bnxt_in_error(bp);
@@ -724,7 +781,26 @@ int bnxt_dev_xstats_get_names_op(struct rte_eth_dev *eth_dev,
 			count++;
 		}
 
+		if (bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_COUNTERS &&
+		    bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_MGMT &&
+		    bp->flow_xstat) {
+			for (i = 0; i < bp->max_l2_ctx; i++) {
+				char buf[RTE_ETH_XSTATS_NAME_SIZE];
+
+				sprintf(buf, "flow_%d_bytes", i);
+				strlcpy(xstats_names[count].name, buf,
+					sizeof(xstats_names[count].name));
+				count++;
+
+				sprintf(buf, "flow_%d_packets", i);
+				strlcpy(xstats_names[count].name, buf,
+					sizeof(xstats_names[count].name));
+
+				count++;
+			}
+		}
 	}
+
 	return stat_cnt;
 }
 
@@ -754,12 +830,13 @@ int bnxt_dev_xstats_reset_op(struct rte_eth_dev *eth_dev)
 int bnxt_dev_xstats_get_by_id_op(struct rte_eth_dev *dev, const uint64_t *ids,
 		uint64_t *values, unsigned int limit)
 {
+	struct bnxt *bp = dev->data->dev_private;
 	const unsigned int stat_cnt = RTE_DIM(bnxt_rx_stats_strings) +
 				RTE_DIM(bnxt_tx_stats_strings) +
 				RTE_DIM(bnxt_func_stats_strings) +
 				RTE_DIM(bnxt_rx_ext_stats_strings) +
-				RTE_DIM(bnxt_tx_ext_stats_strings);
-	struct bnxt *bp = dev->data->dev_private;
+				RTE_DIM(bnxt_tx_ext_stats_strings) +
+				bnxt_flow_stats_cnt(bp);
 	struct rte_eth_xstat xstats[stat_cnt];
 	uint64_t values_copy[stat_cnt];
 	uint16_t i;
@@ -787,13 +864,14 @@ int bnxt_dev_xstats_get_names_by_id_op(struct rte_eth_dev *dev,
 				struct rte_eth_xstat_name *xstats_names,
 				const uint64_t *ids, unsigned int limit)
 {
+	struct bnxt *bp = dev->data->dev_private;
 	const unsigned int stat_cnt = RTE_DIM(bnxt_rx_stats_strings) +
 				RTE_DIM(bnxt_tx_stats_strings) +
 				RTE_DIM(bnxt_func_stats_strings) +
 				RTE_DIM(bnxt_rx_ext_stats_strings) +
-				RTE_DIM(bnxt_tx_ext_stats_strings);
+				RTE_DIM(bnxt_tx_ext_stats_strings) +
+				bnxt_flow_stats_cnt(bp);
 	struct rte_eth_xstat_name xstats_names_copy[stat_cnt];
-	struct bnxt *bp = dev->data->dev_private;
 	uint16_t i;
 	int rc;
 
@@ -817,3 +895,135 @@ int bnxt_dev_xstats_get_names_by_id_op(struct rte_eth_dev *dev,
 	}
 	return stat_cnt;
 }
+
+/* Update the input context memory with the flow counter IDs
+ * of the flows that we are interested in.
+ * Also, update the output tables with the current local values
+ * since that is what will be used by FW to accumulate
+ */
+static void bnxt_update_fc_pre_qstat(uint32_t *in_tbl,
+				     uint64_t *out_tbl,
+				     struct bnxt_filter_info *filter,
+				     uint32_t *ptbl_cnt)
+{
+	uint32_t in_tbl_cnt = *ptbl_cnt;
+
+	in_tbl[in_tbl_cnt] = filter->flow_id;
+	out_tbl[2 * in_tbl_cnt] = filter->hw_stats.packets;
+	out_tbl[2 * in_tbl_cnt + 1] = filter->hw_stats.bytes;
+	in_tbl_cnt++;
+	*ptbl_cnt = in_tbl_cnt;
+}
+
+/* Post issuing counter_qstats cmd, update the driver's local stat
+ * entries with the values DMA-ed by FW in the output table
+ */
+static void bnxt_update_fc_post_qstat(struct bnxt_filter_info *filter,
+				      uint64_t *out_tbl,
+				      uint32_t out_tbl_idx)
+{
+	filter->hw_stats.packets = out_tbl[2 * out_tbl_idx];
+	filter->hw_stats.bytes = out_tbl[(2 * out_tbl_idx) + 1];
+}
+
+static int bnxt_update_fc_tbl(struct bnxt *bp, uint16_t ctr,
+			      struct bnxt_filter_info *en_tbl[],
+			      uint16_t in_flow_cnt)
+{
+	uint32_t *in_rx_tbl;
+	uint64_t *out_rx_tbl;
+	uint32_t in_rx_tbl_cnt = 0;
+	uint32_t out_rx_tbl_cnt = 0;
+	int i, rc = 0;
+
+	in_rx_tbl = (uint32_t *)bp->rx_fc_in_tbl.va;
+	out_rx_tbl = (uint64_t *)bp->rx_fc_out_tbl.va;
+
+	for (i = 0; i < in_flow_cnt; i++) {
+		if (!en_tbl[i])
+			continue;
+
+		/* Currently only ingress/Rx flows are supported anyway. */
+		bnxt_update_fc_pre_qstat(in_rx_tbl, out_rx_tbl,
+					 en_tbl[i], &in_rx_tbl_cnt);
+	}
+
+	/* Currently only ingress/Rx flows are supported */
+	if (in_rx_tbl_cnt) {
+		rc = bnxt_hwrm_cfa_counter_qstats(bp, BNXT_DIR_RX, ctr,
+						  in_rx_tbl_cnt);
+		if (rc)
+			return rc;
+	}
+
+	for (i = 0; i < in_flow_cnt; i++) {
+		if (!en_tbl[i])
+			continue;
+
+		/* Currently only ingress/Rx flows are supported */
+		bnxt_update_fc_post_qstat(en_tbl[i], out_rx_tbl,
+					  out_rx_tbl_cnt);
+		out_rx_tbl_cnt++;
+	}
+
+	return rc;
+}
+
+/* Walks the flow list of every valid VNIC and queries the FW
+ * flow counters for those flows, in batches of at most max_fc.
+ */
+int bnxt_flow_stats_req(struct bnxt *bp)
+{
+	int i;
+	int rc = 0;
+	struct rte_flow *flow;
+	uint16_t in_flow_tbl_cnt = 0;
+	struct bnxt_vnic_info *vnic = NULL;
+	struct bnxt_filter_info *valid_en_tbl[bp->max_fc];
+	uint16_t counter_type = CFA_COUNTER_CFG_IN_COUNTER_TYPE_FC;
+
+	bnxt_acquire_flow_lock(bp);
+	for (i = 0; i < bp->max_vnics; i++) {
+		vnic = &bp->vnic_info[i];
+		if (vnic && vnic->fw_vnic_id == INVALID_VNIC_ID)
+			continue;
+
+		if (STAILQ_EMPTY(&vnic->flow_list))
+			continue;
+
+		STAILQ_FOREACH(flow, &vnic->flow_list, next) {
+			if (!flow || !flow->filter)
+				continue;
+
+			valid_en_tbl[in_flow_tbl_cnt++] = flow->filter;
+			if (in_flow_tbl_cnt >= bp->max_fc) {
+				rc = bnxt_update_fc_tbl(bp, counter_type,
+							valid_en_tbl,
+							in_flow_tbl_cnt);
+				if (rc)
+					goto err;
+				in_flow_tbl_cnt = 0;
+				continue;
+			}
+		}
+	}
+
+	if (!in_flow_tbl_cnt)
+		goto out;
+
+	rc = bnxt_update_fc_tbl(bp, counter_type, valid_en_tbl,
+				in_flow_tbl_cnt);
+	if (!rc) {
+		bnxt_release_flow_lock(bp);
+		return 0;
+	}
+
+err:
+	/* If cmd fails once, no need of
+	 * invoking again every second
+	 */
+	bnxt_release_flow_lock(bp);
+	bnxt_cancel_fc_thread(bp);
+out:
+	return rc;
+}
diff --git a/drivers/net/bnxt/hsi_struct_def_dpdk.h b/drivers/net/bnxt/hsi_struct_def_dpdk.h
index cde96e784..608eaa2f4 100644
--- a/drivers/net/bnxt/hsi_struct_def_dpdk.h
+++ b/drivers/net/bnxt/hsi_struct_def_dpdk.h
@@ -38338,4 +38338,288 @@ struct hwrm_port_ts_query_output {
 	uint8_t		valid;
 } __attribute__((packed));
 
+/**************************
+ * hwrm_cfa_counter_qcaps *
+ **************************/
+
+
+/* hwrm_cfa_counter_qcaps_input (size:128b/16B) */
+struct hwrm_cfa_counter_qcaps_input {
+	/* The HWRM command request type. */
+	uint16_t	req_type;
+	/*
+	 * The completion ring to send the completion event on. This should
+	 * be the NQ ID returned from the `nq_alloc` HWRM command.
+	 */
+	uint16_t	cmpl_ring;
+	/*
+	 * The sequence ID is used by the driver for tracking multiple
+	 * commands. This ID is treated as opaque data by the firmware and
+	 * the value is returned in the `hwrm_resp_hdr` upon completion.
+	 */
+	uint16_t	seq_id;
+	/*
+	 * The target ID of the command:
+	 * * 0x0-0xFFF8 - The function ID
+	 * * 0xFFF8-0xFFFC, 0xFFFE - Reserved for internal processors
+	 * * 0xFFFD - Reserved for user-space HWRM interface
+	 * * 0xFFFF - HWRM
+	 */
+	uint16_t	target_id;
+	/*
+	 * A physical address pointer pointing to a host buffer that the
+	 * command's response data will be written. This can be either a host
+	 * physical address (HPA) or a guest physical address (GPA) and must
+	 * point to a physically contiguous block of memory.
+	 */
+	uint64_t	resp_addr;
+} __attribute__((packed));
+
+/* hwrm_cfa_counter_qcaps_output (size:576b/72B) */
+struct hwrm_cfa_counter_qcaps_output {
+	/* The specific error status for the command. */
+	uint16_t	error_code;
+	/* The HWRM command request type. */
+	uint16_t	req_type;
+	/* The sequence ID from the original command. */
+	uint16_t	seq_id;
+	/* The length of the response data in number of bytes. */
+	uint16_t	resp_len;
+	uint32_t	flags;
+	/* Enumeration denoting the supported CFA counter format. */
+	#define HWRM_CFA_COUNTER_QCAPS_OUTPUT_FLAGS_COUNTER_FORMAT \
+		UINT32_C(0x1)
+	/* CFA counter types are not supported. */
+	#define HWRM_CFA_COUNTER_QCAPS_OUTPUT_FLAGS_COUNTER_FORMAT_NONE \
+		UINT32_C(0x0)
+	/* 64-bit packet counters followed by 64-bit byte counters format. */
+	#define HWRM_CFA_COUNTER_QCAPS_OUTPUT_FLAGS_COUNTER_FORMAT_64_BIT \
+		UINT32_C(0x1)
+	#define HWRM_CFA_COUNTER_QCAPS_OUTPUT_FLAGS_COUNTER_FORMAT_LAST \
+		HWRM_CFA_COUNTER_QCAPS_OUTPUT_FLAGS_COUNTER_FORMAT_64_BIT
+	uint32_t	unused_0;
+	/* Minimum guaranteed number of flow counters supported for this function, in RX direction. */
+	uint32_t	min_rx_fc;
+	/* Maximum non-guaranteed number of flow counters supported for this function, in RX direction. */
+	uint32_t	max_rx_fc;
+	/* Minimum guaranteed number of flow counters supported for this function, in TX direction. */
+	uint32_t	min_tx_fc;
+	/* Maximum non-guaranteed number of flow counters supported for this function, in TX direction. */
+	uint32_t	max_tx_fc;
+	/* Minimum guaranteed number of extension flow counters supported for this function, in RX direction. */
+	uint32_t	min_rx_efc;
+	/* Maximum non-guaranteed number of extension flow counters supported for this function, in RX direction. */
+	uint32_t	max_rx_efc;
+	/* Minimum guaranteed number of extension flow counters supported for this function, in TX direction. */
+	uint32_t	min_tx_efc;
+	/* Maximum non-guaranteed number of extension flow counters supported for this function, in TX direction. */
+	uint32_t	max_tx_efc;
+	/* Minimum guaranteed number of meter drop counters supported for this function, in RX direction. */
+	uint32_t	min_rx_mdc;
+	/* Maximum non-guaranteed number of meter drop counters supported for this function, in RX direction. */
+	uint32_t	max_rx_mdc;
+	/* Minimum guaranteed number of meter drop counters supported for this function, in TX direction. */
+	uint32_t	min_tx_mdc;
+	/* Maximum non-guaranteed number of meter drop counters supported for this function, in TX direction. */
+	uint32_t	max_tx_mdc;
+	/* Maximum guaranteed number of flow counters which can be used during flow alloc. */
+	uint32_t	max_flow_alloc_fc;
+	uint8_t	unused_1[3];
+	/*
+	 * This field is used in Output records to indicate that the output
+	 * is completely written to RAM.  This field should be read as '1'
+	 * to indicate that the output has been completely written.
+	 * When writing a command completion or response to an internal processor,
+	 * the order of writes has to be such that this field is written last.
+	 */
+	uint8_t	valid;
+} __attribute__((packed));
+
+/************************
+ * hwrm_cfa_counter_cfg *
+ ************************/
+
+
+/* hwrm_cfa_counter_cfg_input (size:256b/32B) */
+struct hwrm_cfa_counter_cfg_input {
+	/* The HWRM command request type. */
+	uint16_t	req_type;
+	/*
+	 * The completion ring to send the completion event on. This should
+	 * be the NQ ID returned from the `nq_alloc` HWRM command.
+	 */
+	uint16_t	cmpl_ring;
+	/*
+	 * The sequence ID is used by the driver for tracking multiple
+	 * commands. This ID is treated as opaque data by the firmware and
+	 * the value is returned in the `hwrm_resp_hdr` upon completion.
+	 */
+	uint16_t	seq_id;
+	/*
+	 * The target ID of the command:
+	 * * 0x0-0xFFF8 - The function ID
+	 * * 0xFFF8-0xFFFC, 0xFFFE - Reserved for internal processors
+	 * * 0xFFFD - Reserved for user-space HWRM interface
+	 * * 0xFFFF - HWRM
+	 */
+	uint16_t	target_id;
+	/*
+	 * A physical address pointer pointing to a host buffer that the
+	 * command's response data will be written. This can be either a host
+	 * physical address (HPA) or a guest physical address (GPA) and must
+	 * point to a physically contiguous block of memory.
+	 */
+	uint64_t	resp_addr;
+	uint16_t	flags;
+	/* Enumeration denoting the configuration mode. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE \
+		UINT32_C(0x1)
+	/* Disable the configuration mode. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE_DISABLE \
+		UINT32_C(0x0)
+	/* Enable the configuration mode. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE_ENABLE \
+		UINT32_C(0x1)
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE_LAST \
+		HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE_ENABLE
+	/* Enumeration denoting the RX, TX type of the resource. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH \
+		UINT32_C(0x2)
+	/* Tx path. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH_TX \
+		(UINT32_C(0x0) << 1)
+	/* Rx path. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH_RX \
+		(UINT32_C(0x1) << 1)
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH_LAST \
+		HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH_RX
+	/* Enumeration denoting the data transfer mode. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_MASK \
+		UINT32_C(0xc)
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_SFT       2
+	/* Push mode. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_PUSH \
+		(UINT32_C(0x0) << 2)
+	/* Pull mode. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_PULL \
+		(UINT32_C(0x1) << 2)
+	/* Pull on async update. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_PULL_ASYNC \
+		(UINT32_C(0x2) << 2)
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_LAST \
+		HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_PULL_ASYNC
+	uint16_t	counter_type;
+	/* Flow counters. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_COUNTER_TYPE_FC  UINT32_C(0x0)
+	/* Extended flow counters. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_COUNTER_TYPE_EFC UINT32_C(0x1)
+	/* Meter drop counters. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_COUNTER_TYPE_MDC UINT32_C(0x2)
+	#define HWRM_CFA_COUNTER_CFG_INPUT_COUNTER_TYPE_LAST \
+		HWRM_CFA_COUNTER_CFG_INPUT_COUNTER_TYPE_MDC
+	/* Ctx memory handle to be used for the counter. */
+	uint16_t	ctx_id;
+	/* Counter update cadence hint (only in Push mode). */
+	uint16_t	update_tmr_ms;
+	/* Total number of entries. */
+	uint32_t	num_entries;
+	uint32_t	unused_0;
+} __attribute__((packed));
+
+/* hwrm_cfa_counter_cfg_output (size:128b/16B) */
+struct hwrm_cfa_counter_cfg_output {
+	/* The specific error status for the command. */
+	uint16_t	error_code;
+	/* The HWRM command request type. */
+	uint16_t	req_type;
+	/* The sequence ID from the original command. */
+	uint16_t	seq_id;
+	/* The length of the response data in number of bytes. */
+	uint16_t	resp_len;
+	uint8_t	unused_0[7];
+	/*
+	 * This field is used in Output records to indicate that the output
+	 * is completely written to RAM.  This field should be read as '1'
+	 * to indicate that the output has been completely written.
+	 * When writing a command completion or response to an internal processor,
+	 * the order of writes has to be such that this field is written last.
+	 */
+	uint8_t	valid;
+} __attribute__((packed));
+
+/***************************
+ * hwrm_cfa_counter_qstats *
+ ***************************/
+
+
+/* hwrm_cfa_counter_qstats_input (size:320b/40B) */
+struct hwrm_cfa_counter_qstats_input {
+	/* The HWRM command request type. */
+	uint16_t	req_type;
+	/*
+	 * The completion ring to send the completion event on. This should
+	 * be the NQ ID returned from the `nq_alloc` HWRM command.
+	 */
+	uint16_t	cmpl_ring;
+	/*
+	 * The sequence ID is used by the driver for tracking multiple
+	 * commands. This ID is treated as opaque data by the firmware and
+	 * the value is returned in the `hwrm_resp_hdr` upon completion.
+	 */
+	uint16_t	seq_id;
+	/*
+	 * The target ID of the command:
+	 * * 0x0-0xFFF8 - The function ID
+	 * * 0xFFF8-0xFFFC, 0xFFFE - Reserved for internal processors
+	 * * 0xFFFD - Reserved for user-space HWRM interface
+	 * * 0xFFFF - HWRM
+	 */
+	uint16_t	target_id;
+	/*
+	 * A physical address pointer pointing to a host buffer that the
+	 * command's response data will be written. This can be either a host
+	 * physical address (HPA) or a guest physical address (GPA) and must
+	 * point to a physically contiguous block of memory.
+	 */
+	uint64_t	resp_addr;
+	uint16_t	flags;
+	/* Enumeration denoting the RX, TX type of the resource. */
+	#define HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH     UINT32_C(0x1)
+	/* Tx path. */
+	#define HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH_TX    UINT32_C(0x0)
+	/* Rx path. */
+	#define HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH_RX    UINT32_C(0x1)
+	#define HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH_LAST \
+		HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH_RX
+	uint16_t	counter_type;
+	uint16_t	input_flow_ctx_id;
+	uint16_t	num_entries;
+	uint16_t	delta_time_ms;
+	uint16_t	meter_instance_id;
+	uint16_t	mdc_ctx_id;
+	uint8_t	unused_0[2];
+	uint64_t	expected_count;
+} __attribute__((packed));
+
+/* hwrm_cfa_counter_qstats_output (size:128b/16B) */
+struct hwrm_cfa_counter_qstats_output {
+	/* The specific error status for the command. */
+	uint16_t	error_code;
+	/* The HWRM command request type. */
+	uint16_t	req_type;
+	/* The sequence ID from the original command. */
+	uint16_t	seq_id;
+	/* The length of the response data in number of bytes. */
+	uint16_t	resp_len;
+	uint8_t	unused_0[7];
+	/*
+	 * This field is used in Output records to indicate that the output
+	 * is completely written to RAM.  This field should be read as '1'
+	 * to indicate that the output has been completely written.
+	 * When writing a command completion or response to an internal processor,
+	 * the order of writes has to be such that this field is written last.
+	 */
+	uint8_t	valid;
+} __attribute__((packed));
+
 #endif /* _HSI_STRUCT_DEF_DPDK_H_ */
-- 
2.21.1 (Apple Git-122.3)


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [dpdk-dev] [PATCH v2] net/bnxt: add flow stats in extended stats
  2020-04-17  5:37 [dpdk-dev] [PATCH] net/bnxt: add flow stats in extended stats Ajit Khaparde
@ 2020-04-17  5:49 ` Ajit Khaparde
  2020-04-17 14:49   ` Ajit Khaparde
  0 siblings, 1 reply; 4+ messages in thread
From: Ajit Khaparde @ 2020-04-17  5:49 UTC (permalink / raw)
  To: dev; +Cc: Somnath Kotur, Sriharsha Basavapatna

From: Somnath Kotur <somnath.kotur@broadcom.com>

This patch allows flow stats to be displayed in extended stats.
To do this, DMA-able memory is registered with the FW during device
initialization. Then the driver uses an alarm thread to query the
per-flow stats using the HWRM_CFA_COUNTER_QSTATS HWRM command at
regular intervals and stores them locally; the stored values are
displayed when the application queries the xstats.
The DMA-able memory is unregistered during driver cleanup.
This functionality can be enabled using the flow-xstat devarg and
is disabled by default. The intention behind this is to allow
stats to be displayed for all the flows in one shot instead of
querying one at a time.

Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
---
v1->v2:
Updating patch authors.
---
 drivers/net/bnxt/bnxt.h                |  24 ++-
 drivers/net/bnxt/bnxt_ethdev.c         | 263 +++++++++++++++++++++++
 drivers/net/bnxt/bnxt_filter.h         |   6 +
 drivers/net/bnxt/bnxt_flow.c           |  57 +++++
 drivers/net/bnxt/bnxt_hwrm.c           | 169 ++++++++++++++-
 drivers/net/bnxt/bnxt_hwrm.h           |  22 ++
 drivers/net/bnxt/bnxt_stats.c          | 232 +++++++++++++++++++-
 drivers/net/bnxt/hsi_struct_def_dpdk.h | 284 +++++++++++++++++++++++++
 8 files changed, 1043 insertions(+), 14 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index a70cdff07..00a4d0b3e 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -433,6 +433,13 @@ struct bnxt_ctx_mem_info {
 	struct bnxt_ctx_pg_info *tqm_mem[BNXT_MAX_TC_Q];
 };
 
+struct bnxt_ctx_mem_buf_info {
+	void		*va;	/* virtual address of the buffer */
+	rte_iova_t	dma;	/* IOVA of the buffer (rte_mem_virt2iova) */
+	uint16_t	ctx_id;	/* FW context id; BNXT_CTX_VAL_INVAL if none */
+	size_t		size;	/* buffer size in bytes */
+};
+
 /* Maximum Firmware Reset bail out value in milliseconds */
 #define BNXT_MAX_FW_RESET_TIMEOUT	6000
 /* Minimum time required for the firmware readiness in milliseconds */
@@ -530,7 +537,7 @@ struct bnxt {
 #define BNXT_FLAG_NEW_RM			BIT(20)
 #define BNXT_FLAG_NPAR_PF			BIT(21)
 #define BNXT_FLAG_FW_CAP_ONE_STEP_TX_TS		BIT(22)
-#define BNXT_FLAG_ADV_FLOW_MGMT			BIT(23)
+#define BNXT_FLAG_FC_THREAD			BIT(23)
 #define BNXT_FLAG_RX_VECTOR_PKT_MODE		BIT(24)
 #define BNXT_PF(bp)		(!((bp)->flags & BNXT_FLAG_VF))
 #define BNXT_VF(bp)		((bp)->flags & BNXT_FLAG_VF)
@@ -550,6 +557,8 @@ struct bnxt {
 #define BNXT_FW_CAP_IF_CHANGE		BIT(1)
 #define BNXT_FW_CAP_ERROR_RECOVERY	BIT(2)
 #define BNXT_FW_CAP_ERR_RECOVER_RELOAD	BIT(3)
+#define BNXT_FW_CAP_ADV_FLOW_MGMT	BIT(5)
+#define BNXT_FW_CAP_ADV_FLOW_COUNTERS	BIT(6)
 
 	uint32_t		flow_flags;
 #define BNXT_FLOW_FLAG_L2_HDR_SRC_FILTER_EN	BIT(0)
@@ -690,8 +699,17 @@ struct bnxt {
 	struct tf		tfp;
 	struct bnxt_ulp_context	ulp_ctx;
 	uint8_t			truflow;
+	uint16_t                max_fc;
+	struct bnxt_ctx_mem_buf_info rx_fc_in_tbl;
+	struct bnxt_ctx_mem_buf_info rx_fc_out_tbl;
+	struct bnxt_ctx_mem_buf_info tx_fc_in_tbl;
+	struct bnxt_ctx_mem_buf_info tx_fc_out_tbl;
+	uint16_t		flow_count;
+	uint8_t			flow_xstat;
 };
 
+#define BNXT_FC_TIMER	1 /* Flow counter polling period, in seconds */
+
 int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu);
 int bnxt_link_update(struct rte_eth_dev *eth_dev, int wait_to_complete,
 		     bool exp_link_status);
@@ -738,4 +756,8 @@ void bnxt_ulp_deinit(struct bnxt *bp);
 uint16_t bnxt_get_vnic_id(uint16_t port);
 uint16_t bnxt_get_svif(uint16_t port_id, bool func_svif);
 
+void bnxt_cancel_fc_thread(struct bnxt *bp);
+void bnxt_flow_cnt_alarm_cb(void *arg);
+int bnxt_flow_stats_req(struct bnxt *bp);
+int bnxt_flow_stats_cnt(struct bnxt *bp);
 #endif
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 5d5b8e095..bd2c3fcb6 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -128,8 +128,10 @@ static const struct rte_pci_id bnxt_pci_id_map[] = {
 				     DEV_RX_OFFLOAD_RSS_HASH)
 
 #define BNXT_DEVARG_TRUFLOW	"host-based-truflow"
+#define BNXT_DEVARG_FLOW_XSTAT	"flow-xstat"
 static const char *const bnxt_dev_args[] = {
 	BNXT_DEVARG_TRUFLOW,
+	BNXT_DEVARG_FLOW_XSTAT,
 	NULL
 };
 
@@ -139,6 +141,12 @@ static const char *const bnxt_dev_args[] = {
  */
 #define	BNXT_DEVARG_TRUFLOW_INVALID(truflow)	((truflow) > 1)
 
+/*
+ * flow_xstat == false to disable the feature
+ * flow_xstat == true to enable the feature
+ */
+#define	BNXT_DEVARG_FLOW_XSTAT_INVALID(flow_xstat)	((flow_xstat) > 1)
+
 static int bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask);
 static void bnxt_print_link_info(struct rte_eth_dev *eth_dev);
 static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev);
@@ -333,6 +341,150 @@ static int bnxt_setup_one_vnic(struct bnxt *bp, uint16_t vnic_id)
 	return rc;
 }
 
+static int bnxt_register_fc_ctx_mem(struct bnxt *bp)
+{
+	int rc = 0;
+
+	rc = bnxt_hwrm_ctx_rgtr(bp, bp->rx_fc_in_tbl.dma,
+				&bp->rx_fc_in_tbl.ctx_id);
+	if (rc)
+		return rc;
+
+	PMD_DRV_LOG(DEBUG,
+		    "rx_fc_in_tbl.va = %p rx_fc_in_tbl.dma = %p"
+		    " rx_fc_in_tbl.ctx_id = %d\n",
+		    bp->rx_fc_in_tbl.va, (void *)bp->rx_fc_in_tbl.dma,
+		    bp->rx_fc_in_tbl.ctx_id);
+
+	rc = bnxt_hwrm_ctx_rgtr(bp, bp->rx_fc_out_tbl.dma,
+				&bp->rx_fc_out_tbl.ctx_id);
+	if (rc)
+		return rc;
+
+	PMD_DRV_LOG(DEBUG,
+		    "rx_fc_out_tbl.va = %p rx_fc_out_tbl.dma = %p"
+		    " rx_fc_out_tbl.ctx_id = %d\n",
+		    bp->rx_fc_out_tbl.va, (void *)bp->rx_fc_out_tbl.dma,
+		    bp->rx_fc_out_tbl.ctx_id);
+
+	rc = bnxt_hwrm_ctx_rgtr(bp, bp->tx_fc_in_tbl.dma,
+				&bp->tx_fc_in_tbl.ctx_id);
+	if (rc)
+		return rc;
+
+	PMD_DRV_LOG(DEBUG,
+		    "tx_fc_in_tbl.va = %p tx_fc_in_tbl.dma = %p"
+		    " tx_fc_in_tbl.ctx_id = %d\n",
+		    bp->tx_fc_in_tbl.va, (void *)bp->tx_fc_in_tbl.dma,
+		    bp->tx_fc_in_tbl.ctx_id);
+
+	rc = bnxt_hwrm_ctx_rgtr(bp, bp->tx_fc_out_tbl.dma,
+				&bp->tx_fc_out_tbl.ctx_id);
+	if (rc)
+		return rc;
+
+	PMD_DRV_LOG(DEBUG,
+		    "tx_fc_out_tbl.va = %p tx_fc_out_tbl.dma = %p"
+		    " tx_fc_out_tbl.ctx_id = %d\n",
+		    bp->tx_fc_out_tbl.va, (void *)bp->tx_fc_out_tbl.dma,
+		    bp->tx_fc_out_tbl.ctx_id);
+
+	memset(bp->rx_fc_out_tbl.va, 0, bp->rx_fc_out_tbl.size);
+	rc = bnxt_hwrm_cfa_counter_cfg(bp, BNXT_DIR_RX,
+				       CFA_COUNTER_CFG_IN_COUNTER_TYPE_FC,
+				       bp->rx_fc_out_tbl.ctx_id,
+				       bp->max_fc,
+				       true);
+	if (rc)
+		return rc;
+
+	memset(bp->tx_fc_out_tbl.va, 0, bp->tx_fc_out_tbl.size);
+	rc = bnxt_hwrm_cfa_counter_cfg(bp, BNXT_DIR_TX,
+				       CFA_COUNTER_CFG_IN_COUNTER_TYPE_FC,
+				       bp->tx_fc_out_tbl.ctx_id,
+				       bp->max_fc,
+				       true);
+
+	return rc;
+}
+
+static int bnxt_alloc_ctx_mem_buf(char *type, size_t size,
+				  struct bnxt_ctx_mem_buf_info *ctx)
+{
+	if (!ctx)
+		return -EINVAL;
+
+	ctx->va = rte_zmalloc(type, size, 0);
+	if (ctx->va == NULL)
+		return -ENOMEM;
+	rte_mem_lock_page(ctx->va);
+	ctx->size = size;
+	ctx->dma = rte_mem_virt2iova(ctx->va);
+	if (ctx->dma == RTE_BAD_IOVA)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int bnxt_init_fc_ctx_mem(struct bnxt *bp)
+{
+	struct rte_pci_device *pdev = bp->pdev;
+	char type[RTE_MEMZONE_NAMESIZE];
+	uint16_t max_fc;
+	int rc = 0;
+
+	max_fc = bp->max_fc;
+
+	sprintf(type, "bnxt_rx_fc_in_" PCI_PRI_FMT, pdev->addr.domain,
+		pdev->addr.bus, pdev->addr.devid, pdev->addr.function);
+	/* 4 bytes for each counter-id */
+	rc = bnxt_alloc_ctx_mem_buf(type, max_fc * 4, &bp->rx_fc_in_tbl);
+	if (rc)
+		return rc;
+
+	sprintf(type, "bnxt_rx_fc_out_" PCI_PRI_FMT, pdev->addr.domain,
+		pdev->addr.bus, pdev->addr.devid, pdev->addr.function);
+	/* 16 bytes for each counter - 8 bytes pkt_count, 8 bytes byte_count */
+	rc = bnxt_alloc_ctx_mem_buf(type, max_fc * 16, &bp->rx_fc_out_tbl);
+	if (rc)
+		return rc;
+
+	sprintf(type, "bnxt_tx_fc_in_" PCI_PRI_FMT, pdev->addr.domain,
+		pdev->addr.bus, pdev->addr.devid, pdev->addr.function);
+	/* 4 bytes for each counter-id */
+	rc = bnxt_alloc_ctx_mem_buf(type, max_fc * 4, &bp->tx_fc_in_tbl);
+	if (rc)
+		return rc;
+
+	sprintf(type, "bnxt_tx_fc_out_" PCI_PRI_FMT, pdev->addr.domain,
+		pdev->addr.bus, pdev->addr.devid, pdev->addr.function);
+	/* 16 bytes for each counter - 8 bytes pkt_count, 8 bytes byte_count */
+	rc = bnxt_alloc_ctx_mem_buf(type, max_fc * 16, &bp->tx_fc_out_tbl);
+	if (rc)
+		return rc;
+
+	rc = bnxt_register_fc_ctx_mem(bp);
+
+	return rc;
+}
+
+/* Set up flow counter context memory when FW and function support it. */
+static int bnxt_init_ctx_mem(struct bnxt *bp)
+{
+	int rc;
+
+	if (!(bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_COUNTERS))
+		return 0;
+	if (!BNXT_PF(bp) && !BNXT_VF_IS_TRUSTED(bp))
+		return 0;
+
+	rc = bnxt_hwrm_cfa_counter_qcaps(bp, &bp->max_fc);
+	if (rc != 0)
+		return rc;
+
+	return bnxt_init_fc_ctx_mem(bp);
+}
+
 static int bnxt_init_chip(struct bnxt *bp)
 {
 	struct rte_eth_link new;
@@ -1005,6 +1157,7 @@ static void bnxt_dev_close_op(struct rte_eth_dev *eth_dev)
 	/* cancel the recovery handler before remove dev */
 	rte_eal_alarm_cancel(bnxt_dev_reset_and_resume, (void *)bp);
 	rte_eal_alarm_cancel(bnxt_dev_recover, (void *)bp);
+	bnxt_cancel_fc_thread(bp);
 
 	if (eth_dev->data->dev_started)
 		bnxt_dev_stop_op(eth_dev);
@@ -4871,6 +5024,12 @@ static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev)
 	if (rc)
 		return rc;
 
+	rc = bnxt_init_ctx_mem(bp);
+	if (rc) {
+		PMD_DRV_LOG(ERR, "Failed to init adv_flow_counters\n");
+		return rc;
+	}
+
 	rc = bnxt_init_locks(bp);
 	if (rc)
 		return rc;
@@ -4913,6 +5072,41 @@ bnxt_parse_devarg_truflow(__rte_unused const char *key,
 	return 0;
 }
 
+static int
+bnxt_parse_devarg_flow_xstat(__rte_unused const char *key,
+			     const char *value, void *opaque_arg)
+{
+	struct bnxt *bp = opaque_arg;
+	unsigned long flow_xstat;
+	char *end = NULL;
+
+	if (!value || !opaque_arg) {
+		PMD_DRV_LOG(ERR,
+			    "Invalid parameter passed to flow_xstat devarg.\n");
+		return -EINVAL;
+	}
+
+	flow_xstat = strtoul(value, &end, 10);
+	if (end == NULL || *end != '\0' ||
+	    (flow_xstat == ULONG_MAX && errno == ERANGE)) {
+		PMD_DRV_LOG(ERR,
+			    "Invalid parameter passed to flow_xstat devarg.\n");
+		return -EINVAL;
+	}
+
+	if (BNXT_DEVARG_FLOW_XSTAT_INVALID(flow_xstat)) {
+		PMD_DRV_LOG(ERR,
+			    "Invalid value passed to flow_xstat devarg.\n");
+		return -EINVAL;
+	}
+
+	bp->flow_xstat = flow_xstat;
+	if (bp->flow_xstat)
+		PMD_DRV_LOG(INFO, "flow_xstat feature enabled.\n");
+
+	return 0;
+}
+
 static void
 bnxt_parse_dev_args(struct bnxt *bp, struct rte_devargs *devargs)
 {
@@ -4932,6 +5126,13 @@ bnxt_parse_dev_args(struct bnxt *bp, struct rte_devargs *devargs)
 	rte_kvargs_process(kvlist, BNXT_DEVARG_TRUFLOW,
 			   bnxt_parse_devarg_truflow, bp);
 
+	/*
+	 * Handler for "flow-xstat" devarg.
+	 * Invoked as for ex: "-w 0000:00:0d.0,flow-xstat=1"
+	 */
+	rte_kvargs_process(kvlist, BNXT_DEVARG_FLOW_XSTAT,
+			   bnxt_parse_devarg_flow_xstat, bp);
+
 	rte_kvargs_free(kvlist);
 }
 
@@ -5016,6 +5217,66 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
 	return rc;
 }
 
+
+/* Release the buffer behind ctx (if any) and mark ctx as unused. */
+static void bnxt_free_ctx_mem_buf(struct bnxt_ctx_mem_buf_info *ctx)
+{
+	if (ctx == NULL)
+		return;
+
+	if (ctx->va != NULL)
+		rte_free(ctx->va);
+	ctx->ctx_id = BNXT_CTX_VAL_INVAL;
+	ctx->dma = RTE_BAD_IOVA;
+	ctx->va = NULL;
+}
+
+/* Disable the Rx/Tx flow counters in FW and unregister all four tables. */
+static void bnxt_unregister_fc_ctx_mem(struct bnxt *bp)
+{
+	/* Without this capability bnxt_init_ctx_mem() registered nothing. */
+	if (!(bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_COUNTERS))
+		return;
+
+	bnxt_hwrm_cfa_counter_cfg(bp, BNXT_DIR_RX,
+				  CFA_COUNTER_CFG_IN_COUNTER_TYPE_FC,
+				  bp->rx_fc_out_tbl.ctx_id, bp->max_fc, false);
+	bnxt_hwrm_cfa_counter_cfg(bp, BNXT_DIR_TX,
+				  CFA_COUNTER_CFG_IN_COUNTER_TYPE_FC,
+				  bp->tx_fc_out_tbl.ctx_id, bp->max_fc, false);
+
+	if (bp->rx_fc_in_tbl.ctx_id != BNXT_CTX_VAL_INVAL)
+		bnxt_hwrm_ctx_unrgtr(bp, bp->rx_fc_in_tbl.ctx_id);
+	bp->rx_fc_in_tbl.ctx_id = BNXT_CTX_VAL_INVAL;
+
+	if (bp->rx_fc_out_tbl.ctx_id != BNXT_CTX_VAL_INVAL)
+		bnxt_hwrm_ctx_unrgtr(bp, bp->rx_fc_out_tbl.ctx_id);
+	bp->rx_fc_out_tbl.ctx_id = BNXT_CTX_VAL_INVAL;
+
+	if (bp->tx_fc_in_tbl.ctx_id != BNXT_CTX_VAL_INVAL)
+		bnxt_hwrm_ctx_unrgtr(bp, bp->tx_fc_in_tbl.ctx_id);
+	bp->tx_fc_in_tbl.ctx_id = BNXT_CTX_VAL_INVAL;
+
+	if (bp->tx_fc_out_tbl.ctx_id != BNXT_CTX_VAL_INVAL)
+		bnxt_hwrm_ctx_unrgtr(bp, bp->tx_fc_out_tbl.ctx_id);
+	bp->tx_fc_out_tbl.ctx_id = BNXT_CTX_VAL_INVAL;
+}
+
+static void bnxt_uninit_fc_ctx_mem(struct bnxt *bp)
+{
+	bnxt_unregister_fc_ctx_mem(bp);
+
+	bnxt_free_ctx_mem_buf(&bp->rx_fc_in_tbl);
+	bnxt_free_ctx_mem_buf(&bp->rx_fc_out_tbl);
+	bnxt_free_ctx_mem_buf(&bp->tx_fc_in_tbl);
+	bnxt_free_ctx_mem_buf(&bp->tx_fc_out_tbl);
+}
+
+static void bnxt_uninit_ctx_mem(struct bnxt *bp)
+{
+	bnxt_uninit_fc_ctx_mem(bp);
+}
+
 static void
 bnxt_uninit_locks(struct bnxt *bp)
 {
@@ -5043,6 +5304,8 @@ bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev)
 		}
 	}
 
+	bnxt_uninit_ctx_mem(bp);
+
 	bnxt_uninit_locks(bp);
 	rte_free(bp->ptp_cfg);
 	bp->ptp_cfg = NULL;
diff --git a/drivers/net/bnxt/bnxt_filter.h b/drivers/net/bnxt/bnxt_filter.h
index 8f8a4c13b..4b2b3cadc 100644
--- a/drivers/net/bnxt/bnxt_filter.h
+++ b/drivers/net/bnxt/bnxt_filter.h
@@ -25,6 +25,11 @@ struct bnxt;
 #define BNXT_FLOW_PARSE_INNER_FLAG		BIT(6)
 #define BNXT_FLOW_MARK_FLAG			BIT(7)
 
+struct bnxt_flow_stats {
+	uint64_t	packets;
+	uint64_t	bytes;
+};
+
 struct bnxt_filter_info {
 	STAILQ_ENTRY(bnxt_filter_info)	next;
 	uint32_t		flow_id;
@@ -84,6 +89,7 @@ struct bnxt_filter_info {
 	 */
 	struct			bnxt_vnic_info *vnic;
 	uint32_t		mark;
+	struct bnxt_flow_stats	hw_stats;
 };
 
 struct bnxt_filter_info *bnxt_alloc_filter(struct bnxt *bp);
diff --git a/drivers/net/bnxt/bnxt_flow.c b/drivers/net/bnxt/bnxt_flow.c
index 9fb6dbdd9..6c6da84c6 100644
--- a/drivers/net/bnxt/bnxt_flow.c
+++ b/drivers/net/bnxt/bnxt_flow.c
@@ -10,6 +10,7 @@
 #include <rte_flow.h>
 #include <rte_flow_driver.h>
 #include <rte_tailq.h>
+#include <rte_alarm.h>
 
 #include "bnxt.h"
 #include "bnxt_filter.h"
@@ -1627,6 +1628,51 @@ bnxt_match_filter(struct bnxt *bp, struct bnxt_filter_info *nf)
 	return 0;
 }
 
+static void
+bnxt_setup_flow_counter(struct bnxt *bp)
+{
+	if (bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_COUNTERS &&
+	    !(bp->flags & BNXT_FLAG_FC_THREAD)) {
+		rte_eal_alarm_set(US_PER_S * BNXT_FC_TIMER,
+				  bnxt_flow_cnt_alarm_cb,
+				  (void *)bp);
+		bp->flags |= BNXT_FLAG_FC_THREAD;
+	}
+}
+
+void bnxt_flow_cnt_alarm_cb(void *arg)
+{
+	int rc = 0;
+	struct bnxt *bp = arg;
+
+	if (!bp->rx_fc_out_tbl.va) {
+		PMD_DRV_LOG(ERR, "bp->rx_fc_out_tbl.va is NULL?\n");
+		bnxt_cancel_fc_thread(bp);
+		return;
+	}
+
+	if (!bp->flow_count) {
+		bnxt_cancel_fc_thread(bp);
+		return;
+	}
+
+	if (!bp->eth_dev->data->dev_started) {
+		bnxt_cancel_fc_thread(bp);
+		return;
+	}
+
+	rc = bnxt_flow_stats_req(bp);
+	if (rc) {
+		PMD_DRV_LOG(ERR, "Flow stat alarm not rescheduled.\n");
+		return;
+	}
+
+	rte_eal_alarm_set(US_PER_S * BNXT_FC_TIMER,
+			  bnxt_flow_cnt_alarm_cb,
+			  (void *)bp);
+}
+
+
 static struct rte_flow *
 bnxt_flow_create(struct rte_eth_dev *dev,
 		 const struct rte_flow_attr *attr,
@@ -1783,7 +1829,9 @@ bnxt_flow_create(struct rte_eth_dev *dev,
 			bp->mark_table[flow_id].valid = true;
 			bp->mark_table[flow_id].mark_id = filter->mark;
 		}
+		bp->flow_count++;
 		bnxt_release_flow_lock(bp);
+		bnxt_setup_flow_counter(bp);
 		return flow;
 	}
 
@@ -1903,6 +1951,7 @@ _bnxt_flow_destroy(struct bnxt *bp,
 		bnxt_free_filter(bp, filter);
 		STAILQ_REMOVE(&vnic->flow_list, flow, rte_flow, next);
 		rte_free(flow);
+		bp->flow_count--;
 
 		/* If this was the last flow associated with this vnic,
 		 * switch the queue back to RSS pool.
@@ -1955,6 +2004,12 @@ bnxt_flow_destroy(struct rte_eth_dev *dev,
 	return ret;
 }
 
+void bnxt_cancel_fc_thread(struct bnxt *bp)
+{
+	bp->flags &= ~BNXT_FLAG_FC_THREAD;
+	rte_eal_alarm_cancel(bnxt_flow_cnt_alarm_cb, (void *)bp);
+}
+
 static int
 bnxt_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
 {
@@ -1981,6 +2036,8 @@ bnxt_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
 				break;
 		}
 	}
+
+	bnxt_cancel_fc_thread(bp);
 	bnxt_release_flow_lock(bp);
 
 	return ret;
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index d435f6570..09a73286b 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -744,6 +744,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
 	} else {
 		bp->max_vnics = 1;
 	}
+	PMD_DRV_LOG(DEBUG, "Max l2_cntxts is %d vnics is %d\n",
+		    bp->max_l2_ctx, bp->max_vnics);
 	bp->max_stat_ctx = rte_le_to_cpu_16(resp->max_stat_ctx);
 	if (BNXT_PF(bp)) {
 		bp->pf.total_vnics = rte_le_to_cpu_16(resp->max_vnics);
@@ -1169,10 +1171,17 @@ int bnxt_hwrm_ver_get(struct bnxt *bp, uint32_t timeout)
 		PMD_DRV_LOG(DEBUG, "FW supports Trusted VFs\n");
 	if (dev_caps_cfg &
 	    HWRM_VER_GET_OUTPUT_DEV_CAPS_CFG_CFA_ADV_FLOW_MGNT_SUPPORTED) {
-		bp->flags |= BNXT_FLAG_ADV_FLOW_MGMT;
+		bp->fw_cap |= BNXT_FW_CAP_ADV_FLOW_MGMT;
 		PMD_DRV_LOG(DEBUG, "FW supports advanced flow management\n");
 	}
 
+	if (dev_caps_cfg &
+	    HWRM_VER_GET_OUTPUT_DEV_CAPS_CFG_ADV_FLOW_COUNTERS_SUPPORTED) {
+		PMD_DRV_LOG(DEBUG, "FW supports advanced flow counters\n");
+		bp->fw_cap |= BNXT_FW_CAP_ADV_FLOW_COUNTERS;
+	}
+
+
 error:
 	HWRM_UNLOCK();
 	return rc;
@@ -5216,7 +5225,7 @@ int bnxt_hwrm_cfa_adv_flow_mgmt_qcaps(struct bnxt *bp)
 	uint32_t flags = 0;
 	int rc = 0;
 
-	if (!(bp->flags & BNXT_FLAG_ADV_FLOW_MGMT))
+	if (!(bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_MGMT))
 		return rc;
 
 	if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) {
@@ -5239,3 +5248,159 @@ int bnxt_hwrm_cfa_adv_flow_mgmt_qcaps(struct bnxt *bp)
 
 	return rc;
 }
+
+/* Query FW for the max number of Rx flow counters; max_fc may be NULL. */
+int bnxt_hwrm_cfa_counter_qcaps(struct bnxt *bp, uint16_t *max_fc)
+{
+	int rc = 0;
+	struct hwrm_cfa_counter_qcaps_input req = {0};
+	struct hwrm_cfa_counter_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
+
+	if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) {
+		PMD_DRV_LOG(DEBUG,
+			    "Not a PF or trusted VF. Command not supported\n");
+		return 0;
+	}
+
+	HWRM_PREP(&req, HWRM_CFA_COUNTER_QCAPS, BNXT_USE_KONG(bp));
+	req.target_id = rte_cpu_to_le_16(bp->fw_fid);
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_KONG(bp));
+	HWRM_CHECK_RESULT();
+	if (max_fc) {
+		/* max_rx_fc is a 32-bit LE field; convert before narrowing. */
+		*max_fc = (uint16_t)rte_le_to_cpu_32(resp->max_rx_fc);
+		PMD_DRV_LOG(DEBUG, "max_fc = %d\n", *max_fc);
+	}
+	HWRM_UNLOCK();
+	return 0;
+}
+
+int bnxt_hwrm_ctx_rgtr(struct bnxt *bp, rte_iova_t dma_addr, uint16_t *ctx_id)
+{
+	int rc = 0;
+	struct hwrm_cfa_ctx_mem_rgtr_input req = {.req_type = 0 };
+	struct hwrm_cfa_ctx_mem_rgtr_output *resp = bp->hwrm_cmd_resp_addr;
+
+	if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) {
+		PMD_DRV_LOG(DEBUG,
+			    "Not a PF or trusted VF. Command not supported\n");
+		return 0;
+	}
+
+	HWRM_PREP(&req, HWRM_CFA_CTX_MEM_RGTR, BNXT_USE_KONG(bp));
+
+	req.page_level = HWRM_CFA_CTX_MEM_RGTR_INPUT_PAGE_LEVEL_LVL_0;
+	req.page_size = HWRM_CFA_CTX_MEM_RGTR_INPUT_PAGE_SIZE_2M;
+	req.page_dir = rte_cpu_to_le_64(dma_addr);
+
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_KONG(bp));
+
+	HWRM_CHECK_RESULT();
+	if (ctx_id) {
+		*ctx_id  = rte_le_to_cpu_16(resp->ctx_id);
+		PMD_DRV_LOG(DEBUG, "ctx_id = %d\n", *ctx_id);
+	}
+	HWRM_UNLOCK();
+
+	return 0;
+}
+
+int bnxt_hwrm_ctx_unrgtr(struct bnxt *bp, uint16_t ctx_id)
+{
+	int rc = 0;
+	struct hwrm_cfa_ctx_mem_unrgtr_input req = {.req_type = 0 };
+	struct hwrm_cfa_ctx_mem_unrgtr_output *resp = bp->hwrm_cmd_resp_addr;
+
+	if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) {
+		PMD_DRV_LOG(DEBUG,
+			    "Not a PF or trusted VF. Command not supported\n");
+		return 0;
+	}
+
+	HWRM_PREP(&req, HWRM_CFA_CTX_MEM_UNRGTR, BNXT_USE_KONG(bp));
+
+	req.ctx_id = rte_cpu_to_le_16(ctx_id);
+
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_KONG(bp));
+
+	HWRM_CHECK_RESULT();
+	HWRM_UNLOCK();
+
+	return rc;
+}
+
+int bnxt_hwrm_cfa_counter_cfg(struct bnxt *bp, enum bnxt_flow_dir dir,
+			      uint16_t cntr, uint16_t ctx_id,
+			      uint32_t num_entries, bool enable)
+{
+	struct hwrm_cfa_counter_cfg_input req = {0};
+	struct hwrm_cfa_counter_cfg_output *resp = bp->hwrm_cmd_resp_addr;
+	uint16_t flags = 0;
+	int rc;
+
+	if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) {
+		PMD_DRV_LOG(DEBUG,
+			    "Not a PF or trusted VF. Command not supported\n");
+		return 0;
+	}
+
+	HWRM_PREP(&req, HWRM_CFA_COUNTER_CFG, BNXT_USE_KONG(bp));
+
+	req.target_id = rte_cpu_to_le_16(bp->fw_fid);
+	req.counter_type = rte_cpu_to_le_16(cntr);
+	flags = enable ? HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE_ENABLE :
+		HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE_DISABLE;
+	flags |= HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_PULL;
+	if (dir == BNXT_DIR_RX)
+		flags |=  HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH_RX;
+	else if (dir == BNXT_DIR_TX)
+		flags |=  HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH_TX;
+	req.flags = rte_cpu_to_le_16(flags);
+	req.ctx_id =  rte_cpu_to_le_16(ctx_id);
+	req.num_entries = rte_cpu_to_le_32(num_entries);
+
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_KONG(bp));
+	HWRM_CHECK_RESULT();
+	HWRM_UNLOCK();
+
+	return 0;
+}
+
+int bnxt_hwrm_cfa_counter_qstats(struct bnxt *bp,
+				 enum bnxt_flow_dir dir,
+				 uint16_t cntr,
+				 uint16_t num_entries)
+{
+	struct hwrm_cfa_counter_qstats_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_cfa_counter_qstats_input req = {0};
+	uint16_t flow_ctx_id = 0;
+	uint16_t flags = 0;
+	int rc = 0;
+
+	if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) {
+		PMD_DRV_LOG(DEBUG,
+			    "Not a PF or trusted VF. Command not supported\n");
+		return 0;
+	}
+
+	if (dir == BNXT_DIR_RX) {
+		flow_ctx_id = bp->rx_fc_in_tbl.ctx_id;
+		flags = HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH_RX;
+	} else if (dir == BNXT_DIR_TX) {
+		flow_ctx_id = bp->tx_fc_in_tbl.ctx_id;
+		flags = HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH_TX;
+	}
+
+	HWRM_PREP(&req, HWRM_CFA_COUNTER_QSTATS, BNXT_USE_KONG(bp));
+	req.target_id = rte_cpu_to_le_16(bp->fw_fid);
+	req.counter_type = rte_cpu_to_le_16(cntr);
+	req.input_flow_ctx_id = rte_cpu_to_le_16(flow_ctx_id);
+	req.num_entries = rte_cpu_to_le_16(num_entries);
+	req.flags = rte_cpu_to_le_16(flags);
+	rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_KONG(bp));
+
+	HWRM_CHECK_RESULT();
+	HWRM_UNLOCK();
+
+	return 0;
+}
diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
index 1b7e35306..58b414d4f 100644
--- a/drivers/net/bnxt/bnxt_hwrm.h
+++ b/drivers/net/bnxt/bnxt_hwrm.h
@@ -88,6 +88,18 @@ int bnxt_hwrm_tf_message_direct(struct bnxt *bp,
 				void *resp_msg,
 				uint32_t resp_len);
 
+#define CFA_COUNTER_CFG_IN_COUNTER_TYPE_FC \
+	HWRM_CFA_COUNTER_CFG_INPUT_COUNTER_TYPE_FC
+
+enum bnxt_flow_dir {
+	BNXT_DIR_RX = 0,	/* selects the Rx fc tables and PATH_RX flag */
+	BNXT_DIR_TX,		/* selects the Tx fc tables and PATH_TX flag */
+	BNXT_DIR_LOOPBACK,	/* no path flag is set for this value here */
+	BNXT_DIR_MAX
+};
+
+#define BNXT_CTX_VAL_INVAL	0xFFFF /* presumably "no ctx_id"; confirm */
+
 int bnxt_hwrm_cfa_l2_clear_rx_mask(struct bnxt *bp,
 				   struct bnxt_vnic_info *vnic);
 int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp, struct bnxt_vnic_info *vnic,
@@ -248,4 +260,14 @@ int bnxt_hwrm_fw_reset(struct bnxt *bp);
 int bnxt_hwrm_port_ts_query(struct bnxt *bp, uint8_t path,
 			    uint64_t *timestamp);
 int bnxt_hwrm_cfa_adv_flow_mgmt_qcaps(struct bnxt *bp);
+int bnxt_hwrm_cfa_counter_qcaps(struct bnxt *bp, uint16_t *max_fc);
+int bnxt_hwrm_ctx_rgtr(struct bnxt *bp, rte_iova_t dma_addr, uint16_t *ctx_id);
+int bnxt_hwrm_ctx_unrgtr(struct bnxt *bp, uint16_t ctx_id);
+int bnxt_hwrm_cfa_counter_cfg(struct bnxt *bp, enum bnxt_flow_dir dir,
+			      uint16_t cntr, uint16_t ctx_id,
+			      uint32_t num_entries, bool enable);
+int bnxt_hwrm_cfa_counter_qstats(struct bnxt *bp,
+				 enum bnxt_flow_dir dir,
+				 uint16_t cntr,
+				 uint16_t num_entries);
 #endif
diff --git a/drivers/net/bnxt/bnxt_stats.c b/drivers/net/bnxt/bnxt_stats.c
index 6afd11adb..1d3be16f8 100644
--- a/drivers/net/bnxt/bnxt_stats.c
+++ b/drivers/net/bnxt/bnxt_stats.c
@@ -10,10 +10,12 @@
 
 #include "bnxt.h"
 #include "bnxt_cpr.h"
+#include "bnxt_filter.h"
 #include "bnxt_hwrm.h"
 #include "bnxt_rxq.h"
 #include "bnxt_stats.h"
 #include "bnxt_txq.h"
+#include "bnxt_vnic.h"
 #include "hsi_struct_def_dpdk.h"
 
 static const struct bnxt_xstats_name_off bnxt_rx_stats_strings[] = {
@@ -611,7 +613,9 @@ int bnxt_dev_xstats_get_op(struct rte_eth_dev *eth_dev,
 		RTE_DIM(bnxt_tx_stats_strings) +
 		RTE_DIM(bnxt_func_stats_strings) +
 		RTE_DIM(bnxt_rx_ext_stats_strings) +
-		RTE_DIM(bnxt_tx_ext_stats_strings);
+		RTE_DIM(bnxt_tx_ext_stats_strings) +
+		bnxt_flow_stats_cnt(bp);
+
 	stat_count = count;
 
 	if (n < count)
@@ -660,24 +664,77 @@ int bnxt_dev_xstats_get_op(struct rte_eth_dev *eth_dev,
 		xstats[count].value = rte_le_to_cpu_64
 					(*(uint64_t *)((char *)tx_stats_ext +
 					 bnxt_tx_ext_stats_strings[i].offset));
-
 		count++;
 	}
 
+	/* Append one bytes and one packets entry per flow found on the valid
+	 * VNICs, never filling more slots than bnxt_flow_stats_cnt() reported.
+	 */
+	if (bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_COUNTERS &&
+	    bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_MGMT &&
+	    bp->flow_xstat) {
+		int j;
+
+		i = 0;
+		for (j = 0; j < bp->max_vnics; j++) {
+			struct bnxt_filter_info *filter;
+			struct bnxt_vnic_info *vnic;
+			struct rte_flow *flow;
+
+			vnic = &bp->vnic_info[j];
+			if (vnic && vnic->fw_vnic_id == INVALID_VNIC_ID)
+				continue;
+
+			STAILQ_FOREACH(flow, &vnic->flow_list, next) {
+				if (!flow || !flow->filter)
+					continue;
+				/* Room was reserved for max_l2_ctx flows */
+				if (i >= bp->max_l2_ctx)
+					break;
+
+				filter = flow->filter;
+				xstats[count].id = count;
+				xstats[count].value = filter->hw_stats.bytes;
+				count++;
+				xstats[count].id = count;
+				xstats[count].value = filter->hw_stats.packets;
+				count++;
+				i++;
+			}
+			if (i >= bp->max_l2_ctx)
+				break;
+		}
+	}
+
 	return stat_count;
 }
 
+/* Return how many flow counter entries the xstats arrays must hold:
+ * a bytes and a packets entry for each of the max_l2_ctx possible flows,
+ * or 0 when flow xstats are unsupported or not enabled.
+ */
+int bnxt_flow_stats_cnt(struct bnxt *bp)
+{
+	if (bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_COUNTERS &&
+	    bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_MGMT &&
+	    bp->flow_xstat)
+		return 2 * bp->max_l2_ctx;
+
+	return 0;
+}
+
 int bnxt_dev_xstats_get_names_op(struct rte_eth_dev *eth_dev,
-				 struct rte_eth_xstat_name *xstats_names,
-				 __rte_unused unsigned int limit)
+		struct rte_eth_xstat_name *xstats_names,
+		__rte_unused unsigned int limit)
 {
+	struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
 	const unsigned int stat_cnt = RTE_DIM(bnxt_rx_stats_strings) +
 				RTE_DIM(bnxt_tx_stats_strings) +
 				RTE_DIM(bnxt_func_stats_strings) +
 				RTE_DIM(bnxt_rx_ext_stats_strings) +
-				RTE_DIM(bnxt_tx_ext_stats_strings);
-	struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
-	unsigned int i, count;
+				RTE_DIM(bnxt_tx_ext_stats_strings) +
+				bnxt_flow_stats_cnt(bp);
+	unsigned int i, count = 0;
 	int rc;
 
 	rc = is_bnxt_in_error(bp);
@@ -724,7 +781,26 @@ int bnxt_dev_xstats_get_names_op(struct rte_eth_dev *eth_dev,
 			count++;
 		}
 
+		/* Name each flow counter pair after its slot index */
+		if (bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_COUNTERS &&
+		    bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_MGMT &&
+		    bp->flow_xstat) {
+			for (i = 0; i < bp->max_l2_ctx; i++) {
+				/* i is unsigned: %u, bounded by snprintf */
+				snprintf(xstats_names[count].name,
+					 sizeof(xstats_names[count].name),
+					 "flow_%u_bytes", i);
+				count++;
+
+				snprintf(xstats_names[count].name,
+					 sizeof(xstats_names[count].name),
+					 "flow_%u_packets", i);
+
+				count++;
+			}
+		}
 	}
+
 	return stat_cnt;
 }
 
@@ -754,12 +830,13 @@ int bnxt_dev_xstats_reset_op(struct rte_eth_dev *eth_dev)
 int bnxt_dev_xstats_get_by_id_op(struct rte_eth_dev *dev, const uint64_t *ids,
 		uint64_t *values, unsigned int limit)
 {
+	struct bnxt *bp = dev->data->dev_private;
 	const unsigned int stat_cnt = RTE_DIM(bnxt_rx_stats_strings) +
 				RTE_DIM(bnxt_tx_stats_strings) +
 				RTE_DIM(bnxt_func_stats_strings) +
 				RTE_DIM(bnxt_rx_ext_stats_strings) +
-				RTE_DIM(bnxt_tx_ext_stats_strings);
-	struct bnxt *bp = dev->data->dev_private;
+				RTE_DIM(bnxt_tx_ext_stats_strings) +
+				bnxt_flow_stats_cnt(bp);
 	struct rte_eth_xstat xstats[stat_cnt];
 	uint64_t values_copy[stat_cnt];
 	uint16_t i;
@@ -787,13 +864,14 @@ int bnxt_dev_xstats_get_names_by_id_op(struct rte_eth_dev *dev,
 				struct rte_eth_xstat_name *xstats_names,
 				const uint64_t *ids, unsigned int limit)
 {
+	struct bnxt *bp = dev->data->dev_private;
 	const unsigned int stat_cnt = RTE_DIM(bnxt_rx_stats_strings) +
 				RTE_DIM(bnxt_tx_stats_strings) +
 				RTE_DIM(bnxt_func_stats_strings) +
 				RTE_DIM(bnxt_rx_ext_stats_strings) +
-				RTE_DIM(bnxt_tx_ext_stats_strings);
+				RTE_DIM(bnxt_tx_ext_stats_strings) +
+				bnxt_flow_stats_cnt(bp);
 	struct rte_eth_xstat_name xstats_names_copy[stat_cnt];
-	struct bnxt *bp = dev->data->dev_private;
 	uint16_t i;
 	int rc;
 
@@ -817,3 +895,135 @@ int bnxt_dev_xstats_get_names_by_id_op(struct rte_eth_dev *dev,
 	}
 	return stat_cnt;
 }
+
+/* Stage one flow for the next counter_qstats command: put its flow
+ * counter ID in the next free slot of the input context memory and
+ * seed the matching output pair (packets first, then bytes) with the
+ * current local values, since that is what will be used by FW to
+ * accumulate. *ptbl_cnt is the slot to use and is advanced by one.
+ */
+static void bnxt_update_fc_pre_qstat(uint32_t *in_tbl,
+				     uint64_t *out_tbl,
+				     struct bnxt_filter_info *filter,
+				     uint32_t *ptbl_cnt)
+{
+	uint32_t slot = (*ptbl_cnt)++;
+	uint64_t *pair = &out_tbl[2 * slot];
+
+	in_tbl[slot] = filter->flow_id;
+	pair[0] = filter->hw_stats.packets;
+	pair[1] = filter->hw_stats.bytes;
+}
+
+/* Post issuing counter_qstats cmd, copy the packets/bytes pair that FW
+ * DMA-ed into slot out_tbl_idx of the output table to the local stats.
+ */
+static void bnxt_update_fc_post_qstat(struct bnxt_filter_info *filter,
+				      uint64_t *out_tbl, uint32_t out_tbl_idx)
+{
+	const uint64_t *pair = out_tbl + 2 * out_tbl_idx;
+
+	filter->hw_stats.bytes = pair[1];
+	filter->hw_stats.packets = pair[0];
+}
+
+/* Refresh the counters of the in_flow_cnt filters listed in en_tbl[].
+ * Every non-NULL entry is staged in the Rx input/output tables, one
+ * counter_qstats command of type ctr is issued for the staged entries,
+ * and the resulting values are copied back into the filters.
+ * Returns 0, or the non-zero code returned by the command.
+ */
+static int bnxt_update_fc_tbl(struct bnxt *bp, uint16_t ctr,
+			      struct bnxt_filter_info *en_tbl[],
+			      uint16_t in_flow_cnt)
+{
+	uint32_t *ids = (uint32_t *)bp->rx_fc_in_tbl.va;
+	uint64_t *counters = (uint64_t *)bp->rx_fc_out_tbl.va;
+	uint32_t queued = 0;
+	uint32_t slot = 0;
+	int idx, rc = 0;
+
+	/* Currently only ingress/Rx flows are supported anyway. */
+	for (idx = 0; idx < in_flow_cnt; idx++) {
+		if (en_tbl[idx])
+			bnxt_update_fc_pre_qstat(ids, counters,
+						 en_tbl[idx], &queued);
+	}
+
+	/* Currently only ingress/Rx flows are supported */
+	if (queued) {
+		rc = bnxt_hwrm_cfa_counter_qstats(bp, BNXT_DIR_RX, ctr,
+						  queued);
+		if (rc)
+			return rc;
+	}
+
+	/* Output slots were assigned in en_tbl[] order, so the same walk
+	 * pairs each filter with its slot.
+	 */
+	for (idx = 0; idx < in_flow_cnt; idx++) {
+		if (en_tbl[idx])
+			bnxt_update_fc_post_qstat(en_tbl[idx], counters,
+						  slot++);
+	}
+
+	return rc;
+}
+
+/* Walks through the list which has all the flows requesting explicit
+ * flow counters and refreshes them in batches of up to max_fc filters.
+ * Returns 0 on success. If a query fails, its error is returned and the
+ * flow counter thread is cancelled. The flow lock is dropped on every exit.
+ */
+int bnxt_flow_stats_req(struct bnxt *bp)
+{
+	int i;
+	int rc = 0;
+	struct rte_flow *flow;
+	uint16_t in_flow_tbl_cnt = 0;
+	struct bnxt_vnic_info *vnic = NULL;
+	struct bnxt_filter_info *valid_en_tbl[bp->max_fc];
+	uint16_t counter_type = CFA_COUNTER_CFG_IN_COUNTER_TYPE_FC;
+
+	bnxt_acquire_flow_lock(bp);
+	for (i = 0; i < bp->max_vnics; i++) {
+		vnic = &bp->vnic_info[i];
+		if (vnic && vnic->fw_vnic_id == INVALID_VNIC_ID)
+			continue;
+
+		if (STAILQ_EMPTY(&vnic->flow_list))
+			continue;
+
+		STAILQ_FOREACH(flow, &vnic->flow_list, next) {
+			if (!flow || !flow->filter)
+				continue;
+
+			valid_en_tbl[in_flow_tbl_cnt++] = flow->filter;
+			if (in_flow_tbl_cnt >= bp->max_fc) {
+				/* Batch is full: flush it and start over */
+				rc = bnxt_update_fc_tbl(bp, counter_type,
+							valid_en_tbl,
+							in_flow_tbl_cnt);
+				if (rc)
+					goto err;
+				in_flow_tbl_cnt = 0;
+			}
+		}
+	}
+
+	/* Flush the final, partially filled batch (if any) */
+	if (in_flow_tbl_cnt)
+		rc = bnxt_update_fc_tbl(bp, counter_type, valid_en_tbl,
+					in_flow_tbl_cnt);
+	if (!rc) {
+		bnxt_release_flow_lock(bp);
+		return 0;
+	}
+
+err:
+	/* If cmd fails once, no need of
+	 * invoking again every second
+	 */
+	bnxt_release_flow_lock(bp);
+	bnxt_cancel_fc_thread(bp);
+	return rc;
+}
diff --git a/drivers/net/bnxt/hsi_struct_def_dpdk.h b/drivers/net/bnxt/hsi_struct_def_dpdk.h
index cde96e784..608eaa2f4 100644
--- a/drivers/net/bnxt/hsi_struct_def_dpdk.h
+++ b/drivers/net/bnxt/hsi_struct_def_dpdk.h
@@ -38338,4 +38338,288 @@ struct hwrm_port_ts_query_output {
 	uint8_t		valid;
 } __attribute__((packed));
 
+/**************************
+ * hwrm_cfa_counter_qcaps *
+ **************************/
+
+
+/* hwrm_cfa_counter_qcaps_input (size:128b/16B) */
+struct hwrm_cfa_counter_qcaps_input {
+	/* The HWRM command request type. */
+	uint16_t	req_type;
+	/*
+	 * The completion ring to send the completion event on. This should
+	 * be the NQ ID returned from the `nq_alloc` HWRM command.
+	 */
+	uint16_t	cmpl_ring;
+	/*
+	 * The sequence ID is used by the driver for tracking multiple
+	 * commands. This ID is treated as opaque data by the firmware and
+	 * the value is returned in the `hwrm_resp_hdr` upon completion.
+	 */
+	uint16_t	seq_id;
+	/*
+	 * The target ID of the command:
+	 * * 0x0-0xFFF8 - The function ID
+	 * * 0xFFF8-0xFFFC, 0xFFFE - Reserved for internal processors
+	 * * 0xFFFD - Reserved for user-space HWRM interface
+	 * * 0xFFFF - HWRM
+	 */
+	uint16_t	target_id;
+	/*
+	 * A physical address pointer pointing to a host buffer that the
+	 * command's response data will be written. This can be either a host
+	 * physical address (HPA) or a guest physical address (GPA) and must
+	 * point to a physically contiguous block of memory.
+	 */
+	uint64_t	resp_addr;
+} __attribute__((packed));
+
+/* hwrm_cfa_counter_qcaps_output (size:576b/72B) */
+struct hwrm_cfa_counter_qcaps_output {
+	/* The specific error status for the command. */
+	uint16_t	error_code;
+	/* The HWRM command request type. */
+	uint16_t	req_type;
+	/* The sequence ID from the original command. */
+	uint16_t	seq_id;
+	/* The length of the response data in number of bytes. */
+	uint16_t	resp_len;
+	uint32_t	flags;
+	/* Enumeration denoting the supported CFA counter format. */
+	#define HWRM_CFA_COUNTER_QCAPS_OUTPUT_FLAGS_COUNTER_FORMAT \
+		UINT32_C(0x1)
+	/* CFA counter types are not supported. */
+	#define HWRM_CFA_COUNTER_QCAPS_OUTPUT_FLAGS_COUNTER_FORMAT_NONE \
+		UINT32_C(0x0)
+	/* 64-bit packet counters followed by 64-bit byte counters format. */
+	#define HWRM_CFA_COUNTER_QCAPS_OUTPUT_FLAGS_COUNTER_FORMAT_64_BIT \
+		UINT32_C(0x1)
+	#define HWRM_CFA_COUNTER_QCAPS_OUTPUT_FLAGS_COUNTER_FORMAT_LAST \
+		HWRM_CFA_COUNTER_QCAPS_OUTPUT_FLAGS_COUNTER_FORMAT_64_BIT
+	uint32_t	unused_0;
+	/* Minimum guaranteed number of flow counters supported for this function, in RX direction. */
+	uint32_t	min_rx_fc;
+	/* Maximum non-guaranteed number of flow counters supported for this function, in RX direction. */
+	uint32_t	max_rx_fc;
+	/* Minimum guaranteed number of flow counters supported for this function, in TX direction. */
+	uint32_t	min_tx_fc;
+	/* Maximum non-guaranteed number of flow counters supported for this function, in TX direction. */
+	uint32_t	max_tx_fc;
+	/* Minimum guaranteed number of extension flow counters supported for this function, in RX direction. */
+	uint32_t	min_rx_efc;
+	/* Maximum non-guaranteed number of extension flow counters supported for this function, in RX direction. */
+	uint32_t	max_rx_efc;
+	/* Minimum guaranteed number of extension flow counters supported for this function, in TX direction. */
+	uint32_t	min_tx_efc;
+	/* Maximum non-guaranteed number of extension flow counters supported for this function, in TX direction. */
+	uint32_t	max_tx_efc;
+	/* Minimum guaranteed number of meter drop counters supported for this function, in RX direction. */
+	uint32_t	min_rx_mdc;
+	/* Maximum non-guaranteed number of meter drop counters supported for this function, in RX direction. */
+	uint32_t	max_rx_mdc;
+	/* Minimum guaranteed number of meter drop counters supported for this function, in TX direction. */
+	uint32_t	min_tx_mdc;
+	/* Maximum non-guaranteed number of meter drop counters supported for this function, in TX direction. */
+	uint32_t	max_tx_mdc;
+	/* Maximum guaranteed number of flow counters which can be used during flow alloc. */
+	uint32_t	max_flow_alloc_fc;
+	uint8_t	unused_1[3];
+	/*
+	 * This field is used in Output records to indicate that the output
+	 * is completely written to RAM.  This field should be read as '1'
+	 * to indicate that the output has been completely written.
+	 * When writing a command completion or response to an internal processor,
+	 * the order of writes has to be such that this field is written last.
+	 */
+	uint8_t	valid;
+} __attribute__((packed));
+
+/************************
+ * hwrm_cfa_counter_cfg *
+ ************************/
+
+
+/* hwrm_cfa_counter_cfg_input (size:256b/32B) */
+struct hwrm_cfa_counter_cfg_input {
+	/* The HWRM command request type. */
+	uint16_t	req_type;
+	/*
+	 * The completion ring to send the completion event on. This should
+	 * be the NQ ID returned from the `nq_alloc` HWRM command.
+	 */
+	uint16_t	cmpl_ring;
+	/*
+	 * The sequence ID is used by the driver for tracking multiple
+	 * commands. This ID is treated as opaque data by the firmware and
+	 * the value is returned in the `hwrm_resp_hdr` upon completion.
+	 */
+	uint16_t	seq_id;
+	/*
+	 * The target ID of the command:
+	 * * 0x0-0xFFF8 - The function ID
+	 * * 0xFFF8-0xFFFC, 0xFFFE - Reserved for internal processors
+	 * * 0xFFFD - Reserved for user-space HWRM interface
+	 * * 0xFFFF - HWRM
+	 */
+	uint16_t	target_id;
+	/*
+	 * A physical address pointer pointing to a host buffer that the
+	 * command's response data will be written. This can be either a host
+	 * physical address (HPA) or a guest physical address (GPA) and must
+	 * point to a physically contiguous block of memory.
+	 */
+	uint64_t	resp_addr;
+	uint16_t	flags;
+	/* Enumeration denoting the configuration mode. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE \
+		UINT32_C(0x1)
+	/* Disable the configuration mode. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE_DISABLE \
+		UINT32_C(0x0)
+	/* Enable the configuration mode. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE_ENABLE \
+		UINT32_C(0x1)
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE_LAST \
+		HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE_ENABLE
+	/* Enumeration denoting the RX, TX type of the resource. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH \
+		UINT32_C(0x2)
+	/* Tx path. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH_TX \
+		(UINT32_C(0x0) << 1)
+	/* Rx path. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH_RX \
+		(UINT32_C(0x1) << 1)
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH_LAST \
+		HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH_RX
+	/* Enumeration denoting the data transfer mode. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_MASK \
+		UINT32_C(0xc)
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_SFT       2
+	/* Push mode. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_PUSH \
+		(UINT32_C(0x0) << 2)
+	/* Pull mode. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_PULL \
+		(UINT32_C(0x1) << 2)
+	/* Pull on async update. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_PULL_ASYNC \
+		(UINT32_C(0x2) << 2)
+	#define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_LAST \
+		HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_PULL_ASYNC
+	uint16_t	counter_type;
+	/* Flow counters. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_COUNTER_TYPE_FC  UINT32_C(0x0)
+	/* Extended flow counters. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_COUNTER_TYPE_EFC UINT32_C(0x1)
+	/* Meter drop counters. */
+	#define HWRM_CFA_COUNTER_CFG_INPUT_COUNTER_TYPE_MDC UINT32_C(0x2)
+	#define HWRM_CFA_COUNTER_CFG_INPUT_COUNTER_TYPE_LAST \
+		HWRM_CFA_COUNTER_CFG_INPUT_COUNTER_TYPE_MDC
+	/* Ctx memory handle to be used for the counter. */
+	uint16_t	ctx_id;
+	/* Counter update cadence hint (only in Push mode). */
+	uint16_t	update_tmr_ms;
+	/* Total number of entries. */
+	uint32_t	num_entries;
+	uint32_t	unused_0;
+} __attribute__((packed));
+
+/* hwrm_cfa_counter_cfg_output (size:128b/16B) */
+struct hwrm_cfa_counter_cfg_output {
+	/* The specific error status for the command. */
+	uint16_t	error_code;
+	/* The HWRM command request type. */
+	uint16_t	req_type;
+	/* The sequence ID from the original command. */
+	uint16_t	seq_id;
+	/* The length of the response data in number of bytes. */
+	uint16_t	resp_len;
+	uint8_t	unused_0[7];
+	/*
+	 * This field is used in Output records to indicate that the output
+	 * is completely written to RAM.  This field should be read as '1'
+	 * to indicate that the output has been completely written.
+	 * When writing a command completion or response to an internal processor,
+	 * the order of writes has to be such that this field is written last.
+	 */
+	uint8_t	valid;
+} __attribute__((packed));
+
+/***************************
+ * hwrm_cfa_counter_qstats *
+ ***************************/
+
+
+/* hwrm_cfa_counter_qstats_input (size:320b/40B) */
+struct hwrm_cfa_counter_qstats_input {
+	/* The HWRM command request type. */
+	uint16_t	req_type;
+	/*
+	 * The completion ring to send the completion event on. This should
+	 * be the NQ ID returned from the `nq_alloc` HWRM command.
+	 */
+	uint16_t	cmpl_ring;
+	/*
+	 * The sequence ID is used by the driver for tracking multiple
+	 * commands. This ID is treated as opaque data by the firmware and
+	 * the value is returned in the `hwrm_resp_hdr` upon completion.
+	 */
+	uint16_t	seq_id;
+	/*
+	 * The target ID of the command:
+	 * * 0x0-0xFFF8 - The function ID
+	 * * 0xFFF8-0xFFFC, 0xFFFE - Reserved for internal processors
+	 * * 0xFFFD - Reserved for user-space HWRM interface
+	 * * 0xFFFF - HWRM
+	 */
+	uint16_t	target_id;
+	/*
+	 * A physical address pointer pointing to a host buffer that the
+	 * command's response data will be written. This can be either a host
+	 * physical address (HPA) or a guest physical address (GPA) and must
+	 * point to a physically contiguous block of memory.
+	 */
+	uint64_t	resp_addr;
+	uint16_t	flags;
+	/* Enumeration denoting the RX, TX type of the resource. */
+	#define HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH     UINT32_C(0x1)
+	/* Tx path. */
+	#define HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH_TX    UINT32_C(0x0)
+	/* Rx path. */
+	#define HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH_RX    UINT32_C(0x1)
+	#define HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH_LAST \
+		HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH_RX
+	uint16_t	counter_type;
+	uint16_t	input_flow_ctx_id;
+	uint16_t	num_entries;
+	uint16_t	delta_time_ms;
+	uint16_t	meter_instance_id;
+	uint16_t	mdc_ctx_id;
+	uint8_t	unused_0[2];
+	uint64_t	expected_count;
+} __attribute__((packed));
+
+/* hwrm_cfa_counter_qstats_output (size:128b/16B) */
+struct hwrm_cfa_counter_qstats_output {
+	/* The specific error status for the command. */
+	uint16_t	error_code;
+	/* The HWRM command request type. */
+	uint16_t	req_type;
+	/* The sequence ID from the original command. */
+	uint16_t	seq_id;
+	/* The length of the response data in number of bytes. */
+	uint16_t	resp_len;
+	uint8_t	unused_0[7];
+	/*
+	 * This field is used in Output records to indicate that the output
+	 * is completely written to RAM.  This field should be read as '1'
+	 * to indicate that the output has been completely written.
+	 * When writing a command completion or response to an internal processor,
+	 * the order of writes has to be such that this field is written last.
+	 */
+	uint8_t	valid;
+} __attribute__((packed));
+
 #endif /* _HSI_STRUCT_DEF_DPDK_H_ */
-- 
2.21.1 (Apple Git-122.3)


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [dpdk-dev] [PATCH v2] net/bnxt: add flow stats in extended stats
  2020-04-17  5:49 ` [dpdk-dev] [PATCH v2] " Ajit Khaparde
@ 2020-04-17 14:49   ` Ajit Khaparde
  2020-04-17 22:03     ` Ferruh Yigit
  0 siblings, 1 reply; 4+ messages in thread
From: Ajit Khaparde @ 2020-04-17 14:49 UTC (permalink / raw)
  To: dpdk-dev; +Cc: Somnath Kotur, Sriharsha Basavapatna

On Thu, Apr 16, 2020 at 10:49 PM Ajit Khaparde <ajit.khaparde@broadcom.com>
wrote:

> From: Somnath Kotur <somnath.kotur@broadcom.com>
>
> This patch allows to display flow stats in extended stats.
> To do this, DMA-able memory is registered with the FW during device
> initialization. Then the driver uses an alarm thread to query the
> per flow stats using the HWRM_CFA_COUNTER_QSTATS HWRM command at
> regular intervals and stores it locally which will be displayed
> when the application queries the xstats.
> The DMA-able memory is unregistered during driver cleanup.
> This functionality can be enabled using the flow-xstat devarg and
> will be disabled by default. The intention behind this is to allow
> stats to be displayed for all the flows in one shot instead of
> querying one at a time.
>
> Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
> Signed-off-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
> Reviewed-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
>
Patch applied to dpdk-next-net-brcm.

---
> v1->v2:
> Updating patch authors.
> ---
>  drivers/net/bnxt/bnxt.h                |  24 ++-
>  drivers/net/bnxt/bnxt_ethdev.c         | 263 +++++++++++++++++++++++
>  drivers/net/bnxt/bnxt_filter.h         |   6 +
>  drivers/net/bnxt/bnxt_flow.c           |  57 +++++
>  drivers/net/bnxt/bnxt_hwrm.c           | 169 ++++++++++++++-
>  drivers/net/bnxt/bnxt_hwrm.h           |  22 ++
>  drivers/net/bnxt/bnxt_stats.c          | 232 +++++++++++++++++++-
>  drivers/net/bnxt/hsi_struct_def_dpdk.h | 284 +++++++++++++++++++++++++
>  8 files changed, 1043 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
> index a70cdff07..00a4d0b3e 100644
> --- a/drivers/net/bnxt/bnxt.h
> +++ b/drivers/net/bnxt/bnxt.h
> @@ -433,6 +433,13 @@ struct bnxt_ctx_mem_info {
>         struct bnxt_ctx_pg_info *tqm_mem[BNXT_MAX_TC_Q];
>  };
>
> +struct bnxt_ctx_mem_buf_info {
> +       void            *va;
> +       rte_iova_t      dma;
> +       uint16_t        ctx_id;
> +       size_t          size;
> +};
> +
>  /* Maximum Firmware Reset bail out value in milliseconds */
>  #define BNXT_MAX_FW_RESET_TIMEOUT      6000
>  /* Minimum time required for the firmware readiness in milliseconds */
> @@ -530,7 +537,7 @@ struct bnxt {
>  #define BNXT_FLAG_NEW_RM                       BIT(20)
>  #define BNXT_FLAG_NPAR_PF                      BIT(21)
>  #define BNXT_FLAG_FW_CAP_ONE_STEP_TX_TS                BIT(22)
> -#define BNXT_FLAG_ADV_FLOW_MGMT                        BIT(23)
> +#define BNXT_FLAG_FC_THREAD                    BIT(23)
>  #define BNXT_FLAG_RX_VECTOR_PKT_MODE           BIT(24)
>  #define BNXT_PF(bp)            (!((bp)->flags & BNXT_FLAG_VF))
>  #define BNXT_VF(bp)            ((bp)->flags & BNXT_FLAG_VF)
> @@ -550,6 +557,8 @@ struct bnxt {
>  #define BNXT_FW_CAP_IF_CHANGE          BIT(1)
>  #define BNXT_FW_CAP_ERROR_RECOVERY     BIT(2)
>  #define BNXT_FW_CAP_ERR_RECOVER_RELOAD BIT(3)
> +#define BNXT_FW_CAP_ADV_FLOW_MGMT      BIT(5)
> +#define BNXT_FW_CAP_ADV_FLOW_COUNTERS  BIT(6)
>
>         uint32_t                flow_flags;
>  #define BNXT_FLOW_FLAG_L2_HDR_SRC_FILTER_EN    BIT(0)
> @@ -690,8 +699,17 @@ struct bnxt {
>         struct tf               tfp;
>         struct bnxt_ulp_context ulp_ctx;
>         uint8_t                 truflow;
> +       uint16_t                max_fc;
> +       struct bnxt_ctx_mem_buf_info rx_fc_in_tbl;
> +       struct bnxt_ctx_mem_buf_info rx_fc_out_tbl;
> +       struct bnxt_ctx_mem_buf_info tx_fc_in_tbl;
> +       struct bnxt_ctx_mem_buf_info tx_fc_out_tbl;
> +       uint16_t                flow_count;
> +       uint8_t                 flow_xstat;
>  };
>
> +#define BNXT_FC_TIMER  1 /* Timer freq in Sec Flow Counters */
> +
>  int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu);
>  int bnxt_link_update(struct rte_eth_dev *eth_dev, int wait_to_complete,
>                      bool exp_link_status);
> @@ -738,4 +756,8 @@ void bnxt_ulp_deinit(struct bnxt *bp);
>  uint16_t bnxt_get_vnic_id(uint16_t port);
>  uint16_t bnxt_get_svif(uint16_t port_id, bool func_svif);
>
> +void bnxt_cancel_fc_thread(struct bnxt *bp);
> +void bnxt_flow_cnt_alarm_cb(void *arg);
> +int bnxt_flow_stats_req(struct bnxt *bp);
> +int bnxt_flow_stats_cnt(struct bnxt *bp);
>  #endif
> diff --git a/drivers/net/bnxt/bnxt_ethdev.c
> b/drivers/net/bnxt/bnxt_ethdev.c
> index 5d5b8e095..bd2c3fcb6 100644
> --- a/drivers/net/bnxt/bnxt_ethdev.c
> +++ b/drivers/net/bnxt/bnxt_ethdev.c
> @@ -128,8 +128,10 @@ static const struct rte_pci_id bnxt_pci_id_map[] = {
>                                      DEV_RX_OFFLOAD_RSS_HASH)
>
>  #define BNXT_DEVARG_TRUFLOW    "host-based-truflow"
> +#define BNXT_DEVARG_FLOW_XSTAT "flow-xstat"
>  static const char *const bnxt_dev_args[] = {
>         BNXT_DEVARG_TRUFLOW,
> +       BNXT_DEVARG_FLOW_XSTAT,
>         NULL
>  };
>
> @@ -139,6 +141,12 @@ static const char *const bnxt_dev_args[] = {
>   */
>  #define        BNXT_DEVARG_TRUFLOW_INVALID(truflow)    ((truflow) > 1)
>
> +/*
> + * flow_xstat == false to disable the feature
> + * flow_xstat == true to enable the feature
> + */
> +#define        BNXT_DEVARG_FLOW_XSTAT_INVALID(flow_xstat)
> ((flow_xstat) > 1)
> +
>  static int bnxt_vlan_offload_set_op(struct rte_eth_dev *dev, int mask);
>  static void bnxt_print_link_info(struct rte_eth_dev *eth_dev);
>  static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev);
> @@ -333,6 +341,150 @@ static int bnxt_setup_one_vnic(struct bnxt *bp,
> uint16_t vnic_id)
>         return rc;
>  }
>
> +static int bnxt_register_fc_ctx_mem(struct bnxt *bp)
> +{
> +       int rc = 0;
> +
> +       rc = bnxt_hwrm_ctx_rgtr(bp, bp->rx_fc_in_tbl.dma,
> +                               &bp->rx_fc_in_tbl.ctx_id);
> +       if (rc)
> +               return rc;
> +
> +       PMD_DRV_LOG(DEBUG,
> +                   "rx_fc_in_tbl.va = %p rx_fc_in_tbl.dma = %p"
> +                   " rx_fc_in_tbl.ctx_id = %d\n",
> +                   bp->rx_fc_in_tbl.va, (void *)bp->rx_fc_in_tbl.dma,
> +                   bp->rx_fc_in_tbl.ctx_id);
> +
> +       rc = bnxt_hwrm_ctx_rgtr(bp, bp->rx_fc_out_tbl.dma,
> +                               &bp->rx_fc_out_tbl.ctx_id);
> +       if (rc)
> +               return rc;
> +
> +       PMD_DRV_LOG(DEBUG,
> +                   "rx_fc_out_tbl.va = %p rx_fc_out_tbl.dma = %p"
> +                   " rx_fc_out_tbl.ctx_id = %d\n",
> +                   bp->rx_fc_out_tbl.va, (void *)bp->rx_fc_out_tbl.dma,
> +                   bp->rx_fc_out_tbl.ctx_id);
> +
> +       rc = bnxt_hwrm_ctx_rgtr(bp, bp->tx_fc_in_tbl.dma,
> +                               &bp->tx_fc_in_tbl.ctx_id);
> +       if (rc)
> +               return rc;
> +
> +       PMD_DRV_LOG(DEBUG,
> +                   "tx_fc_in_tbl.va = %p tx_fc_in_tbl.dma = %p"
> +                   " tx_fc_in_tbl.ctx_id = %d\n",
> +                   bp->tx_fc_in_tbl.va, (void *)bp->tx_fc_in_tbl.dma,
> +                   bp->tx_fc_in_tbl.ctx_id);
> +
> +       rc = bnxt_hwrm_ctx_rgtr(bp, bp->tx_fc_out_tbl.dma,
> +                               &bp->tx_fc_out_tbl.ctx_id);
> +       if (rc)
> +               return rc;
> +
> +       PMD_DRV_LOG(DEBUG,
> +                   "tx_fc_out_tbl.va = %p tx_fc_out_tbl.dma = %p"
> +                   " tx_fc_out_tbl.ctx_id = %d\n",
> +                   bp->tx_fc_out_tbl.va, (void *)bp->tx_fc_out_tbl.dma,
> +                   bp->tx_fc_out_tbl.ctx_id);
> +
> +       memset(bp->rx_fc_out_tbl.va, 0, bp->rx_fc_out_tbl.size);
> +       rc = bnxt_hwrm_cfa_counter_cfg(bp, BNXT_DIR_RX,
> +                                      CFA_COUNTER_CFG_IN_COUNTER_TYPE_FC,
> +                                      bp->rx_fc_out_tbl.ctx_id,
> +                                      bp->max_fc,
> +                                      true);
> +       if (rc)
> +               return rc;
> +
> +       memset(bp->tx_fc_out_tbl.va, 0, bp->tx_fc_out_tbl.size);
> +       rc = bnxt_hwrm_cfa_counter_cfg(bp, BNXT_DIR_TX,
> +                                      CFA_COUNTER_CFG_IN_COUNTER_TYPE_FC,
> +                                      bp->tx_fc_out_tbl.ctx_id,
> +                                      bp->max_fc,
> +                                      true);
> +
> +       return rc;
> +}
> +
> +static int bnxt_alloc_ctx_mem_buf(char *type, size_t size,
> +                                 struct bnxt_ctx_mem_buf_info *ctx)
> +{
> +       if (!ctx)
> +               return -EINVAL;
> +
> +       ctx->va = rte_zmalloc(type, size, 0);
> +       if (ctx->va == NULL)
> +               return -ENOMEM;
> +       rte_mem_lock_page(ctx->va);
> +       ctx->size = size;
> +       ctx->dma = rte_mem_virt2iova(ctx->va);
> +       if (ctx->dma == RTE_BAD_IOVA)
> +               return -ENOMEM;
> +
> +       return 0;
> +}
> +
> +static int bnxt_init_fc_ctx_mem(struct bnxt *bp)
> +{
> +       struct rte_pci_device *pdev = bp->pdev;
> +       char type[RTE_MEMZONE_NAMESIZE];
> +       uint16_t max_fc;
> +       int rc = 0;
> +
> +       max_fc = bp->max_fc;
> +
> +       sprintf(type, "bnxt_rx_fc_in_" PCI_PRI_FMT, pdev->addr.domain,
> +               pdev->addr.bus, pdev->addr.devid, pdev->addr.function);
> +       /* 4 bytes for each counter-id */
> +       rc = bnxt_alloc_ctx_mem_buf(type, max_fc * 4, &bp->rx_fc_in_tbl);
> +       if (rc)
> +               return rc;
> +
> +       sprintf(type, "bnxt_rx_fc_out_" PCI_PRI_FMT, pdev->addr.domain,
> +               pdev->addr.bus, pdev->addr.devid, pdev->addr.function);
> +       /* 16 bytes for each counter - 8 bytes pkt_count, 8 bytes
> byte_count */
> +       rc = bnxt_alloc_ctx_mem_buf(type, max_fc * 16, &bp->rx_fc_out_tbl);
> +       if (rc)
> +               return rc;
> +
> +       sprintf(type, "bnxt_tx_fc_in_" PCI_PRI_FMT, pdev->addr.domain,
> +               pdev->addr.bus, pdev->addr.devid, pdev->addr.function);
> +       /* 4 bytes for each counter-id */
> +       rc = bnxt_alloc_ctx_mem_buf(type, max_fc * 4, &bp->tx_fc_in_tbl);
> +       if (rc)
> +               return rc;
> +
> +       sprintf(type, "bnxt_tx_fc_out_" PCI_PRI_FMT, pdev->addr.domain,
> +               pdev->addr.bus, pdev->addr.devid, pdev->addr.function);
> +       /* 16 bytes for each counter - 8 bytes pkt_count, 8 bytes byte_count */
> +       rc = bnxt_alloc_ctx_mem_buf(type, max_fc * 16, &bp->tx_fc_out_tbl);
> +       if (rc)
> +               return rc;
> +
> +       rc = bnxt_register_fc_ctx_mem(bp);
> +
> +       return rc;
> +}
> +
> +static int bnxt_init_ctx_mem(struct bnxt *bp)
> +{
> +       int rc = 0;
> +
> +       if (!(bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_COUNTERS) ||
> +           !(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp)))
> +               return 0;
> +
> +       rc = bnxt_hwrm_cfa_counter_qcaps(bp, &bp->max_fc);
> +       if (rc)
> +               return rc;
> +
> +       rc = bnxt_init_fc_ctx_mem(bp);
> +
> +       return rc;
> +}
> +
>  static int bnxt_init_chip(struct bnxt *bp)
>  {
>         struct rte_eth_link new;
> @@ -1005,6 +1157,7 @@ static void bnxt_dev_close_op(struct rte_eth_dev *eth_dev)
>         /* cancel the recovery handler before remove dev */
>         rte_eal_alarm_cancel(bnxt_dev_reset_and_resume, (void *)bp);
>         rte_eal_alarm_cancel(bnxt_dev_recover, (void *)bp);
> +       bnxt_cancel_fc_thread(bp);
>
>         if (eth_dev->data->dev_started)
>                 bnxt_dev_stop_op(eth_dev);
> @@ -4871,6 +5024,12 @@ static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev)
>         if (rc)
>                 return rc;
>
> +       rc = bnxt_init_ctx_mem(bp);
> +       if (rc) {
> +               PMD_DRV_LOG(ERR, "Failed to init adv_flow_counters\n");
> +               return rc;
> +       }
> +
>         rc = bnxt_init_locks(bp);
>         if (rc)
>                 return rc;
> @@ -4913,6 +5072,41 @@ bnxt_parse_devarg_truflow(__rte_unused const char *key,
>         return 0;
>  }
>
> +static int
> +bnxt_parse_devarg_flow_xstat(__rte_unused const char *key,
> +                            const char *value, void *opaque_arg)
> +{
> +       struct bnxt *bp = opaque_arg;
> +       unsigned long flow_xstat;
> +       char *end = NULL;
> +
> +       if (!value || !opaque_arg) {
> +               PMD_DRV_LOG(ERR,
> +                           "Invalid parameter passed to flow_xstat devarg.\n");
> +               return -EINVAL;
> +       }
> +
> +       flow_xstat = strtoul(value, &end, 10);
> +       if (end == NULL || *end != '\0' ||
> +           (flow_xstat == ULONG_MAX && errno == ERANGE)) {
> +               PMD_DRV_LOG(ERR,
> +                           "Invalid parameter passed to flow_xstat devarg.\n");
> +               return -EINVAL;
> +       }
> +
> +       if (BNXT_DEVARG_FLOW_XSTAT_INVALID(flow_xstat)) {
> +               PMD_DRV_LOG(ERR,
> +                           "Invalid value passed to flow_xstat devarg.\n");
> +               return -EINVAL;
> +       }
> +
> +       bp->flow_xstat = flow_xstat;
> +       if (bp->flow_xstat)
> +               PMD_DRV_LOG(INFO, "flow_xstat feature enabled.\n");
> +
> +       return 0;
> +}
> +
>  static void
>  bnxt_parse_dev_args(struct bnxt *bp, struct rte_devargs *devargs)
>  {
> @@ -4932,6 +5126,13 @@ bnxt_parse_dev_args(struct bnxt *bp, struct rte_devargs *devargs)
>         rte_kvargs_process(kvlist, BNXT_DEVARG_TRUFLOW,
>                            bnxt_parse_devarg_truflow, bp);
>
> +       /*
> +        * Handler for "flow_xstat" devarg.
> +        * Invoked as for ex: "-w 0000:00:0d.0,flow_xstat=1"
> +        */
> +       rte_kvargs_process(kvlist, BNXT_DEVARG_FLOW_XSTAT,
> +                          bnxt_parse_devarg_flow_xstat, bp);
> +
>         rte_kvargs_free(kvlist);
>  }
>
> @@ -5016,6 +5217,66 @@ bnxt_dev_init(struct rte_eth_dev *eth_dev)
>         return rc;
>  }
>
> +
> +static void bnxt_free_ctx_mem_buf(struct bnxt_ctx_mem_buf_info *ctx)
> +{
> +       if (!ctx)
> +               return;
> +
> +       if (ctx->va)
> +               rte_free(ctx->va);
> +
> +       ctx->va = NULL;
> +       ctx->dma = RTE_BAD_IOVA;
> +       ctx->ctx_id = BNXT_CTX_VAL_INVAL;
> +}
> +
> +static void bnxt_unregister_fc_ctx_mem(struct bnxt *bp)
> +{
> +       bnxt_hwrm_cfa_counter_cfg(bp, BNXT_DIR_RX,
> +                                 CFA_COUNTER_CFG_IN_COUNTER_TYPE_FC,
> +                                 bp->rx_fc_out_tbl.ctx_id,
> +                                 bp->max_fc,
> +                                 false);
> +
> +       bnxt_hwrm_cfa_counter_cfg(bp, BNXT_DIR_TX,
> +                                 CFA_COUNTER_CFG_IN_COUNTER_TYPE_FC,
> +                                 bp->tx_fc_out_tbl.ctx_id,
> +                                 bp->max_fc,
> +                                 false);
> +
> +       if (bp->rx_fc_in_tbl.ctx_id != BNXT_CTX_VAL_INVAL)
> +               bnxt_hwrm_ctx_unrgtr(bp, bp->rx_fc_in_tbl.ctx_id);
> +       bp->rx_fc_in_tbl.ctx_id = BNXT_CTX_VAL_INVAL;
> +
> +       if (bp->rx_fc_out_tbl.ctx_id != BNXT_CTX_VAL_INVAL)
> +               bnxt_hwrm_ctx_unrgtr(bp, bp->rx_fc_out_tbl.ctx_id);
> +       bp->rx_fc_out_tbl.ctx_id = BNXT_CTX_VAL_INVAL;
> +
> +       if (bp->tx_fc_in_tbl.ctx_id != BNXT_CTX_VAL_INVAL)
> +               bnxt_hwrm_ctx_unrgtr(bp, bp->tx_fc_in_tbl.ctx_id);
> +       bp->tx_fc_in_tbl.ctx_id = BNXT_CTX_VAL_INVAL;
> +
> +       if (bp->tx_fc_out_tbl.ctx_id != BNXT_CTX_VAL_INVAL)
> +               bnxt_hwrm_ctx_unrgtr(bp, bp->tx_fc_out_tbl.ctx_id);
> +       bp->tx_fc_out_tbl.ctx_id = BNXT_CTX_VAL_INVAL;
> +}
> +
> +static void bnxt_uninit_fc_ctx_mem(struct bnxt *bp)
> +{
> +       bnxt_unregister_fc_ctx_mem(bp);
> +
> +       bnxt_free_ctx_mem_buf(&bp->rx_fc_in_tbl);
> +       bnxt_free_ctx_mem_buf(&bp->rx_fc_out_tbl);
> +       bnxt_free_ctx_mem_buf(&bp->tx_fc_in_tbl);
> +       bnxt_free_ctx_mem_buf(&bp->tx_fc_out_tbl);
> +}
> +
> +static void bnxt_uninit_ctx_mem(struct bnxt *bp)
> +{
> +       bnxt_uninit_fc_ctx_mem(bp);
> +}
> +
>  static void
>  bnxt_uninit_locks(struct bnxt *bp)
>  {
> @@ -5043,6 +5304,8 @@ bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev)
>                 }
>         }
>
> +       bnxt_uninit_ctx_mem(bp);
> +
>         bnxt_uninit_locks(bp);
>         rte_free(bp->ptp_cfg);
>         bp->ptp_cfg = NULL;
> diff --git a/drivers/net/bnxt/bnxt_filter.h b/drivers/net/bnxt/bnxt_filter.h
> index 8f8a4c13b..4b2b3cadc 100644
> --- a/drivers/net/bnxt/bnxt_filter.h
> +++ b/drivers/net/bnxt/bnxt_filter.h
> @@ -25,6 +25,11 @@ struct bnxt;
>  #define BNXT_FLOW_PARSE_INNER_FLAG             BIT(6)
>  #define BNXT_FLOW_MARK_FLAG                    BIT(7)
>
> +struct bnxt_flow_stats {
> +       uint64_t        packets;
> +       uint64_t        bytes;
> +};
> +
>  struct bnxt_filter_info {
>         STAILQ_ENTRY(bnxt_filter_info)  next;
>         uint32_t                flow_id;
> @@ -84,6 +89,7 @@ struct bnxt_filter_info {
>          */
>         struct                  bnxt_vnic_info *vnic;
>         uint32_t                mark;
> +       struct bnxt_flow_stats  hw_stats;
>  };
>
>  struct bnxt_filter_info *bnxt_alloc_filter(struct bnxt *bp);
> diff --git a/drivers/net/bnxt/bnxt_flow.c b/drivers/net/bnxt/bnxt_flow.c
> index 9fb6dbdd9..6c6da84c6 100644
> --- a/drivers/net/bnxt/bnxt_flow.c
> +++ b/drivers/net/bnxt/bnxt_flow.c
> @@ -10,6 +10,7 @@
>  #include <rte_flow.h>
>  #include <rte_flow_driver.h>
>  #include <rte_tailq.h>
> +#include <rte_alarm.h>
>
>  #include "bnxt.h"
>  #include "bnxt_filter.h"
> @@ -1627,6 +1628,51 @@ bnxt_match_filter(struct bnxt *bp, struct bnxt_filter_info *nf)
>         return 0;
>  }
>
> +static void
> +bnxt_setup_flow_counter(struct bnxt *bp)
> +{
> +       if (bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_COUNTERS &&
> +           !(bp->flags & BNXT_FLAG_FC_THREAD)) {
> +               rte_eal_alarm_set(US_PER_S * BNXT_FC_TIMER,
> +                                 bnxt_flow_cnt_alarm_cb,
> +                                 (void *)bp);
> +               bp->flags |= BNXT_FLAG_FC_THREAD;
> +       }
> +}
> +
> +void bnxt_flow_cnt_alarm_cb(void *arg)
> +{
> +       int rc = 0;
> +       struct bnxt *bp = arg;
> +
> +       if (!bp->rx_fc_out_tbl.va) {
> +               PMD_DRV_LOG(ERR, "bp->rx_fc_out_tbl.va is NULL?\n");
> +               bnxt_cancel_fc_thread(bp);
> +               return;
> +       }
> +
> +       if (!bp->flow_count) {
> +               bnxt_cancel_fc_thread(bp);
> +               return;
> +       }
> +
> +       if (!bp->eth_dev->data->dev_started) {
> +               bnxt_cancel_fc_thread(bp);
> +               return;
> +       }
> +
> +       rc = bnxt_flow_stats_req(bp);
> +       if (rc) {
> +               PMD_DRV_LOG(ERR, "Flow stat alarm not rescheduled.\n");
> +               return;
> +       }
> +
> +       rte_eal_alarm_set(US_PER_S * BNXT_FC_TIMER,
> +                         bnxt_flow_cnt_alarm_cb,
> +                         (void *)bp);
> +}
> +
> +
>  static struct rte_flow *
>  bnxt_flow_create(struct rte_eth_dev *dev,
>                  const struct rte_flow_attr *attr,
> @@ -1783,7 +1829,9 @@ bnxt_flow_create(struct rte_eth_dev *dev,
>                         bp->mark_table[flow_id].valid = true;
>                         bp->mark_table[flow_id].mark_id = filter->mark;
>                 }
> +               bp->flow_count++;
>                 bnxt_release_flow_lock(bp);
> +               bnxt_setup_flow_counter(bp);
>                 return flow;
>         }
>
> @@ -1903,6 +1951,7 @@ _bnxt_flow_destroy(struct bnxt *bp,
>                 bnxt_free_filter(bp, filter);
>                 STAILQ_REMOVE(&vnic->flow_list, flow, rte_flow, next);
>                 rte_free(flow);
> +               bp->flow_count--;
>
>                 /* If this was the last flow associated with this vnic,
>                  * switch the queue back to RSS pool.
> @@ -1955,6 +2004,12 @@ bnxt_flow_destroy(struct rte_eth_dev *dev,
>         return ret;
>  }
>
> +void bnxt_cancel_fc_thread(struct bnxt *bp)
> +{
> +       bp->flags &= ~BNXT_FLAG_FC_THREAD;
> +       rte_eal_alarm_cancel(bnxt_flow_cnt_alarm_cb, (void *)bp);
> +}
> +
>  static int
>  bnxt_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
>  {
> @@ -1981,6 +2036,8 @@ bnxt_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
>                                 break;
>                 }
>         }
> +
> +       bnxt_cancel_fc_thread(bp);
>         bnxt_release_flow_lock(bp);
>
>         return ret;
> diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
> index d435f6570..09a73286b 100644
> --- a/drivers/net/bnxt/bnxt_hwrm.c
> +++ b/drivers/net/bnxt/bnxt_hwrm.c
> @@ -744,6 +744,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
>         } else {
>                 bp->max_vnics = 1;
>         }
> +       PMD_DRV_LOG(DEBUG, "Max l2_cntxts is %d vnics is %d\n",
> +                   bp->max_l2_ctx, bp->max_vnics);
>         bp->max_stat_ctx = rte_le_to_cpu_16(resp->max_stat_ctx);
>         if (BNXT_PF(bp)) {
>                 bp->pf.total_vnics = rte_le_to_cpu_16(resp->max_vnics);
> @@ -1169,10 +1171,17 @@ int bnxt_hwrm_ver_get(struct bnxt *bp, uint32_t timeout)
>                 PMD_DRV_LOG(DEBUG, "FW supports Trusted VFs\n");
>         if (dev_caps_cfg &
>             HWRM_VER_GET_OUTPUT_DEV_CAPS_CFG_CFA_ADV_FLOW_MGNT_SUPPORTED) {
> -               bp->flags |= BNXT_FLAG_ADV_FLOW_MGMT;
> +               bp->fw_cap |= BNXT_FW_CAP_ADV_FLOW_MGMT;
>                 PMD_DRV_LOG(DEBUG, "FW supports advanced flow management\n");
>         }
>
> +       if (dev_caps_cfg &
> +           HWRM_VER_GET_OUTPUT_DEV_CAPS_CFG_ADV_FLOW_COUNTERS_SUPPORTED) {
> +               PMD_DRV_LOG(DEBUG, "FW supports advanced flow counters\n");
> +               bp->fw_cap |= BNXT_FW_CAP_ADV_FLOW_COUNTERS;
> +       }
> +
> +
>  error:
>         HWRM_UNLOCK();
>         return rc;
> @@ -5216,7 +5225,7 @@ int bnxt_hwrm_cfa_adv_flow_mgmt_qcaps(struct bnxt *bp)
>         uint32_t flags = 0;
>         int rc = 0;
>
> -       if (!(bp->flags & BNXT_FLAG_ADV_FLOW_MGMT))
> +       if (!(bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_MGMT))
>                 return rc;
>
>         if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) {
> @@ -5239,3 +5248,159 @@ int bnxt_hwrm_cfa_adv_flow_mgmt_qcaps(struct bnxt *bp)
>
>         return rc;
>  }
> +
> +int bnxt_hwrm_cfa_counter_qcaps(struct bnxt *bp, uint16_t *max_fc)
> +{
> +       int rc = 0;
> +
> +       struct hwrm_cfa_counter_qcaps_input req = {0};
> +       struct hwrm_cfa_counter_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
> +
> +       if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) {
> +               PMD_DRV_LOG(DEBUG,
> +                           "Not a PF or trusted VF. Command not supported\n");
> +               return 0;
> +       }
> +
> +       HWRM_PREP(&req, HWRM_CFA_COUNTER_QCAPS, BNXT_USE_KONG(bp));
> +       req.target_id = rte_cpu_to_le_16(bp->fw_fid);
> +       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_KONG(bp));
> +
> +       HWRM_CHECK_RESULT();
> +       if (max_fc)
> +               *max_fc = rte_le_to_cpu_16(resp->max_rx_fc);
> +       HWRM_UNLOCK();
> +
> +       PMD_DRV_LOG(DEBUG, "max_fc = %d\n", *max_fc);
> +       return 0;
> +}
> +
> +int bnxt_hwrm_ctx_rgtr(struct bnxt *bp, rte_iova_t dma_addr, uint16_t *ctx_id)
> +{
> +       int rc = 0;
> +       struct hwrm_cfa_ctx_mem_rgtr_input req = {.req_type = 0 };
> +       struct hwrm_cfa_ctx_mem_rgtr_output *resp = bp->hwrm_cmd_resp_addr;
> +
> +       if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) {
> +               PMD_DRV_LOG(DEBUG,
> +                           "Not a PF or trusted VF. Command not supported\n");
> +               return 0;
> +       }
> +
> +       HWRM_PREP(&req, HWRM_CFA_CTX_MEM_RGTR, BNXT_USE_KONG(bp));
> +
> +       req.page_level = HWRM_CFA_CTX_MEM_RGTR_INPUT_PAGE_LEVEL_LVL_0;
> +       req.page_size = HWRM_CFA_CTX_MEM_RGTR_INPUT_PAGE_SIZE_2M;
> +       req.page_dir = rte_cpu_to_le_64(dma_addr);
> +
> +       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_KONG(bp));
> +
> +       HWRM_CHECK_RESULT();
> +       if (ctx_id) {
> +               *ctx_id  = rte_le_to_cpu_16(resp->ctx_id);
> +               PMD_DRV_LOG(DEBUG, "ctx_id = %d\n", *ctx_id);
> +       }
> +       HWRM_UNLOCK();
> +
> +       return 0;
> +}
> +
> +int bnxt_hwrm_ctx_unrgtr(struct bnxt *bp, uint16_t ctx_id)
> +{
> +       int rc = 0;
> +       struct hwrm_cfa_ctx_mem_unrgtr_input req = {.req_type = 0 };
> +       struct hwrm_cfa_ctx_mem_unrgtr_output *resp = bp->hwrm_cmd_resp_addr;
> +
> +       if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) {
> +               PMD_DRV_LOG(DEBUG,
> +                           "Not a PF or trusted VF. Command not supported\n");
> +               return 0;
> +       }
> +
> +       HWRM_PREP(&req, HWRM_CFA_CTX_MEM_UNRGTR, BNXT_USE_KONG(bp));
> +
> +       req.ctx_id = rte_cpu_to_le_16(ctx_id);
> +
> +       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_KONG(bp));
> +
> +       HWRM_CHECK_RESULT();
> +       HWRM_UNLOCK();
> +
> +       return rc;
> +}
> +
> +int bnxt_hwrm_cfa_counter_cfg(struct bnxt *bp, enum bnxt_flow_dir dir,
> +                             uint16_t cntr, uint16_t ctx_id,
> +                             uint32_t num_entries, bool enable)
> +{
> +       struct hwrm_cfa_counter_cfg_input req = {0};
> +       struct hwrm_cfa_counter_cfg_output *resp = bp->hwrm_cmd_resp_addr;
> +       uint16_t flags = 0;
> +       int rc;
> +
> +       if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) {
> +               PMD_DRV_LOG(DEBUG,
> +                           "Not a PF or trusted VF. Command not supported\n");
> +               return 0;
> +       }
> +
> +       HWRM_PREP(&req, HWRM_CFA_COUNTER_CFG, BNXT_USE_KONG(bp));
> +
> +       req.target_id = rte_cpu_to_le_16(bp->fw_fid);
> +       req.counter_type = rte_cpu_to_le_16(cntr);
> +       flags = enable ? HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE_ENABLE :
> +               HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE_DISABLE;
> +       flags |= HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_PULL;
> +       if (dir == BNXT_DIR_RX)
> +               flags |=  HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH_RX;
> +       else if (dir == BNXT_DIR_TX)
> +               flags |=  HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH_TX;
> +       req.flags = rte_cpu_to_le_16(flags);
> +       req.ctx_id =  rte_cpu_to_le_16(ctx_id);
> +       req.num_entries = rte_cpu_to_le_32(num_entries);
> +
> +       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_KONG(bp));
> +       HWRM_CHECK_RESULT();
> +       HWRM_UNLOCK();
> +
> +       return 0;
> +}
> +
> +int bnxt_hwrm_cfa_counter_qstats(struct bnxt *bp,
> +                                enum bnxt_flow_dir dir,
> +                                uint16_t cntr,
> +                                uint16_t num_entries)
> +{
> +       struct hwrm_cfa_counter_qstats_output *resp = bp->hwrm_cmd_resp_addr;
> +       struct hwrm_cfa_counter_qstats_input req = {0};
> +       uint16_t flow_ctx_id = 0;
> +       uint16_t flags = 0;
> +       int rc = 0;
> +
> +       if (!(BNXT_PF(bp) || BNXT_VF_IS_TRUSTED(bp))) {
> +               PMD_DRV_LOG(DEBUG,
> +                           "Not a PF or trusted VF. Command not supported\n");
> +               return 0;
> +       }
> +
> +       if (dir == BNXT_DIR_RX) {
> +               flow_ctx_id = bp->rx_fc_in_tbl.ctx_id;
> +               flags = HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH_RX;
> +       } else if (dir == BNXT_DIR_TX) {
> +               flow_ctx_id = bp->tx_fc_in_tbl.ctx_id;
> +               flags = HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH_TX;
> +       }
> +
> +       HWRM_PREP(&req, HWRM_CFA_COUNTER_QSTATS, BNXT_USE_KONG(bp));
> +       req.target_id = rte_cpu_to_le_16(bp->fw_fid);
> +       req.counter_type = rte_cpu_to_le_16(cntr);
> +       req.input_flow_ctx_id = rte_cpu_to_le_16(flow_ctx_id);
> +       req.num_entries = rte_cpu_to_le_16(num_entries);
> +       req.flags = rte_cpu_to_le_16(flags);
> +       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_KONG(bp));
> +
> +       HWRM_CHECK_RESULT();
> +       HWRM_UNLOCK();
> +
> +       return 0;
> +}
> diff --git a/drivers/net/bnxt/bnxt_hwrm.h b/drivers/net/bnxt/bnxt_hwrm.h
> index 1b7e35306..58b414d4f 100644
> --- a/drivers/net/bnxt/bnxt_hwrm.h
> +++ b/drivers/net/bnxt/bnxt_hwrm.h
> @@ -88,6 +88,18 @@ int bnxt_hwrm_tf_message_direct(struct bnxt *bp,
>                                 void *resp_msg,
>                                 uint32_t resp_len);
>
> +#define CFA_COUNTER_CFG_IN_COUNTER_TYPE_FC \
> +       HWRM_CFA_COUNTER_CFG_INPUT_COUNTER_TYPE_FC
> +
> +enum bnxt_flow_dir {
> +       BNXT_DIR_RX = 0,
> +       BNXT_DIR_TX,
> +       BNXT_DIR_LOOPBACK,
> +       BNXT_DIR_MAX
> +};
> +
> +#define BNXT_CTX_VAL_INVAL     0xFFFF
> +
>  int bnxt_hwrm_cfa_l2_clear_rx_mask(struct bnxt *bp,
>                                    struct bnxt_vnic_info *vnic);
>  int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp, struct bnxt_vnic_info *vnic,
> @@ -248,4 +260,14 @@ int bnxt_hwrm_fw_reset(struct bnxt *bp);
>  int bnxt_hwrm_port_ts_query(struct bnxt *bp, uint8_t path,
>                             uint64_t *timestamp);
>  int bnxt_hwrm_cfa_adv_flow_mgmt_qcaps(struct bnxt *bp);
> +int bnxt_hwrm_cfa_counter_qcaps(struct bnxt *bp, uint16_t *max_fc);
> +int bnxt_hwrm_ctx_rgtr(struct bnxt *bp, rte_iova_t dma_addr, uint16_t *ctx_id);
> +int bnxt_hwrm_ctx_unrgtr(struct bnxt *bp, uint16_t ctx_id);
> +int bnxt_hwrm_cfa_counter_cfg(struct bnxt *bp, enum bnxt_flow_dir dir,
> +                             uint16_t cntr, uint16_t ctx_id,
> +                             uint32_t num_entries, bool enable);
> +int bnxt_hwrm_cfa_counter_qstats(struct bnxt *bp,
> +                                enum bnxt_flow_dir dir,
> +                                uint16_t cntr,
> +                                uint16_t num_entries);
>  #endif
> diff --git a/drivers/net/bnxt/bnxt_stats.c b/drivers/net/bnxt/bnxt_stats.c
> index 6afd11adb..1d3be16f8 100644
> --- a/drivers/net/bnxt/bnxt_stats.c
> +++ b/drivers/net/bnxt/bnxt_stats.c
> @@ -10,10 +10,12 @@
>
>  #include "bnxt.h"
>  #include "bnxt_cpr.h"
> +#include "bnxt_filter.h"
>  #include "bnxt_hwrm.h"
>  #include "bnxt_rxq.h"
>  #include "bnxt_stats.h"
>  #include "bnxt_txq.h"
> +#include "bnxt_vnic.h"
>  #include "hsi_struct_def_dpdk.h"
>
>  static const struct bnxt_xstats_name_off bnxt_rx_stats_strings[] = {
> @@ -611,7 +613,9 @@ int bnxt_dev_xstats_get_op(struct rte_eth_dev *eth_dev,
>                 RTE_DIM(bnxt_tx_stats_strings) +
>                 RTE_DIM(bnxt_func_stats_strings) +
>                 RTE_DIM(bnxt_rx_ext_stats_strings) +
> -               RTE_DIM(bnxt_tx_ext_stats_strings);
> +               RTE_DIM(bnxt_tx_ext_stats_strings) +
> +               bnxt_flow_stats_cnt(bp);
> +
>         stat_count = count;
>
>         if (n < count)
> @@ -660,24 +664,77 @@ int bnxt_dev_xstats_get_op(struct rte_eth_dev *eth_dev,
>                 xstats[count].value = rte_le_to_cpu_64
>                                         (*(uint64_t *)((char *)tx_stats_ext +
>                                          bnxt_tx_ext_stats_strings[i].offset));
> -
>                 count++;
>         }
>
> +       if (bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_COUNTERS &&
> +           bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_MGMT &&
> +           bp->flow_xstat) {
> +               int j;
> +
> +               i = 0;
> +               for (j = 0; j < bp->max_vnics; j++) {
> +                       struct bnxt_filter_info *filter;
> +                       struct bnxt_vnic_info *vnic;
> +                       struct rte_flow *flow;
> +
> +                       vnic = &bp->vnic_info[j];
> +                       if (vnic && vnic->fw_vnic_id == INVALID_VNIC_ID)
> +                               continue;
> +
> +                       if (STAILQ_EMPTY(&vnic->flow_list))
> +                               continue;
> +
> +                       STAILQ_FOREACH(flow, &vnic->flow_list, next) {
> +                               if (!flow || !flow->filter)
> +                                       continue;
> +
> +                               filter = flow->filter;
> +                               xstats[count].id = count;
> +                               xstats[count].value =
> +                                       filter->hw_stats.bytes;
> +                               count++;
> +                               xstats[count].id = count;
> +                               xstats[count].value =
> +                                       filter->hw_stats.packets;
> +                               count++;
> +                               if (++i > bp->max_l2_ctx)
> +                                       break;
> +                       }
> +                       if (i > bp->max_l2_ctx)
> +                               break;
> +               }
> +       }
> +
>         return stat_count;
>  }
>
> +int bnxt_flow_stats_cnt(struct bnxt *bp)
> +{
> +       if (bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_COUNTERS &&
> +           bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_MGMT &&
> +           bp->flow_xstat) {
> +               struct bnxt_xstats_name_off flow_bytes[bp->max_l2_ctx];
> +               struct bnxt_xstats_name_off flow_pkts[bp->max_l2_ctx];
> +
> +               return RTE_DIM(flow_bytes) + RTE_DIM(flow_pkts);
> +       }
> +
> +       return 0;
> +}
> +
>  int bnxt_dev_xstats_get_names_op(struct rte_eth_dev *eth_dev,
> -                                struct rte_eth_xstat_name *xstats_names,
> -                                __rte_unused unsigned int limit)
> +               struct rte_eth_xstat_name *xstats_names,
> +               __rte_unused unsigned int limit)
>  {
> +       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
>         const unsigned int stat_cnt = RTE_DIM(bnxt_rx_stats_strings) +
>                                 RTE_DIM(bnxt_tx_stats_strings) +
>                                 RTE_DIM(bnxt_func_stats_strings) +
>                                 RTE_DIM(bnxt_rx_ext_stats_strings) +
> -                               RTE_DIM(bnxt_tx_ext_stats_strings);
> -       struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
> -       unsigned int i, count;
> +                               RTE_DIM(bnxt_tx_ext_stats_strings) +
> +                               bnxt_flow_stats_cnt(bp);
> +       unsigned int i, count = 0;
>         int rc;
>
>         rc = is_bnxt_in_error(bp);
> @@ -724,7 +781,26 @@ int bnxt_dev_xstats_get_names_op(struct rte_eth_dev *eth_dev,
>                         count++;
>                 }
>
> +               if (bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_COUNTERS &&
> +                   bp->fw_cap & BNXT_FW_CAP_ADV_FLOW_MGMT &&
> +                   bp->flow_xstat) {
> +                       for (i = 0; i < bp->max_l2_ctx; i++) {
> +                               char buf[RTE_ETH_XSTATS_NAME_SIZE];
> +
> +                               sprintf(buf, "flow_%d_bytes", i);
> +                               strlcpy(xstats_names[count].name, buf,
> +                                       sizeof(xstats_names[count].name));
> +                               count++;
> +
> +                               sprintf(buf, "flow_%d_packets", i);
> +                               strlcpy(xstats_names[count].name, buf,
> +                                       sizeof(xstats_names[count].name));
> +
> +                               count++;
> +                       }
> +               }
>         }
> +
>         return stat_cnt;
>  }
>
> @@ -754,12 +830,13 @@ int bnxt_dev_xstats_reset_op(struct rte_eth_dev *eth_dev)
>  int bnxt_dev_xstats_get_by_id_op(struct rte_eth_dev *dev, const uint64_t *ids,
>                 uint64_t *values, unsigned int limit)
>  {
> +       struct bnxt *bp = dev->data->dev_private;
>         const unsigned int stat_cnt = RTE_DIM(bnxt_rx_stats_strings) +
>                                 RTE_DIM(bnxt_tx_stats_strings) +
>                                 RTE_DIM(bnxt_func_stats_strings) +
>                                 RTE_DIM(bnxt_rx_ext_stats_strings) +
> -                               RTE_DIM(bnxt_tx_ext_stats_strings);
> -       struct bnxt *bp = dev->data->dev_private;
> +                               RTE_DIM(bnxt_tx_ext_stats_strings) +
> +                               bnxt_flow_stats_cnt(bp);
>         struct rte_eth_xstat xstats[stat_cnt];
>         uint64_t values_copy[stat_cnt];
>         uint16_t i;
> @@ -787,13 +864,14 @@ int bnxt_dev_xstats_get_names_by_id_op(struct rte_eth_dev *dev,
>                                 struct rte_eth_xstat_name *xstats_names,
>                                 const uint64_t *ids, unsigned int limit)
>  {
> +       struct bnxt *bp = dev->data->dev_private;
>         const unsigned int stat_cnt = RTE_DIM(bnxt_rx_stats_strings) +
>                                 RTE_DIM(bnxt_tx_stats_strings) +
>                                 RTE_DIM(bnxt_func_stats_strings) +
>                                 RTE_DIM(bnxt_rx_ext_stats_strings) +
> -                               RTE_DIM(bnxt_tx_ext_stats_strings);
> +                               RTE_DIM(bnxt_tx_ext_stats_strings) +
> +                               bnxt_flow_stats_cnt(bp);
>         struct rte_eth_xstat_name xstats_names_copy[stat_cnt];
> -       struct bnxt *bp = dev->data->dev_private;
>         uint16_t i;
>         int rc;
>
> @@ -817,3 +895,135 @@ int bnxt_dev_xstats_get_names_by_id_op(struct rte_eth_dev *dev,
>         }
>         return stat_cnt;
>  }
> +
> +/* Update the input context memory with the flow counter IDs
> + * of the flows that we are interested in.
> + * Also, update the output tables with the current local values
> + * since that is what will be used by FW to accumulate
> + */
> +static void bnxt_update_fc_pre_qstat(uint32_t *in_tbl,
> +                                    uint64_t *out_tbl,
> +                                    struct bnxt_filter_info *filter,
> +                                    uint32_t *ptbl_cnt)
> +{
> +       uint32_t in_tbl_cnt = *ptbl_cnt;
> +
> +       in_tbl[in_tbl_cnt] = filter->flow_id;
> +       out_tbl[2 * in_tbl_cnt] = filter->hw_stats.packets;
> +       out_tbl[2 * in_tbl_cnt + 1] = filter->hw_stats.bytes;
> +       in_tbl_cnt++;
> +       *ptbl_cnt = in_tbl_cnt;
> +}
> +
> +/* Post issuing counter_qstats cmd, update the driver's local stat
> + * entries with the values DMA-ed by FW in the output table
> + */
> +static void bnxt_update_fc_post_qstat(struct bnxt_filter_info *filter,
> +                                     uint64_t *out_tbl,
> +                                     uint32_t out_tbl_idx)
> +{
> +       filter->hw_stats.packets = out_tbl[2 * out_tbl_idx];
> +       filter->hw_stats.bytes = out_tbl[(2 * out_tbl_idx) + 1];
> +}
> +
> +static int bnxt_update_fc_tbl(struct bnxt *bp, uint16_t ctr,
> +                             struct bnxt_filter_info *en_tbl[],
> +                             uint16_t in_flow_cnt)
> +{
> +       uint32_t *in_rx_tbl;
> +       uint64_t *out_rx_tbl;
> +       uint32_t in_rx_tbl_cnt = 0;
> +       uint32_t out_rx_tbl_cnt = 0;
> +       int i, rc = 0;
> +
> +       in_rx_tbl = (uint32_t *)bp->rx_fc_in_tbl.va;
> +       out_rx_tbl = (uint64_t *)bp->rx_fc_out_tbl.va;
> +
> +       for (i = 0; i < in_flow_cnt; i++) {
> +               if (!en_tbl[i])
> +                       continue;
> +
> +               /* Currently only ingress/Rx flows are supported anyway. */
> +               bnxt_update_fc_pre_qstat(in_rx_tbl, out_rx_tbl,
> +                                        en_tbl[i], &in_rx_tbl_cnt);
> +       }
> +
> +       /* Currently only ingress/Rx flows are supported */
> +       if (in_rx_tbl_cnt) {
> +               rc = bnxt_hwrm_cfa_counter_qstats(bp, BNXT_DIR_RX, ctr,
> +                                                 in_rx_tbl_cnt);
> +               if (rc)
> +                       return rc;
> +       }
> +
> +       for (i = 0; i < in_flow_cnt; i++) {
> +               if (!en_tbl[i])
> +                       continue;
> +
> +               /* Currently only ingress/Rx flows are supported */
> +               bnxt_update_fc_post_qstat(en_tbl[i], out_rx_tbl,
> +                                         out_rx_tbl_cnt);
> +               out_rx_tbl_cnt++;
> +       }
> +
> +       return rc;
> +}
> +
> +/* Walks through the list which has all the flows
> + * requesting for explicit flow counters.
> + *
> + * The filter of every flow found on a valid VNIC is collected and handed
> + * to bnxt_update_fc_tbl() in batches of at most bp->max_fc entries. The
> + * flow lock is held for the whole walk and released on every return path.
> + *
> + * Returns 0 on success (including when no flow was collected), otherwise
> + * the non-zero value returned by bnxt_update_fc_tbl(); in that case the
> + * flow counter thread is cancelled through bnxt_cancel_fc_thread().
> + */
> +int bnxt_flow_stats_req(struct bnxt *bp)
> +{
> +       int i;
> +       int rc = 0;
> +       struct rte_flow *flow;
> +       uint16_t in_flow_tbl_cnt = 0;
> +       struct bnxt_vnic_info *vnic = NULL;
> +       /* NOTE(review): assumes bp->max_fc is non-zero here - confirm */
> +       struct bnxt_filter_info *valid_en_tbl[bp->max_fc];
> +       uint16_t counter_type = CFA_COUNTER_CFG_IN_COUNTER_TYPE_FC;
> +
> +       bnxt_acquire_flow_lock(bp);
> +       for (i = 0; i < bp->max_vnics; i++) {
> +               vnic = &bp->vnic_info[i];
> +               if (vnic->fw_vnic_id == INVALID_VNIC_ID)
> +                       continue;
> +
> +               /* An empty flow list simply yields no iterations. */
> +               STAILQ_FOREACH(flow, &vnic->flow_list, next) {
> +                       if (!flow->filter)
> +                               continue;
> +
> +                       valid_en_tbl[in_flow_tbl_cnt++] = flow->filter;
> +                       if (in_flow_tbl_cnt < bp->max_fc)
> +                               continue;
> +
> +                       /* Batch is full: flush it and start a new one. */
> +                       rc = bnxt_update_fc_tbl(bp, counter_type,
> +                                               valid_en_tbl,
> +                                               in_flow_tbl_cnt);
> +                       if (rc)
> +                               goto out;
> +                       in_flow_tbl_cnt = 0;
> +               }
> +       }
> +
> +       /* Flush the last, partially filled batch (if any). */
> +       if (in_flow_tbl_cnt)
> +               rc = bnxt_update_fc_tbl(bp, counter_type, valid_en_tbl,
> +                                       in_flow_tbl_cnt);
> +
> +out:
> +       /* Single exit: the lock is dropped even when no flow was found. */
> +       bnxt_release_flow_lock(bp);
> +
> +       /* If cmd fails once, no need of
> +        * invoking again every second
> +        */
> +       if (rc)
> +               bnxt_cancel_fc_thread(bp);
> +
> +       return rc;
> +}
> diff --git a/drivers/net/bnxt/hsi_struct_def_dpdk.h b/drivers/net/bnxt/hsi_struct_def_dpdk.h
> index cde96e784..608eaa2f4 100644
> --- a/drivers/net/bnxt/hsi_struct_def_dpdk.h
> +++ b/drivers/net/bnxt/hsi_struct_def_dpdk.h
> @@ -38338,4 +38338,288 @@ struct hwrm_port_ts_query_output {
>         uint8_t         valid;
>  } __attribute__((packed));
>
> +/**************************
> + * hwrm_cfa_counter_qcaps *
> + **************************/
> +
> +
> +/* hwrm_cfa_counter_qcaps_input (size:128b/16B) */
> +struct hwrm_cfa_counter_qcaps_input {
> +       /* The HWRM command request type. */
> +       uint16_t        req_type;
> +       /*
> +        * The completion ring to send the completion event on. This should
> +        * be the NQ ID returned from the `nq_alloc` HWRM command.
> +        */
> +       uint16_t        cmpl_ring;
> +       /*
> +        * The sequence ID is used by the driver for tracking multiple
> +        * commands. This ID is treated as opaque data by the firmware and
> +        * the value is returned in the `hwrm_resp_hdr` upon completion.
> +        */
> +       uint16_t        seq_id;
> +       /*
> +        * The target ID of the command:
> +        * * 0x0-0xFFF8 - The function ID
> +        * * 0xFFF8-0xFFFC, 0xFFFE - Reserved for internal processors
> +        * * 0xFFFD - Reserved for user-space HWRM interface
> +        * * 0xFFFF - HWRM
> +        */
> +       uint16_t        target_id;
> +       /*
> +        * A physical address pointer pointing to a host buffer that the
> +        * command's response data will be written. This can be either a host
> +        * physical address (HPA) or a guest physical address (GPA) and must
> +        * point to a physically contiguous block of memory.
> +        */
> +       uint64_t        resp_addr;
> +} __attribute__((packed));
> +
> +/* hwrm_cfa_counter_qcaps_output (size:576b/72B) */
> +struct hwrm_cfa_counter_qcaps_output {
> +       /* The specific error status for the command. */
> +       uint16_t        error_code;
> +       /* The HWRM command request type. */
> +       uint16_t        req_type;
> +       /* The sequence ID from the original command. */
> +       uint16_t        seq_id;
> +       /* The length of the response data in number of bytes. */
> +       uint16_t        resp_len;
> +       uint32_t        flags;
> +       /* Enumeration denoting the supported CFA counter format. */
> +       #define HWRM_CFA_COUNTER_QCAPS_OUTPUT_FLAGS_COUNTER_FORMAT \
> +               UINT32_C(0x1)
> +       /* CFA counter types are not supported. */
> +       #define HWRM_CFA_COUNTER_QCAPS_OUTPUT_FLAGS_COUNTER_FORMAT_NONE \
> +               UINT32_C(0x0)
> +       /* 64-bit packet counters followed by 64-bit byte counters format. */
> +       #define HWRM_CFA_COUNTER_QCAPS_OUTPUT_FLAGS_COUNTER_FORMAT_64_BIT \
> +               UINT32_C(0x1)
> +       #define HWRM_CFA_COUNTER_QCAPS_OUTPUT_FLAGS_COUNTER_FORMAT_LAST \
> +               HWRM_CFA_COUNTER_QCAPS_OUTPUT_FLAGS_COUNTER_FORMAT_64_BIT
> +       uint32_t        unused_0;
> +       /* Minimum guaranteed number of flow counters supported for this function, in RX direction. */
> +       uint32_t        min_rx_fc;
> +       /* Maximum non-guaranteed number of flow counters supported for this function, in RX direction. */
> +       uint32_t        max_rx_fc;
> +       /* Minimum guaranteed number of flow counters supported for this function, in TX direction. */
> +       uint32_t        min_tx_fc;
> +       /* Maximum non-guaranteed number of flow counters supported for this function, in TX direction. */
> +       uint32_t        max_tx_fc;
> +       /* Minimum guaranteed number of extension flow counters supported for this function, in RX direction. */
> +       uint32_t        min_rx_efc;
> +       /* Maximum non-guaranteed number of extension flow counters supported for this function, in RX direction. */
> +       uint32_t        max_rx_efc;
> +       /* Minimum guaranteed number of extension flow counters supported for this function, in TX direction. */
> +       uint32_t        min_tx_efc;
> +       /* Maximum non-guaranteed number of extension flow counters supported for this function, in TX direction. */
> +       uint32_t        max_tx_efc;
> +       /* Minimum guaranteed number of meter drop counters supported for this function, in RX direction. */
> +       uint32_t        min_rx_mdc;
> +       /* Maximum non-guaranteed number of meter drop counters supported for this function, in RX direction. */
> +       uint32_t        max_rx_mdc;
> +       /* Minimum guaranteed number of meter drop counters supported for this function, in TX direction. */
> +       uint32_t        min_tx_mdc;
> +       /* Maximum non-guaranteed number of meter drop counters supported for this function, in TX direction. */
> +       uint32_t        max_tx_mdc;
> +       /* Maximum guaranteed number of flow counters which can be used during flow alloc. */
> +       uint32_t        max_flow_alloc_fc;
> +       uint8_t unused_1[3];
> +       /*
> +        * This field is used in Output records to indicate that the output
> +        * is completely written to RAM.  This field should be read as '1'
> +        * to indicate that the output has been completely written.
> +        * When writing a command completion or response to an internal processor,
> +        * the order of writes has to be such that this field is written last.
> +        */
> +       uint8_t valid;
> +} __attribute__((packed));
> +
> +/************************
> + * hwrm_cfa_counter_cfg *
> + ************************/
> +
> +
> +/* hwrm_cfa_counter_cfg_input (size:256b/32B) */
> +struct hwrm_cfa_counter_cfg_input {
> +       /* The HWRM command request type. */
> +       uint16_t        req_type;
> +       /*
> +        * The completion ring to send the completion event on. This should
> +        * be the NQ ID returned from the `nq_alloc` HWRM command.
> +        */
> +       uint16_t        cmpl_ring;
> +       /*
> +        * The sequence ID is used by the driver for tracking multiple
> +        * commands. This ID is treated as opaque data by the firmware and
> +        * the value is returned in the `hwrm_resp_hdr` upon completion.
> +        */
> +       uint16_t        seq_id;
> +       /*
> +        * The target ID of the command:
> +        * * 0x0-0xFFF8 - The function ID
> +        * * 0xFFF8-0xFFFC, 0xFFFE - Reserved for internal processors
> +        * * 0xFFFD - Reserved for user-space HWRM interface
> +        * * 0xFFFF - HWRM
> +        */
> +       uint16_t        target_id;
> +       /*
> +        * A physical address pointer pointing to a host buffer that the
> +        * command's response data will be written. This can be either a host
> +        * physical address (HPA) or a guest physical address (GPA) and must
> +        * point to a physically contiguous block of memory.
> +        */
> +       uint64_t        resp_addr;
> +       uint16_t        flags;
> +       /* Enumeration denoting the configuration mode. */
> +       #define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE \
> +               UINT32_C(0x1)
> +       /* Disable the configuration mode. */
> +       #define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE_DISABLE \
> +               UINT32_C(0x0)
> +       /* Enable the configuration mode. */
> +       #define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE_ENABLE \
> +               UINT32_C(0x1)
> +       #define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE_LAST \
> +               HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_CFG_MODE_ENABLE
> +       /* Enumeration denoting the RX, TX type of the resource. */
> +       #define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH \
> +               UINT32_C(0x2)
> +       /* Tx path. */
> +       #define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH_TX \
> +               (UINT32_C(0x0) << 1)
> +       /* Rx path. */
> +       #define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH_RX \
> +               (UINT32_C(0x1) << 1)
> +       #define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH_LAST \
> +               HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_PATH_RX
> +       /* Enumeration denoting the data transfer mode. */
> +       #define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_MASK \
> +               UINT32_C(0xc)
> +       #define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_SFT 2
> +       /* Push mode. */
> +       #define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_PUSH \
> +               (UINT32_C(0x0) << 2)
> +       /* Pull mode. */
> +       #define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_PULL \
> +               (UINT32_C(0x1) << 2)
> +       /* Pull on async update. */
> +       #define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_PULL_ASYNC \
> +               (UINT32_C(0x2) << 2)
> +       #define HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_LAST \
> +               HWRM_CFA_COUNTER_CFG_INPUT_FLAGS_DATA_TRANSFER_MODE_PULL_ASYNC
> +       uint16_t        counter_type;
> +       /* Flow counters. */
> +       #define HWRM_CFA_COUNTER_CFG_INPUT_COUNTER_TYPE_FC  UINT32_C(0x0)
> +       /* Extended flow counters. */
> +       #define HWRM_CFA_COUNTER_CFG_INPUT_COUNTER_TYPE_EFC UINT32_C(0x1)
> +       /* Meter drop counters. */
> +       #define HWRM_CFA_COUNTER_CFG_INPUT_COUNTER_TYPE_MDC UINT32_C(0x2)
> +       #define HWRM_CFA_COUNTER_CFG_INPUT_COUNTER_TYPE_LAST \
> +               HWRM_CFA_COUNTER_CFG_INPUT_COUNTER_TYPE_MDC
> +       /* Ctx memory handle to be used for the counter. */
> +       uint16_t        ctx_id;
> +       /* Counter update cadence hint (only in Push mode). */
> +       uint16_t        update_tmr_ms;
> +       /* Total number of entries. */
> +       uint32_t        num_entries;
> +       uint32_t        unused_0;
> +} __attribute__((packed));
> +
> +/* hwrm_cfa_counter_cfg_output (size:128b/16B) */
> +struct hwrm_cfa_counter_cfg_output {
> +       /* The specific error status for the command. */
> +       uint16_t        error_code;
> +       /* The HWRM command request type. */
> +       uint16_t        req_type;
> +       /* The sequence ID from the original command. */
> +       uint16_t        seq_id;
> +       /* The length of the response data in number of bytes. */
> +       uint16_t        resp_len;
> +       uint8_t unused_0[7];
> +       /*
> +        * This field is used in Output records to indicate that the output
> +        * is completely written to RAM.  This field should be read as '1'
> +        * to indicate that the output has been completely written.
> +        * When writing a command completion or response to an internal processor,
> +        * the order of writes has to be such that this field is written last.
> +        */
> +       uint8_t valid;
> +} __attribute__((packed));
> +
> +/***************************
> + * hwrm_cfa_counter_qstats *
> + ***************************/
> +
> +
> +/* hwrm_cfa_counter_qstats_input (size:320b/40B) */
> +struct hwrm_cfa_counter_qstats_input {
> +       /* The HWRM command request type. */
> +       uint16_t        req_type;
> +       /*
> +        * The completion ring to send the completion event on. This should
> +        * be the NQ ID returned from the `nq_alloc` HWRM command.
> +        */
> +       uint16_t        cmpl_ring;
> +       /*
> +        * The sequence ID is used by the driver for tracking multiple
> +        * commands. This ID is treated as opaque data by the firmware and
> +        * the value is returned in the `hwrm_resp_hdr` upon completion.
> +        */
> +       uint16_t        seq_id;
> +       /*
> +        * The target ID of the command:
> +        * * 0x0-0xFFF8 - The function ID
> +        * * 0xFFF8-0xFFFC, 0xFFFE - Reserved for internal processors
> +        * * 0xFFFD - Reserved for user-space HWRM interface
> +        * * 0xFFFF - HWRM
> +        */
> +       uint16_t        target_id;
> +       /*
> +        * A physical address pointer pointing to a host buffer that the
> +        * command's response data will be written. This can be either a host
> +        * physical address (HPA) or a guest physical address (GPA) and must
> +        * point to a physically contiguous block of memory.
> +        */
> +       uint64_t        resp_addr;
> +       uint16_t        flags;
> +       /* Enumeration denoting the RX, TX type of the resource. */
> +       #define HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH     UINT32_C(0x1)
> +       /* Tx path. */
> +       #define HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH_TX  UINT32_C(0x0)
> +       /* Rx path. */
> +       #define HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH_RX  UINT32_C(0x1)
> +       #define HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH_LAST \
> +               HWRM_CFA_COUNTER_QSTATS_INPUT_FLAGS_PATH_RX
> +       uint16_t        counter_type;
> +       uint16_t        input_flow_ctx_id;
> +       uint16_t        num_entries;
> +       uint16_t        delta_time_ms;
> +       uint16_t        meter_instance_id;
> +       uint16_t        mdc_ctx_id;
> +       uint8_t unused_0[2];
> +       uint64_t        expected_count;
> +} __attribute__((packed));
> +
> +/* hwrm_cfa_counter_qstats_output (size:128b/16B) */
> +struct hwrm_cfa_counter_qstats_output {
> +       /* The specific error status for the command. */
> +       uint16_t        error_code;
> +       /* The HWRM command request type. */
> +       uint16_t        req_type;
> +       /* The sequence ID from the original command. */
> +       uint16_t        seq_id;
> +       /* The length of the response data in number of bytes. */
> +       uint16_t        resp_len;
> +       uint8_t unused_0[7];
> +       /*
> +        * This field is used in Output records to indicate that the output
> +        * is completely written to RAM.  This field should be read as '1'
> +        * to indicate that the output has been completely written.
> +        * When writing a command completion or response to an internal processor,
> +        * the order of writes has to be such that this field is written last.
> +        */
> +       uint8_t valid;
> +} __attribute__((packed));
> +
>  #endif /* _HSI_STRUCT_DEF_DPDK_H_ */
> --
> 2.21.1 (Apple Git-122.3)
>
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [dpdk-dev] [PATCH v2] net/bnxt: add flow stats in extended stats
  2020-04-17 14:49   ` Ajit Khaparde
@ 2020-04-17 22:03     ` Ferruh Yigit
  0 siblings, 0 replies; 4+ messages in thread
From: Ferruh Yigit @ 2020-04-17 22:03 UTC (permalink / raw)
  To: Ajit Khaparde, dpdk-dev; +Cc: Somnath Kotur, Sriharsha Basavapatna

On 4/17/2020 3:49 PM, Ajit Khaparde wrote:
> On Thu, Apr 16, 2020 at 10:49 PM Ajit Khaparde <ajit.khaparde@broadcom.com>
> wrote:
> 
>> From: Somnath Kotur <somnath.kotur@broadcom.com>
>>
>> This patch allows to display flow stats in extended stats.
>> To do this, DMA-able memory is registered with the FW during device
>> initialization. Then the driver uses an alarm thread to query the
>> per flow stats using the HWRM_CFA_COUNTER_QSTATS HWRM command at
>> regular intervals and stores it locally which will be displayed
>> when the application queries the xstats.
>> The DMA-able memory is unregistered during driver cleanup.
>> This functionality can be enabled using the flow-xstat devarg and
>> will be disabled by default. The intention behind this is to allow
>> stats to be displayed for all the flows in one shot instead of
>> querying one at a time.
>>
>> Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
>> Signed-off-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
>> Reviewed-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
>>
> Patch applied to dpdk-next-net-brcm.
> 

<...>

>> +static int bnxt_register_fc_ctx_mem(struct bnxt *bp)
>> +{
>> +       int rc = 0;
>> +
>> +       rc = bnxt_hwrm_ctx_rgtr(bp, bp->rx_fc_in_tbl.dma,
>> +                               &bp->rx_fc_in_tbl.ctx_id);
>> +       if (rc)
>> +               return rc;
>> +
>> +       PMD_DRV_LOG(DEBUG,
>> +                   "rx_fc_in_tbl.va = %p rx_fc_in_tbl.dma = %p"
>> +                   " rx_fc_in_tbl.ctx_id = %d\n",
>> +                   bp->rx_fc_in_tbl.va, (void *)bp->rx_fc_in_tbl.dma,
>> +                   bp->rx_fc_in_tbl.ctx_id);
>> +
>> +       rc = bnxt_hwrm_ctx_rgtr(bp, bp->rx_fc_out_tbl.dma,
>> +                               &bp->rx_fc_out_tbl.ctx_id);
>> +       if (rc)
>> +               return rc;
>> +
>> +       PMD_DRV_LOG(DEBUG,
>> +                   "rx_fc_out_tbl.va = %p rx_fc_out_tbl.dma = %p"
>> +                   " rx_fc_out_tbl.ctx_id = %d\n",
>> +                   bp->rx_fc_out_tbl.va, (void *)bp->rx_fc_out_tbl.dma,

This fails on 32-bit, as it did in the previous set; please check the 32-bit
build before submitting.

The build error is shown in [1]; the same applies to the logs below. Fixing while merging as follows:
 -                   bp->rx_fc_in_tbl.va, (void *)bp->rx_fc_in_tbl.dma,
 +                   bp->rx_fc_in_tbl.va,
 +                   (void *)((uintptr_t)bp->rx_fc_in_tbl.dma),


[1]
...dpdk/drivers/net/bnxt/bnxt_ethdev.c:356:28: error: cast to pointer from
integer of different size [-Werror=int-to-pointer-cast]
  356 |       bp->rx_fc_in_tbl.va, (void *)bp->rx_fc_in_tbl.dma,
      |                            ^
...dpdk/drivers/net/bnxt/bnxt.h:747:16: note: in definition of macro
‘PMD_DRV_LOG_RAW’
  747 |   __func__, ## args)
      |                ^~~~
...dpdk/drivers/net/bnxt/bnxt_ethdev.c:353:2: note: in expansion of macro
‘PMD_DRV_LOG’
  353 |  PMD_DRV_LOG(DEBUG,
      |  ^~~~~~~~~~~

<...>

>> +/* Alarm callback: requests the flow stats through bnxt_flow_stats_req()
>> + * and, on success, re-arms itself with rte_eal_alarm_set(). The flow
>> + * counter thread is cancelled instead when the output table is missing,
>> + * when bp->flow_count is zero or when the device is not started.
>> + */
>> +void bnxt_flow_cnt_alarm_cb(void *arg)
>> +{
>> +       struct bnxt *bp = arg;
>> +       int rc;
>> +
>> +       if (!bp->rx_fc_out_tbl.va) {
>> +               PMD_DRV_LOG(ERR, "bp->rx_fc_out_tbl.va is NULL?\n");
>> +               bnxt_cancel_fc_thread(bp);
>> +               return;
>> +       }
>> +
>> +       /* Nothing to poll: no flows, or the port is not started. */
>> +       if (!bp->flow_count || !bp->eth_dev->data->dev_started) {
>> +               bnxt_cancel_fc_thread(bp);
>> +               return;
>> +       }
>> +
>> +       rc = bnxt_flow_stats_req(bp);
>> +       if (rc != 0) {
>> +               PMD_DRV_LOG(ERR, "Flow stat alarm not rescheduled.\n");
>> +               return;
>> +       }
>> +
>> +       rte_eal_alarm_set(US_PER_S * BNXT_FC_TIMER,
>> +                         bnxt_flow_cnt_alarm_cb,
>> +                         (void *)bp);
>> +}

The cross build is failing because of 'US_PER_S' [3]; it seems a header is
missing, so I am adding it while merging [4].

[3]
...dpdk/drivers/net/bnxt/bnxt_flow.c: In function ‘bnxt_setup_flow_counter’:
...dpdk/drivers/net/bnxt/bnxt_flow.c:1636:21: error: ‘US_PER_S’ undeclared
(first use in this function); did you mean ‘US_PER_MS’?
   rte_eal_alarm_set(US_PER_S * BNXT_FC_TIMER,
                     ^~~~~~~~
                     US_PER_MS
...dpdk/drivers/net/bnxt/bnxt_flow.c:1636:21: note: each undeclared identifier
is reported only once for each function it appears in
...dpdk/drivers/net/bnxt/bnxt_flow.c: In function ‘bnxt_flow_cnt_alarm_cb’:
...dpdk/drivers/net/bnxt/bnxt_flow.c:1670:20: error: ‘US_PER_S’ undeclared
(first use in this function); did you mean ‘US_PER_MS’?
  rte_eal_alarm_set(US_PER_S * BNXT_FC_TIMER,
                    ^~~~~~~~
                    US_PER_MS

[4]
 --- a/drivers/net/bnxt/bnxt_flow.c
 +++ b/drivers/net/bnxt/bnxt_flow.c
 @@ -10,6 +10,8 @@
  #include <rte_flow.h>
  #include <rte_flow_driver.h>
  #include <rte_tailq.h>
 +#include <rte_alarm.h>
 +#include <rte_cycles.h>

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-04-17 22:03 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-17  5:37 [dpdk-dev] [PATCH] net/bnxt: add flow stats in extended stats Ajit Khaparde
2020-04-17  5:49 ` [dpdk-dev] [PATCH v2] " Ajit Khaparde
2020-04-17 14:49   ` Ajit Khaparde
2020-04-17 22:03     ` Ferruh Yigit

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).