DPDK patches and discussions
 help / color / mirror / Atom feed
From: Suanming Mou <suanmingm@nvidia.com>
To: Matan Azrad <matan@nvidia.com>,
	Shahaf Shuler <shahafs@nvidia.com>,
	Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Cc: dev@dpdk.org
Subject: [dpdk-dev] [PATCH v2 3/8] net/mlx5: remove single counter container
Date: Tue, 20 Oct 2020 11:02:23 +0800
Message-ID: <1603162949-150001-4-git-send-email-suanmingm@nvidia.com> (raw)
In-Reply-To: <1603162949-150001-1-git-send-email-suanmingm@nvidia.com>

A flow counter allocated by the batch API could not be assigned to a
flow in the root table (group 0) with old rdma-core versions.
Hence, root table flow counters required a PMD mechanism to manage
counters that were allocated singly.

Currently, batch counters are supported in the root table when a new
rdma-core version that includes the MLX5_FLOW_ACTION_COUNTER_OFFSET
enum is used together with a kernel driver that includes the
MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET enum.

When the PMD uses the rdma-core API to assign a batch counter to a root
table flow using an invalid counter offset, it should get an error only
if batch counter assignment for the root table is supported.
Running this trial at initialization time helps to detect the
support.

Using the above trial, if the support is detected, remove the management
of the single counter container in the fast counter mechanism. Otherwise,
move the counter mechanism to fallback mode.

Signed-off-by: Suanming Mou <suanmingm@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
 drivers/net/mlx5/linux/mlx5_os.c   |  42 +++-
 drivers/net/mlx5/mlx5.c            |  38 ++--
 drivers/net/mlx5/mlx5.h            |  28 +--
 drivers/net/mlx5/mlx5_flow.c       |  76 ++-----
 drivers/net/mlx5/mlx5_flow.h       |   1 +
 drivers/net/mlx5/mlx5_flow_dv.c    | 397 ++++++++++++++++---------------------
 drivers/net/mlx5/mlx5_flow_verbs.c |  26 +--
 7 files changed, 258 insertions(+), 350 deletions(-)

diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index d95082f..fbd95e7 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -562,6 +562,39 @@
 }
 
 /**
+ * DV flow counter mode detect and config.
+ *
+ * @param dev
+ *   Pointer to rte_eth_dev structure.
+ *
+ */
+static void
+mlx5_flow_counter_mode_config(struct rte_eth_dev *dev __rte_unused)
+{
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+	struct mlx5_priv *priv = dev->data->dev_private;
+
+	/* If devx is not supported or not DV mode, counters are not working. */
+	if (!priv->config.devx || !priv->config.dv_flow_en)
+		return;
+#ifndef HAVE_IBV_DEVX_ASYNC
+	priv->counter_fallback = 1;
+#else
+	priv->counter_fallback = 0;
+	if (!priv->config.hca_attr.flow_counters_dump ||
+	    !(priv->config.hca_attr.flow_counter_bulk_alloc_bitmap & 0x4) ||
+	    (mlx5_flow_dv_discover_counter_offset_support(dev) == -ENOTSUP))
+		priv->counter_fallback = 1;
+#endif
+	if (priv->counter_fallback)
+		DRV_LOG(INFO, "Use fall-back DV counter management. Flow "
+			"counter dump:%d, bulk_alloc_bitmap:0x%hhx.",
+			priv->config.hca_attr.flow_counters_dump,
+			priv->config.hca_attr.flow_counter_bulk_alloc_bitmap);
+#endif
+}
+
+/**
  * Spawn an Ethernet device from Verbs information.
  *
  * @param dpdk_dev
@@ -1029,19 +1062,11 @@
 		DRV_LOG(INFO, "Rx CQE padding is enabled");
 	}
 	if (config->devx) {
-		priv->counter_fallback = 0;
 		err = mlx5_devx_cmd_query_hca_attr(sh->ctx, &config->hca_attr);
 		if (err) {
 			err = -err;
 			goto error;
 		}
-		if (!config->hca_attr.flow_counters_dump)
-			priv->counter_fallback = 1;
-#ifndef HAVE_IBV_DEVX_ASYNC
-		priv->counter_fallback = 1;
-#endif
-		if (priv->counter_fallback)
-			DRV_LOG(INFO, "Use fall-back DV counter management");
 		/* Check for LRO support. */
 		if (config->dest_tir && config->hca_attr.lro_cap &&
 		    config->dv_flow_en) {
@@ -1443,6 +1468,7 @@
 			goto error;
 		}
 	}
+	mlx5_flow_counter_mode_config(eth_dev);
 	return eth_dev;
 error:
 	if (priv) {
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index a305e37..4d1ca9a 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -480,19 +480,18 @@ struct mlx5_flow_id_pool *
 static void
 mlx5_flow_counters_mng_init(struct mlx5_dev_ctx_shared *sh)
 {
-	int i, j;
+	int i;
 
 	memset(&sh->cmng, 0, sizeof(sh->cmng));
 	TAILQ_INIT(&sh->cmng.flow_counters);
-	for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) {
-		sh->cmng.ccont[i].min_id = MLX5_CNT_BATCH_OFFSET;
-		sh->cmng.ccont[i].max_id = -1;
-		sh->cmng.ccont[i].last_pool_idx = POOL_IDX_INVALID;
-		TAILQ_INIT(&sh->cmng.ccont[i].pool_list);
-		rte_spinlock_init(&sh->cmng.ccont[i].resize_sl);
-		for (j = 0; j < MLX5_COUNTER_TYPE_MAX; j++)
-			TAILQ_INIT(&sh->cmng.ccont[i].counters[j]);
-		rte_spinlock_init(&sh->cmng.ccont[i].csl);
+	sh->cmng.min_id = MLX5_CNT_BATCH_OFFSET;
+	sh->cmng.max_id = -1;
+	sh->cmng.last_pool_idx = POOL_IDX_INVALID;
+	TAILQ_INIT(&sh->cmng.pool_list);
+	rte_spinlock_init(&sh->cmng.resize_sl);
+	for (i = 0; i < MLX5_COUNTER_TYPE_MAX; i++) {
+		TAILQ_INIT(&sh->cmng.counters[i]);
+		rte_spinlock_init(&sh->cmng.csl[i]);
 	}
 }
 
@@ -523,7 +522,6 @@ struct mlx5_flow_id_pool *
 mlx5_flow_counters_mng_close(struct mlx5_dev_ctx_shared *sh)
 {
 	struct mlx5_counter_stats_mem_mng *mng;
-	int i;
 	int j;
 	int retries = 1024;
 
@@ -534,15 +532,13 @@ struct mlx5_flow_id_pool *
 			break;
 		rte_pause();
 	}
-	for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) {
+
+	if (sh->cmng.pools) {
 		struct mlx5_flow_counter_pool *pool;
-		uint32_t batch = (i == MLX5_CCONT_TYPE_BATCH);
 
-		if (!sh->cmng.ccont[i].pools)
-			continue;
-		pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
+		pool = TAILQ_FIRST(&sh->cmng.pool_list);
 		while (pool) {
-			if (batch && pool->min_dcs)
+			if (!IS_EXT_POOL(pool) && pool->min_dcs)
 				claim_zero(mlx5_devx_cmd_destroy
 							       (pool->min_dcs));
 			for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
@@ -551,17 +547,17 @@ struct mlx5_flow_id_pool *
 					 (mlx5_glue->destroy_flow_action
 					  (MLX5_POOL_GET_CNT
 					  (pool, j)->action));
-				if (!batch && MLX5_GET_POOL_CNT_EXT
+				if (IS_EXT_POOL(pool) && MLX5_GET_POOL_CNT_EXT
 				    (pool, j)->dcs)
 					claim_zero(mlx5_devx_cmd_destroy
 						   (MLX5_GET_POOL_CNT_EXT
 						    (pool, j)->dcs));
 			}
-			TAILQ_REMOVE(&sh->cmng.ccont[i].pool_list, pool, next);
+			TAILQ_REMOVE(&sh->cmng.pool_list, pool, next);
 			mlx5_free(pool);
-			pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
+			pool = TAILQ_FIRST(&sh->cmng.pool_list);
 		}
-		mlx5_free(sh->cmng.ccont[i].pools);
+		mlx5_free(sh->cmng.pools);
 	}
 	mng = LIST_FIRST(&sh->cmng.mem_mngs);
 	while (mng) {
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index e3ac07f..78cdac3 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -281,8 +281,10 @@ struct mlx5_drop {
 #define AGE_SIZE (sizeof(struct mlx5_age_param))
 #define CNT_POOL_TYPE_EXT	(1 << 0)
 #define CNT_POOL_TYPE_AGE	(1 << 1)
+
 #define IS_EXT_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_EXT)
 #define IS_AGE_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_AGE)
+
 #define MLX5_CNT_LEN(pool) \
 	(CNT_SIZE + \
 	(IS_AGE_POOL(pool) ? AGE_SIZE : 0) + \
@@ -323,14 +325,6 @@ enum {
 	AGE_TMOUT, /* Timeout, wait for rte_flow_get_aged_flows and destroy. */
 };
 
-#define MLX5_CNT_CONTAINER(sh, batch) (&(sh)->cmng.ccont[batch])
-
-enum {
-	MLX5_CCONT_TYPE_SINGLE,
-	MLX5_CCONT_TYPE_BATCH,
-	MLX5_CCONT_TYPE_MAX,
-};
-
 enum mlx5_counter_type {
 	MLX5_COUNTER_TYPE_ORIGIN,
 	MLX5_COUNTER_TYPE_AGE,
@@ -385,7 +379,6 @@ struct mlx5_flow_counter {
 
 /* Extend counters information for none batch fallback counters. */
 struct mlx5_flow_counter_ext {
-	uint32_t skipped:1; /* This counter is skipped or not. */
 	union {
 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
 		struct ibv_counter_set *cs;
@@ -409,9 +402,8 @@ struct mlx5_flow_counter_pool {
 	/* The devx object of the minimum counter ID. */
 	uint64_t time_of_last_age_check;
 	/* System time (from rte_rdtsc()) read in the last aging check. */
-	uint32_t index:28; /* Pool index in container. */
+	uint32_t index:29; /* Pool index in container. */
 	uint32_t type:2; /* Memory type behind the counter array. */
-	uint32_t skip_cnt:1; /* Pool contains skipped counter. */
 	volatile uint32_t query_gen:1; /* Query round. */
 	rte_spinlock_t sl; /* The pool lock. */
 	struct mlx5_counter_stats_raw *raw;
@@ -429,36 +421,30 @@ struct mlx5_counter_stats_mem_mng {
 /* Raw memory structure for the counter statistics values of a pool. */
 struct mlx5_counter_stats_raw {
 	LIST_ENTRY(mlx5_counter_stats_raw) next;
-	int min_dcs_id;
 	struct mlx5_counter_stats_mem_mng *mem_mng;
 	volatile struct flow_counter_stats *data;
 };
 
 TAILQ_HEAD(mlx5_counter_pools, mlx5_flow_counter_pool);
 
-/* Container structure for counter pools. */
-struct mlx5_pools_container {
+/* Counter global management structure. */
+struct mlx5_flow_counter_mng {
 	rte_atomic16_t n_valid; /* Number of valid pools. */
 	uint16_t n; /* Number of pools. */
 	uint16_t last_pool_idx; /* Last used pool index */
 	int min_id; /* The minimum counter ID in the pools. */
 	int max_id; /* The maximum counter ID in the pools. */
 	rte_spinlock_t resize_sl; /* The resize lock. */
-	rte_spinlock_t csl; /* The counter free list lock. */
+	rte_spinlock_t csl[MLX5_COUNTER_TYPE_MAX];
+	/* The counter free list lock. */
 	struct mlx5_counters counters[MLX5_COUNTER_TYPE_MAX];
 	/* Free counter list. */
 	struct mlx5_counter_pools pool_list; /* Counter pool list. */
 	struct mlx5_flow_counter_pool **pools; /* Counter pool array. */
 	struct mlx5_counter_stats_mem_mng *mem_mng;
 	/* Hold the memory management for the next allocated pools raws. */
-};
-
-/* Counter global management structure. */
-struct mlx5_flow_counter_mng {
-	struct mlx5_pools_container ccont[MLX5_CCONT_TYPE_MAX];
 	struct mlx5_counters flow_counters; /* Legacy flow counter list. */
 	uint8_t pending_queries;
-	uint8_t batch;
 	uint16_t pool_index;
 	uint8_t query_thread_on;
 	LIST_HEAD(mem_mngs, mlx5_counter_stats_mem_mng) mem_mngs;
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 598422c..dae7ac3 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -6592,26 +6592,6 @@ struct mlx5_meter_domains_infos *
 #define MLX5_POOL_QUERY_FREQ_US 1000000
 
 /**
- * Get number of all validate pools.
- *
- * @param[in] sh
- *   Pointer to mlx5_dev_ctx_shared object.
- *
- * @return
- *   The number of all validate pools.
- */
-static uint32_t
-mlx5_get_all_valid_pool_count(struct mlx5_dev_ctx_shared *sh)
-{
-	int i;
-	uint32_t pools_n = 0;
-
-	for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i)
-		pools_n += rte_atomic16_read(&sh->cmng.ccont[i].n_valid);
-	return pools_n;
-}
-
-/**
  * Set the periodic procedure for triggering asynchronous batch queries for all
  * the counter pools.
  *
@@ -6623,7 +6603,7 @@ struct mlx5_meter_domains_infos *
 {
 	uint32_t pools_n, us;
 
-	pools_n = mlx5_get_all_valid_pool_count(sh);
+	pools_n = rte_atomic16_read(&sh->cmng.n_valid);
 	us = MLX5_POOL_QUERY_FREQ_US / pools_n;
 	DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
 	if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
@@ -6645,31 +6625,16 @@ struct mlx5_meter_domains_infos *
 mlx5_flow_query_alarm(void *arg)
 {
 	struct mlx5_dev_ctx_shared *sh = arg;
-	struct mlx5_devx_obj *dcs;
-	uint16_t offset;
 	int ret;
-	uint8_t batch = sh->cmng.batch;
 	uint16_t pool_index = sh->cmng.pool_index;
-	struct mlx5_pools_container *cont;
+	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
 	struct mlx5_flow_counter_pool *pool;
-	int cont_loop = MLX5_CCONT_TYPE_MAX;
 
 	if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
 		goto set_alarm;
-next_container:
-	cont = MLX5_CNT_CONTAINER(sh, batch);
-	rte_spinlock_lock(&cont->resize_sl);
-	if (!cont->pools) {
-		rte_spinlock_unlock(&cont->resize_sl);
-		/* Check if all the containers are empty. */
-		if (unlikely(--cont_loop == 0))
-			goto set_alarm;
-		batch ^= 0x1;
-		pool_index = 0;
-		goto next_container;
-	}
-	pool = cont->pools[pool_index];
-	rte_spinlock_unlock(&cont->resize_sl);
+	rte_spinlock_lock(&cmng->resize_sl);
+	pool = cmng->pools[pool_index];
+	rte_spinlock_unlock(&cmng->resize_sl);
 	if (pool->raw_hw)
 		/* There is a pool query in progress. */
 		goto set_alarm;
@@ -6678,14 +6643,6 @@ struct mlx5_meter_domains_infos *
 	if (!pool->raw_hw)
 		/* No free counter statistics raw memory. */
 		goto set_alarm;
-	dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read
-							      (&pool->a64_dcs);
-	if (dcs->id & (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1)) {
-		/* Pool without valid counter. */
-		pool->raw_hw = NULL;
-		goto next_pool;
-	}
-	offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL;
 	/*
 	 * Identify the counters released between query trigger and query
 	 * handle more efficiently. The counter released in this gap period
@@ -6693,11 +6650,12 @@ struct mlx5_meter_domains_infos *
 	 * will not be taken into account.
 	 */
 	pool->query_gen++;
-	ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL -
-					       offset, NULL, NULL,
+	ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
+					       MLX5_COUNTERS_PER_POOL,
+					       NULL, NULL,
 					       pool->raw_hw->mem_mng->dm->id,
 					       (void *)(uintptr_t)
-					       (pool->raw_hw->data + offset),
+					       pool->raw_hw->data,
 					       sh->devx_comp,
 					       (uint64_t)(uintptr_t)pool);
 	if (ret) {
@@ -6706,17 +6664,12 @@ struct mlx5_meter_domains_infos *
 		pool->raw_hw = NULL;
 		goto set_alarm;
 	}
-	pool->raw_hw->min_dcs_id = dcs->id;
 	LIST_REMOVE(pool->raw_hw, next);
 	sh->cmng.pending_queries++;
-next_pool:
 	pool_index++;
-	if (pool_index >= rte_atomic16_read(&cont->n_valid)) {
-		batch ^= 0x1;
+	if (pool_index >= rte_atomic16_read(&cmng->n_valid))
 		pool_index = 0;
-	}
 set_alarm:
-	sh->cmng.batch = batch;
 	sh->cmng.pool_index = pool_index;
 	mlx5_set_query_alarm(sh);
 }
@@ -6809,8 +6762,7 @@ struct mlx5_meter_domains_infos *
 		(struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
 	struct mlx5_counter_stats_raw *raw_to_free;
 	uint8_t query_gen = pool->query_gen ^ 1;
-	struct mlx5_pools_container *cont =
-		MLX5_CNT_CONTAINER(sh, !IS_EXT_POOL(pool));
+	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
 	enum mlx5_counter_type cnt_type =
 		IS_AGE_POOL(pool) ? MLX5_COUNTER_TYPE_AGE :
 				    MLX5_COUNTER_TYPE_ORIGIN;
@@ -6827,10 +6779,10 @@ struct mlx5_meter_domains_infos *
 		/* Be sure the new raw counters data is updated in memory. */
 		rte_io_wmb();
 		if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
-			rte_spinlock_lock(&cont->csl);
-			TAILQ_CONCAT(&cont->counters[cnt_type],
+			rte_spinlock_lock(&cmng->csl[cnt_type]);
+			TAILQ_CONCAT(&cmng->counters[cnt_type],
 				     &pool->counters[query_gen], next);
-			rte_spinlock_unlock(&cont->csl);
+			rte_spinlock_unlock(&cmng->csl[cnt_type]);
 		}
 	}
 	LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index ec6aa19..b4be476 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -1144,4 +1144,5 @@ int mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev,
 				    const struct rte_flow_attr *attr);
 int mlx5_flow_meter_flush(struct rte_eth_dev *dev,
 			  struct rte_mtr_error *error);
+int mlx5_flow_dv_discover_counter_offset_support(struct rte_eth_dev *dev);
 #endif /* RTE_PMD_MLX5_FLOW_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index b16db1d..bd29140 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -4603,19 +4603,13 @@ struct field_modify_info modify_tcp[] = {
 			   struct mlx5_flow_counter_pool **ppool)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_pools_container *cont;
+	struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
 	struct mlx5_flow_counter_pool *pool;
-	uint32_t batch = 0;
 
 	/* Decrease to original index and clear shared bit. */
 	idx = (idx - 1) & (MLX5_CNT_SHARED_OFFSET - 1);
-	if (IS_BATCH_CNT(idx)) {
-		idx -= MLX5_CNT_BATCH_OFFSET;
-		batch = 1;
-	}
-	cont = MLX5_CNT_CONTAINER(priv->sh, batch);
-	MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < cont->n);
-	pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL];
+	MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < cmng->n);
+	pool = cmng->pools[idx / MLX5_COUNTERS_PER_POOL];
 	MLX5_ASSERT(pool);
 	if (ppool)
 		*ppool = pool;
@@ -4647,8 +4641,8 @@ struct field_modify_info modify_tcp[] = {
 /**
  * Get a pool by devx counter ID.
  *
- * @param[in] cont
- *   Pointer to the counter container.
+ * @param[in] cmng
+ *   Pointer to the counter management.
  * @param[in] id
  *   The counter devx ID.
  *
@@ -4656,25 +4650,25 @@ struct field_modify_info modify_tcp[] = {
  *   The counter pool pointer if exists, NULL otherwise,
  */
 static struct mlx5_flow_counter_pool *
-flow_dv_find_pool_by_id(struct mlx5_pools_container *cont, int id)
+flow_dv_find_pool_by_id(struct mlx5_flow_counter_mng *cmng, int id)
 {
 	uint32_t i;
 
 	/* Check last used pool. */
-	if (cont->last_pool_idx != POOL_IDX_INVALID &&
-	    flow_dv_is_counter_in_pool(cont->pools[cont->last_pool_idx], id))
-		return cont->pools[cont->last_pool_idx];
+	if (cmng->last_pool_idx != POOL_IDX_INVALID &&
+	    flow_dv_is_counter_in_pool(cmng->pools[cmng->last_pool_idx], id))
+		return cmng->pools[cmng->last_pool_idx];
 	/* ID out of range means no suitable pool in the container. */
-	if (id > cont->max_id || id < cont->min_id)
+	if (id > cmng->max_id || id < cmng->min_id)
 		return NULL;
 	/*
 	 * Find the pool from the end of the container, since mostly counter
 	 * ID is sequence increasing, and the last pool should be the needed
 	 * one.
 	 */
-	i = rte_atomic16_read(&cont->n_valid);
+	i = rte_atomic16_read(&cmng->n_valid);
 	while (i--) {
-		struct mlx5_flow_counter_pool *pool = cont->pools[i];
+		struct mlx5_flow_counter_pool *pool = cmng->pools[i];
 
 		if (flow_dv_is_counter_in_pool(pool, id))
 			return pool;
@@ -4764,20 +4758,18 @@ struct field_modify_info modify_tcp[] = {
  *
  * @param[in] dev
  *   Pointer to the Ethernet device structure.
- * @param[in] batch
- *   Whether the pool is for counter that was allocated by batch command.
  *
  * @return
  *   0 on success, otherwise negative errno value and rte_errno is set.
  */
 static int
-flow_dv_container_resize(struct rte_eth_dev *dev, uint32_t batch)
+flow_dv_container_resize(struct rte_eth_dev *dev)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch);
+	struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
 	struct mlx5_counter_stats_mem_mng *mem_mng = NULL;
-	void *old_pools = cont->pools;
-	uint32_t resize = cont->n + MLX5_CNT_CONTAINER_RESIZE;
+	void *old_pools = cmng->pools;
+	uint32_t resize = cmng->n + MLX5_CNT_CONTAINER_RESIZE;
 	uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * resize;
 	void *pools = mlx5_malloc(MLX5_MEM_ZERO, mem_size, 0, SOCKET_ID_ANY);
 
@@ -4786,7 +4778,7 @@ struct field_modify_info modify_tcp[] = {
 		return -ENOMEM;
 	}
 	if (old_pools)
-		memcpy(pools, old_pools, cont->n *
+		memcpy(pools, old_pools, cmng->n *
 				       sizeof(struct mlx5_flow_counter_pool *));
 	/*
 	 * Fallback mode query the counter directly, no background query
@@ -4807,11 +4799,11 @@ struct field_modify_info modify_tcp[] = {
 					 MLX5_CNT_CONTAINER_RESIZE +
 					 i, next);
 	}
-	rte_spinlock_lock(&cont->resize_sl);
-	cont->n = resize;
-	cont->mem_mng = mem_mng;
-	cont->pools = pools;
-	rte_spinlock_unlock(&cont->resize_sl);
+	rte_spinlock_lock(&cmng->resize_sl);
+	cmng->n = resize;
+	cmng->mem_mng = mem_mng;
+	cmng->pools = pools;
+	rte_spinlock_unlock(&cmng->resize_sl);
 	if (old_pools)
 		mlx5_free(old_pools);
 	return 0;
@@ -4844,27 +4836,15 @@ struct field_modify_info modify_tcp[] = {
 
 	cnt = flow_dv_counter_get_by_idx(dev, counter, &pool);
 	MLX5_ASSERT(pool);
-	if (!IS_BATCH_CNT(counter)) {
+	if (priv->counter_fallback) {
 		cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt);
-		if (priv->counter_fallback)
-			return mlx5_devx_cmd_flow_counter_query(cnt_ext->dcs, 0,
+		return mlx5_devx_cmd_flow_counter_query(cnt_ext->dcs, 0,
 					0, pkts, bytes, 0, NULL, NULL, 0);
 	}
-
 	rte_spinlock_lock(&pool->sl);
-	/*
-	 * The single counters allocation may allocate smaller ID than the
-	 * current allocated in parallel to the host reading.
-	 * In this case the new counter values must be reported as 0.
-	 */
-	if (unlikely(cnt_ext && cnt_ext->dcs->id < pool->raw->min_dcs_id)) {
-		*pkts = 0;
-		*bytes = 0;
-	} else {
-		offset = MLX5_CNT_ARRAY_IDX(pool, cnt);
-		*pkts = rte_be_to_cpu_64(pool->raw->data[offset].hits);
-		*bytes = rte_be_to_cpu_64(pool->raw->data[offset].bytes);
-	}
+	offset = MLX5_CNT_ARRAY_IDX(pool, cnt);
+	*pkts = rte_be_to_cpu_64(pool->raw->data[offset].hits);
+	*bytes = rte_be_to_cpu_64(pool->raw->data[offset].bytes);
 	rte_spinlock_unlock(&pool->sl);
 	return 0;
 }
@@ -4876,8 +4856,6 @@ struct field_modify_info modify_tcp[] = {
  *   Pointer to the Ethernet device structure.
  * @param[out] dcs
  *   The devX counter handle.
- * @param[in] batch
- *   Whether the pool is for counter that was allocated by batch command.
  * @param[in] age
  *   Whether the pool is for counter that was allocated for aging.
  * @param[in/out] cont_cur
@@ -4888,124 +4866,64 @@ struct field_modify_info modify_tcp[] = {
  */
 static struct mlx5_flow_counter_pool *
 flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
-		    uint32_t batch, uint32_t age)
+		    uint32_t age)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_flow_counter_pool *pool;
-	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch);
-	int16_t n_valid = rte_atomic16_read(&cont->n_valid);
+	struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
+	int16_t n_valid = rte_atomic16_read(&cmng->n_valid);
+	uint32_t fallback = priv->counter_fallback;
 	uint32_t size = sizeof(*pool);
 
-	if (cont->n == n_valid && flow_dv_container_resize(dev, batch))
+	if (cmng->n == n_valid && flow_dv_container_resize(dev))
 		return NULL;
 	size += MLX5_COUNTERS_PER_POOL * CNT_SIZE;
-	size += (batch ? 0 : MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE);
+	size += (!fallback ? 0 : MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE);
 	size += (!age ? 0 : MLX5_COUNTERS_PER_POOL * AGE_SIZE);
 	pool = mlx5_malloc(MLX5_MEM_ZERO, size, 0, SOCKET_ID_ANY);
 	if (!pool) {
 		rte_errno = ENOMEM;
 		return NULL;
 	}
-	pool->min_dcs = dcs;
-	if (!priv->counter_fallback)
-		pool->raw = cont->mem_mng->raws + n_valid %
+	if (!fallback) {
+		pool->min_dcs = dcs;
+		pool->raw = cmng->mem_mng->raws + n_valid %
 						      MLX5_CNT_CONTAINER_RESIZE;
+	}
 	pool->raw_hw = NULL;
 	pool->type = 0;
-	pool->type |= (batch ? 0 :  CNT_POOL_TYPE_EXT);
+	pool->type |= (!fallback ? 0 :  CNT_POOL_TYPE_EXT);
 	pool->type |= (!age ? 0 :  CNT_POOL_TYPE_AGE);
 	pool->query_gen = 0;
 	rte_spinlock_init(&pool->sl);
 	TAILQ_INIT(&pool->counters[0]);
 	TAILQ_INIT(&pool->counters[1]);
-	TAILQ_INSERT_HEAD(&cont->pool_list, pool, next);
+	TAILQ_INSERT_HEAD(&cmng->pool_list, pool, next);
 	pool->index = n_valid;
 	pool->time_of_last_age_check = MLX5_CURR_TIME_SEC;
-	cont->pools[n_valid] = pool;
-	if (!batch) {
+	cmng->pools[n_valid] = pool;
+	if (fallback) {
 		int base = RTE_ALIGN_FLOOR(dcs->id, MLX5_COUNTERS_PER_POOL);
 
-		if (base < cont->min_id)
-			cont->min_id = base;
-		if (base > cont->max_id)
-			cont->max_id = base + MLX5_COUNTERS_PER_POOL - 1;
-		cont->last_pool_idx = pool->index;
+		if (base < cmng->min_id)
+			cmng->min_id = base;
+		if (base > cmng->max_id)
+			cmng->max_id = base + MLX5_COUNTERS_PER_POOL - 1;
+		cmng->last_pool_idx = pool->index;
 	}
 	/* Pool initialization must be updated before host thread access. */
 	rte_io_wmb();
-	rte_atomic16_add(&cont->n_valid, 1);
+	rte_atomic16_add(&cmng->n_valid, 1);
 	return pool;
 }
 
 /**
- * Restore skipped counters in the pool.
- *
- * As counter pool query requires the first counter dcs
- * ID start with 4 alinged, if the pool counters with
- * min_dcs ID are not aligned with 4, the counters will
- * be skipped.
- * Once other min_dcs ID less than these skipped counter
- * dcs ID appears, the skipped counters will be safe to
- * use.
- * Should be called when min_dcs is updated.
- *
- * @param[in] pool
- *   Current counter pool.
- * @param[in] last_min_dcs
- *   Last min_dcs.
- */
-static void
-flow_dv_counter_restore(struct mlx5_flow_counter_pool *pool,
-			struct mlx5_devx_obj *last_min_dcs)
-{
-	struct mlx5_flow_counter_ext *cnt_ext;
-	uint32_t offset, new_offset;
-	uint32_t skip_cnt = 0;
-	uint32_t i;
-
-	if (!pool->skip_cnt)
-		return;
-	/*
-	 * If last min_dcs is not valid. The skipped counter may even after
-	 * last min_dcs, set the offset to the whole pool.
-	 */
-	if (last_min_dcs->id & (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1))
-		offset = MLX5_COUNTERS_PER_POOL;
-	else
-		offset = last_min_dcs->id % MLX5_COUNTERS_PER_POOL;
-	new_offset = pool->min_dcs->id % MLX5_COUNTERS_PER_POOL;
-	/*
-	 * Check the counters from 1 to the last_min_dcs range. Counters
-	 * before new min_dcs indicates pool still has skipped counters.
-	 * Counters be skipped after new min_dcs will be ready to use.
-	 * Offset 0 counter must be empty or min_dcs, start from 1.
-	 */
-	for (i = 1; i < offset; i++) {
-		cnt_ext = MLX5_GET_POOL_CNT_EXT(pool, i);
-		if (cnt_ext->skipped) {
-			if (i > new_offset) {
-				cnt_ext->skipped = 0;
-				TAILQ_INSERT_TAIL
-					(&pool->counters[pool->query_gen],
-					 MLX5_POOL_GET_CNT(pool, i), next);
-			} else {
-				skip_cnt++;
-			}
-		}
-	}
-	if (!skip_cnt)
-		pool->skip_cnt = 0;
-}
-
-/**
  * Prepare a new counter and/or a new counter pool.
  *
  * @param[in] dev
  *   Pointer to the Ethernet device structure.
  * @param[out] cnt_free
  *   Where to put the pointer of a new counter.
- * @param[in] batch
- *   Whether the pool is for counter that was allocated by batch command.
  * @param[in] age
  *   Whether the pool is for counter that was allocated for aging.
  *
@@ -5016,98 +4934,45 @@ struct field_modify_info modify_tcp[] = {
 static struct mlx5_flow_counter_pool *
 flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
 			     struct mlx5_flow_counter **cnt_free,
-			     uint32_t batch, uint32_t age)
+			     uint32_t age)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_pools_container *cont;
+	struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
 	struct mlx5_flow_counter_pool *pool;
 	struct mlx5_counters tmp_tq;
-	struct mlx5_devx_obj *last_min_dcs;
 	struct mlx5_devx_obj *dcs = NULL;
 	struct mlx5_flow_counter *cnt;
 	enum mlx5_counter_type cnt_type =
 			age ? MLX5_COUNTER_TYPE_AGE : MLX5_COUNTER_TYPE_ORIGIN;
-	uint32_t add2other;
+	uint32_t fallback = priv->counter_fallback;
 	uint32_t i;
 
-	cont = MLX5_CNT_CONTAINER(priv->sh, batch);
-	if (!batch) {
-retry:
-		add2other = 0;
+	if (fallback) {
 		/* bulk_bitmap must be 0 for single counter allocation. */
 		dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0);
 		if (!dcs)
 			return NULL;
-		pool = flow_dv_find_pool_by_id(cont, dcs->id);
-		/*
-		 * If pool eixsts but with other type, counter will be added
-		 * to the other pool, need to reallocate new counter in the
-		 * ragne with same type later.
-		 */
+		pool = flow_dv_find_pool_by_id(cmng, dcs->id);
 		if (!pool) {
-			pool = flow_dv_pool_create(dev, dcs, batch,
-						   age);
+			pool = flow_dv_pool_create(dev, dcs, age);
 			if (!pool) {
 				mlx5_devx_cmd_destroy(dcs);
 				return NULL;
 			}
-		} else if ((!!IS_AGE_POOL(pool)) != age) {
-			add2other = 1;
-		}
-		if ((dcs->id < pool->min_dcs->id ||
-		    pool->min_dcs->id &
-		    (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1)) &&
-		    !(dcs->id & (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1))) {
-			/*
-			 * Update the pool min_dcs only if current dcs is
-			 * valid and exist min_dcs is not valid or greater
-			 * than new dcs.
-			 */
-			last_min_dcs = pool->min_dcs;
-			rte_atomic64_set(&pool->a64_dcs,
-					 (int64_t)(uintptr_t)dcs);
-			/*
-			 * Restore any skipped counters if the new min_dcs
-			 * ID is smaller or min_dcs is not valid.
-			 */
-			if (dcs->id < last_min_dcs->id ||
-			    last_min_dcs->id &
-			    (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1))
-				flow_dv_counter_restore(pool, last_min_dcs);
 		}
 		i = dcs->id % MLX5_COUNTERS_PER_POOL;
 		cnt = MLX5_POOL_GET_CNT(pool, i);
 		cnt->pool = pool;
 		MLX5_GET_POOL_CNT_EXT(pool, i)->dcs = dcs;
-		/*
-		 * If min_dcs is not valid, it means the new allocated dcs
-		 * also fail to become the valid min_dcs, just skip it.
-		 * Or if min_dcs is valid, and new dcs ID is smaller than
-		 * min_dcs, but not become the min_dcs, also skip it.
-		 */
-		if (pool->min_dcs->id &
-		    (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1) ||
-		    dcs->id < pool->min_dcs->id) {
-			MLX5_GET_POOL_CNT_EXT(pool, i)->skipped = 1;
-			pool->skip_cnt = 1;
-			goto retry;
-		}
-		if (add2other) {
-			TAILQ_INSERT_TAIL(&pool->counters[pool->query_gen],
-					  cnt, next);
-			goto retry;
-		}
 		*cnt_free = cnt;
 		return pool;
 	}
-	/* bulk_bitmap is in 128 counters units. */
-	if (priv->config.hca_attr.flow_counter_bulk_alloc_bitmap & 0x4)
-		dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0x4);
+	dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0x4);
 	if (!dcs) {
 		rte_errno = ENODATA;
 		return NULL;
 	}
-	pool = flow_dv_pool_create(dev, dcs, batch, age);
+	pool = flow_dv_pool_create(dev, dcs, age);
 	if (!pool) {
 		mlx5_devx_cmd_destroy(dcs);
 		return NULL;
@@ -5118,9 +4983,9 @@ struct field_modify_info modify_tcp[] = {
 		cnt->pool = pool;
 		TAILQ_INSERT_HEAD(&tmp_tq, cnt, next);
 	}
-	rte_spinlock_lock(&cont->csl);
-	TAILQ_CONCAT(&cont->counters[cnt_type], &tmp_tq, next);
-	rte_spinlock_unlock(&cont->csl);
+	rte_spinlock_lock(&cmng->csl[cnt_type]);
+	TAILQ_CONCAT(&cmng->counters[cnt_type], &tmp_tq, next);
+	rte_spinlock_unlock(&cmng->csl[cnt_type]);
 	*cnt_free = MLX5_POOL_GET_CNT(pool, 0);
 	(*cnt_free)->pool = pool;
 	return pool;
@@ -5157,8 +5022,6 @@ struct field_modify_info modify_tcp[] = {
  *   Indicate if this counter is shared with other flows.
  * @param[in] id
  *   Counter identifier.
- * @param[in] group
- *   Counter flow group.
  * @param[in] age
  *   Whether the counter was allocated for aging.
  *
@@ -5167,22 +5030,14 @@ struct field_modify_info modify_tcp[] = {
  */
 static uint32_t
 flow_dv_counter_alloc(struct rte_eth_dev *dev, uint32_t shared, uint32_t id,
-		      uint16_t group, uint32_t age)
+		      uint32_t age)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_flow_counter_pool *pool = NULL;
 	struct mlx5_flow_counter *cnt_free = NULL;
 	struct mlx5_flow_counter_ext *cnt_ext = NULL;
-	/*
-	 * Currently group 0 flow counter cannot be assigned to a flow if it is
-	 * not the first one in the batch counter allocation, so it is better
-	 * to allocate counters one by one for these flows in a separate
-	 * container.
-	 * A counter can be shared between different groups so need to take
-	 * shared counters from the single container.
-	 */
-	uint32_t batch = (group && !shared && !priv->counter_fallback) ? 1 : 0;
-	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch);
+	uint32_t fallback = priv->counter_fallback;
+	struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
 	enum mlx5_counter_type cnt_type =
 			age ? MLX5_COUNTER_TYPE_AGE : MLX5_COUNTER_TYPE_ORIGIN;
 	uint32_t cnt_idx;
@@ -5205,16 +5060,15 @@ struct field_modify_info modify_tcp[] = {
 		}
 	}
 	/* Get free counters from container. */
-	rte_spinlock_lock(&cont->csl);
-	cnt_free = TAILQ_FIRST(&cont->counters[cnt_type]);
+	rte_spinlock_lock(&cmng->csl[cnt_type]);
+	cnt_free = TAILQ_FIRST(&cmng->counters[cnt_type]);
 	if (cnt_free)
-		TAILQ_REMOVE(&cont->counters[cnt_type], cnt_free, next);
-	rte_spinlock_unlock(&cont->csl);
-	if (!cnt_free && !flow_dv_counter_pool_prepare(dev, &cnt_free,
-						       batch, age))
+		TAILQ_REMOVE(&cmng->counters[cnt_type], cnt_free, next);
+	rte_spinlock_unlock(&cmng->csl[cnt_type]);
+	if (!cnt_free && !flow_dv_counter_pool_prepare(dev, &cnt_free, age))
 		goto err;
 	pool = cnt_free->pool;
-	if (!batch)
+	if (fallback)
 		cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt_free);
 	/* Create a DV counter action only in the first time usage. */
 	if (!cnt_free->action) {
@@ -5222,7 +5076,7 @@ struct field_modify_info modify_tcp[] = {
 		struct mlx5_devx_obj *dcs;
 		int ret;
 
-		if (batch) {
+		if (!fallback) {
 			offset = MLX5_CNT_ARRAY_IDX(pool, cnt_free);
 			dcs = pool->min_dcs;
 		} else {
@@ -5238,7 +5092,6 @@ struct field_modify_info modify_tcp[] = {
 	}
 	cnt_idx = MLX5_MAKE_CNT_IDX(pool->index,
 				MLX5_CNT_ARRAY_IDX(pool, cnt_free));
-	cnt_idx += batch * MLX5_CNT_BATCH_OFFSET;
 	/* Update the counter reset values. */
 	if (_flow_dv_query_count(dev, cnt_idx, &cnt_free->hits,
 				 &cnt_free->bytes))
@@ -5253,16 +5106,16 @@ struct field_modify_info modify_tcp[] = {
 		cnt_free->shared_info.id = id;
 		cnt_idx |= MLX5_CNT_SHARED_OFFSET;
 	}
-	if (!priv->counter_fallback && !priv->sh->cmng.query_thread_on)
+	if (!fallback && !priv->sh->cmng.query_thread_on)
 		/* Start the asynchronous batch query by the host thread. */
 		mlx5_set_query_alarm(priv->sh);
 	return cnt_idx;
 err:
 	if (cnt_free) {
 		cnt_free->pool = pool;
-		rte_spinlock_lock(&cont->csl);
-		TAILQ_INSERT_TAIL(&cont->counters[cnt_type], cnt_free, next);
-		rte_spinlock_unlock(&cont->csl);
+		rte_spinlock_lock(&cmng->csl[cnt_type]);
+		TAILQ_INSERT_TAIL(&cmng->counters[cnt_type], cnt_free, next);
+		rte_spinlock_unlock(&cmng->csl[cnt_type]);
 	}
 	return 0;
 }
@@ -5346,7 +5199,6 @@ struct field_modify_info modify_tcp[] = {
 		return;
 	cnt = flow_dv_counter_get_by_idx(dev, counter, &pool);
 	MLX5_ASSERT(pool);
-
 	if (IS_SHARED_CNT(counter)) {
 		if (--cnt->shared_info.ref_cnt)
 			return;
@@ -5371,8 +5223,7 @@ struct field_modify_info modify_tcp[] = {
 	} else {
 		cnt_type = IS_AGE_POOL(pool) ? MLX5_COUNTER_TYPE_AGE :
 					       MLX5_COUNTER_TYPE_ORIGIN;
-		TAILQ_INSERT_TAIL(&((MLX5_CNT_CONTAINER
-				  (priv->sh, 0))->counters[cnt_type]),
+		TAILQ_INSERT_TAIL(&priv->sh->cmng.counters[cnt_type],
 				  cnt, next);
 	}
 }
@@ -8507,8 +8358,7 @@ struct field_modify_info modify_tcp[] = {
 
 	counter = flow_dv_counter_alloc(dev,
 				count ? count->shared : 0,
-				count ? count->id : 0,
-				dev_flow->dv.group, !!age);
+				count ? count->id : 0, !!age);
 	if (!counter || age == NULL)
 		return counter;
 	age_param  = flow_dv_counter_idx_get_age(dev, counter);
@@ -11419,6 +11269,103 @@ struct field_modify_info modify_tcp[] = {
 }
 
 /**
+ * Validate the batch counter support in root table.
+ *
+ * Create a simple flow with an invalid counter offset and a jump action on the
+ * root table to check whether batch counters with offset are supported there.
+ *
+ * @param[in] dev
+ *   Pointer to rte_eth_dev structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_dv_discover_counter_offset_support(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
+	struct mlx5_flow_dv_match_params mask = {
+		.size = sizeof(mask.buf),
+	};
+	struct mlx5_flow_dv_match_params value = {
+		.size = sizeof(value.buf),
+	};
+	struct mlx5dv_flow_matcher_attr dv_attr = {
+		.type = IBV_FLOW_ATTR_NORMAL,
+		.priority = 0,
+		.match_criteria_enable = 0,
+		.match_mask = (void *)&mask,
+	};
+	void *actions[2] = { 0 };
+	struct mlx5_flow_tbl_resource *tbl = NULL, *dest_tbl = NULL;
+	struct mlx5_devx_obj *dcs = NULL;
+	void *matcher = NULL;
+	void *flow = NULL;
+	int i, ret = -1;
+
+	tbl = flow_dv_tbl_resource_get(dev, 0, 0, 0, NULL);
+	if (!tbl)
+		goto err;
+	dest_tbl = flow_dv_tbl_resource_get(dev, 1, 0, 0, NULL);
+	if (!dest_tbl)
+		goto err;
+	dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0x4);
+	if (!dcs)
+		goto err;
+	ret = mlx5_flow_os_create_flow_action_count(dcs->obj, UINT16_MAX,
+						    &actions[0]);
+	if (ret)
+		goto err;
+	ret = mlx5_flow_os_create_flow_action_dest_flow_tbl
+				(dest_tbl->obj, &actions[1]);
+	if (ret)
+		goto err;
+	dv_attr.match_criteria_enable = flow_dv_matcher_enable(mask.buf);
+	ret = mlx5_flow_os_create_flow_matcher(sh->ctx, &dv_attr, tbl->obj,
+					       &matcher);
+	if (ret)
+		goto err;
+	ret = mlx5_flow_os_create_flow(matcher, (void *)&value, 2,
+				       actions, &flow);
+err:
+	/*
+	 * If batch counter with offset is not supported, the driver will not
+	 * validate the invalid offset value, so flow creation should succeed.
+	 * In this case, it means batch counter is not supported in root table.
+	 *
+	 * Otherwise, if flow creation fails, counter offset is supported.
+	 */
+	if (flow) {
+		DRV_LOG(INFO, "Batch counter is not supported in root "
+			      "table. Switch to fallback mode.");
+		rte_errno = ENOTSUP;
+		ret = -rte_errno;
+		claim_zero(mlx5_flow_os_destroy_flow(flow));
+	} else {
+		/* Check matcher to ensure validation failed at flow create. */
+		if (!matcher || (matcher && errno != EINVAL))
+			DRV_LOG(ERR, "Unexpected error in counter offset "
+				     "support detection");
+		ret = 0;
+	}
+	for (i = 0; i < 2; i++) {
+		if (actions[i])
+			claim_zero(mlx5_flow_os_destroy_flow_action
+				   (actions[i]));
+	}
+	if (matcher)
+		claim_zero(mlx5_flow_os_destroy_flow_matcher(matcher));
+	if (tbl)
+		flow_dv_tbl_resource_release(dev, tbl);
+	if (dest_tbl)
+		flow_dv_tbl_resource_release(dev, dest_tbl);
+	if (dcs)
+		claim_zero(mlx5_devx_cmd_destroy(dcs));
+	return ret;
+}
+
+/**
  * Query a devx counter.
  *
  * @param[in] dev
@@ -11580,7 +11527,7 @@ struct field_modify_info modify_tcp[] = {
 	uint32_t cnt;
 
 	flow_dv_shared_lock(dev);
-	cnt = flow_dv_counter_alloc(dev, 0, 0, 1, 0);
+	cnt = flow_dv_counter_alloc(dev, 0, 0, 0);
 	flow_dv_shared_unlock(dev);
 	return cnt;
 }
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index bda55c2..bd2a734 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -159,11 +159,11 @@
 			      struct mlx5_flow_counter_pool **ppool)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0);
+	struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
 	struct mlx5_flow_counter_pool *pool;
 
 	idx = (idx - 1) & (MLX5_CNT_SHARED_OFFSET - 1);
-	pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL];
+	pool = cmng->pools[idx / MLX5_COUNTERS_PER_POOL];
 	MLX5_ASSERT(pool);
 	if (ppool)
 		*ppool = pool;
@@ -254,12 +254,12 @@
 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0);
+	struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
 	struct mlx5_flow_counter_pool *pool = NULL;
 	struct mlx5_flow_counter_ext *cnt_ext = NULL;
 	struct mlx5_flow_counter *cnt = NULL;
 	union mlx5_l3t_data data;
-	uint32_t n_valid = rte_atomic16_read(&cont->n_valid);
+	uint32_t n_valid = rte_atomic16_read(&cmng->n_valid);
 	uint32_t pool_idx, cnt_idx;
 	uint32_t i;
 	int ret;
@@ -275,7 +275,7 @@
 		return data.dword;
 	}
 	for (pool_idx = 0; pool_idx < n_valid; ++pool_idx) {
-		pool = cont->pools[pool_idx];
+		pool = cmng->pools[pool_idx];
 		if (!pool)
 			continue;
 		cnt = TAILQ_FIRST(&pool->counters[0]);
@@ -286,7 +286,7 @@
 		struct mlx5_flow_counter_pool **pools;
 		uint32_t size;
 
-		if (n_valid == cont->n) {
+		if (n_valid == cmng->n) {
 			/* Resize the container pool array. */
 			size = sizeof(struct mlx5_flow_counter_pool *) *
 				     (n_valid + MLX5_CNT_CONTAINER_RESIZE);
@@ -295,13 +295,13 @@
 			if (!pools)
 				return 0;
 			if (n_valid) {
-				memcpy(pools, cont->pools,
+				memcpy(pools, cmng->pools,
 				       sizeof(struct mlx5_flow_counter_pool *) *
 				       n_valid);
-				mlx5_free(cont->pools);
+				mlx5_free(cmng->pools);
 			}
-			cont->pools = pools;
-			cont->n += MLX5_CNT_CONTAINER_RESIZE;
+			cmng->pools = pools;
+			cmng->n += MLX5_CNT_CONTAINER_RESIZE;
 		}
 		/* Allocate memory for new pool*/
 		size = sizeof(*pool) + (sizeof(*cnt_ext) + sizeof(*cnt)) *
@@ -315,10 +315,10 @@
 			TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
 		}
 		cnt = MLX5_POOL_GET_CNT(pool, 0);
-		cont->pools[n_valid] = pool;
+		cmng->pools[n_valid] = pool;
 		pool_idx = n_valid;
-		rte_atomic16_add(&cont->n_valid, 1);
-		TAILQ_INSERT_HEAD(&cont->pool_list, pool, next);
+		rte_atomic16_add(&cmng->n_valid, 1);
+		TAILQ_INSERT_HEAD(&cmng->pool_list, pool, next);
 	}
 	i = MLX5_CNT_ARRAY_IDX(pool, cnt);
 	cnt_idx = MLX5_MAKE_CNT_IDX(pool_idx, i);
-- 
1.8.3.1


  parent reply	other threads:[~2020-10-20  3:04 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-06 11:38 [dpdk-dev] [PATCH 0/6] net/mlx5: make counter thread safe Suanming Mou
2020-10-06 11:38 ` [dpdk-dev] [PATCH 1/6] net/mlx5: locate aging pools in the general container Suanming Mou
2020-10-06 11:38 ` [dpdk-dev] [PATCH 2/6] net/mlx5: optimize shared counter memory Suanming Mou
2020-10-06 11:38 ` [dpdk-dev] [PATCH 3/6] net/mlx5: remove single counter container Suanming Mou
2020-10-06 11:38 ` [dpdk-dev] [PATCH 4/6] net/mlx5: synchronize flow counter pool creation Suanming Mou
2020-10-06 11:38 ` [dpdk-dev] [PATCH 5/6] net/mlx5: make three level table thread safe Suanming Mou
2020-10-06 11:38 ` [dpdk-dev] [PATCH 6/6] net/mlx5: make shared counters " Suanming Mou
2020-10-20  3:02 ` [dpdk-dev] [PATCH v2 0/8] net/mlx5: make counter " Suanming Mou
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 1/8] net/mlx5: locate aging pools in the general container Suanming Mou
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 2/8] net/mlx5: optimize shared counter memory Suanming Mou
2020-10-20  3:02   ` Suanming Mou [this message]
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 4/8] net/mlx5: synchronize flow counter pool creation Suanming Mou
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 5/8] net/mlx5: make three level table thread safe Suanming Mou
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 6/8] net/mlx5: make shared counters " Suanming Mou
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 7/8] net/mlx5: rename flow counter macro Suanming Mou
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 8/8] net/mlx5: optimize counter extend memory Suanming Mou
2020-10-20 22:59   ` [dpdk-dev] [PATCH v2 0/8] net/mlx5: make counter thread safe Raslan Darawsheh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1603162949-150001-4-git-send-email-suanmingm@nvidia.com \
    --to=suanmingm@nvidia.com \
    --cc=dev@dpdk.org \
    --cc=matan@nvidia.com \
    --cc=shahafs@nvidia.com \
    --cc=viacheslavo@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ https://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git