DPDK patches and discussions
* [dpdk-dev] [PATCH] net/mlx5: optimize the counter allocate loop
@ 2020-06-18  7:42 Suanming Mou
  2020-06-18  8:12 ` [dpdk-dev] [PATCH v2] net/mlx5: optimize free counter lookup Suanming Mou
From: Suanming Mou @ 2020-06-18  7:42 UTC (permalink / raw)
  To: viacheslavo, matan; +Cc: rasland, dev

Currently, when allocating a new counter, the whole container pool
list has to be traversed to find a free counter.

In the case with millions of counters allocated and all the pools
empty, allocating a new counter still traverses the whole container
pool list first and only then allocates a new pool to provide a free
counter. The cycles spent on the pool list traversal are wasted.

Add a global free counter list to the container so that free counters
can be fetched without scanning the pool list.

Signed-off-by: Suanming Mou <suanmingm@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---

This patch should be integrated after https://patches.dpdk.org/cover/71716/
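
To make the scheme concrete, here is a minimal sketch in plain C. The
names (counter_alloc, pool_register, COUNTERS_PER_POOL) are
hypothetical, and a pthread mutex stands in for the driver's
rte_spinlock; this illustrates the idea and is not the mlx5 code
itself. The container keeps a single lock-protected free list:
allocation pops the head in O(1) instead of walking the pool list, and
a newly created pool hands all of its counters over in one batched
TAILQ_CONCAT.

#include <pthread.h>
#include <stdio.h>
#include <sys/queue.h>

#define COUNTERS_PER_POOL 4	/* Stand-in for MLX5_COUNTERS_PER_POOL. */

struct counter {
	TAILQ_ENTRY(counter) next;
	struct cnt_pool *pool;	/* Owning pool, set at pool creation. */
};

TAILQ_HEAD(counter_list, counter);

struct cnt_pool {
	struct counter cnts[COUNTERS_PER_POOL];
};

struct container {
	pthread_mutex_t csl;		/* Free counter list lock. */
	struct counter_list counters;	/* Global free counter list. */
};

/* Allocation: pop the head of the global free list, O(1). */
static struct counter *
counter_alloc(struct container *cont)
{
	struct counter *cnt;

	pthread_mutex_lock(&cont->csl);
	cnt = TAILQ_FIRST(&cont->counters);
	if (cnt)
		TAILQ_REMOVE(&cont->counters, cnt, next);
	pthread_mutex_unlock(&cont->csl);
	return cnt;	/* NULL: no free counter, prepare a new pool. */
}

/* Pool creation: link every counter into a local list first, then
 * publish them all with a single locked concatenation. */
static void
pool_register(struct container *cont, struct cnt_pool *pool)
{
	struct counter_list tmp;
	int i;

	TAILQ_INIT(&tmp);
	for (i = 0; i < COUNTERS_PER_POOL; i++) {
		pool->cnts[i].pool = pool;
		TAILQ_INSERT_TAIL(&tmp, &pool->cnts[i], next);
	}
	pthread_mutex_lock(&cont->csl);
	TAILQ_CONCAT(&cont->counters, &tmp, next);
	pthread_mutex_unlock(&cont->csl);
}

int
main(void)
{
	struct container cont;
	struct cnt_pool pool;

	pthread_mutex_init(&cont.csl, NULL);
	TAILQ_INIT(&cont.counters);
	pool_register(&cont, &pool);
	printf("allocated %p from pool %p\n",
	       (void *)counter_alloc(&cont), (void *)&pool);
	return 0;
}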

---
 drivers/net/mlx5/mlx5.c            |   2 +
 drivers/net/mlx5/mlx5.h            |  14 ++---
 drivers/net/mlx5/mlx5_flow.c       |  18 ++++---
 drivers/net/mlx5/mlx5_flow_dv.c    | 104 +++++++++++++++++--------------------
 drivers/net/mlx5/mlx5_flow_verbs.c |   8 +--
 5 files changed, 73 insertions(+), 73 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 670a59e..1014b4e 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -462,6 +462,8 @@ struct mlx5_flow_id_pool *
 		sh->cmng.ccont[i].last_pool_idx = POOL_IDX_INVALID;
 		TAILQ_INIT(&sh->cmng.ccont[i].pool_list);
 		rte_spinlock_init(&sh->cmng.ccont[i].resize_sl);
+		TAILQ_INIT(&sh->cmng.ccont[i].counters);
+		rte_spinlock_init(&sh->cmng.ccont[i].csl);
 	}
 }
 
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 3ddae17..d28e84d 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -353,13 +353,14 @@ struct flow_counter_stats {
 	uint64_t bytes;
 };
 
+struct mlx5_flow_counter_pool;
 /* Generic counters information. */
 struct mlx5_flow_counter {
 	TAILQ_ENTRY(mlx5_flow_counter) next;
 	/**< Pointer to the next flow counter structure. */
 	union {
 		uint64_t hits; /**< Reset value of hits packets. */
-		int64_t query_gen; /**< Generation of the last release. */
+		struct mlx5_flow_counter_pool *pool; /**< Counter pool. */
 	};
 	uint64_t bytes; /**< Reset value of bytes. */
 	void *action; /**< Pointer to the dv action. */
@@ -387,16 +388,15 @@ struct mlx5_flow_counter_ext {
 /* Generic counter pool structure - query is in pool resolution. */
 struct mlx5_flow_counter_pool {
 	TAILQ_ENTRY(mlx5_flow_counter_pool) next;
-	struct mlx5_counters counters; /* Free counter list. */
+	struct mlx5_counters counters[2]; /* Free counter list. */
 	union {
 		struct mlx5_devx_obj *min_dcs;
 		rte_atomic64_t a64_dcs;
 	};
 	/* The devx object of the minimum counter ID. */
-	rte_atomic64_t start_query_gen; /* Query start round. */
-	rte_atomic64_t end_query_gen; /* Query end round. */
-	uint32_t index; /* Pool index in container. */
-	uint8_t type; /* Memory type behind the counter array. */
+	uint32_t index:29; /* Pool index in container. */
+	uint32_t type:2; /* Memory type behind the counter array. */
+	volatile uint32_t query_gen:1; /* Query round. */
 	rte_spinlock_t sl; /* The pool lock. */
 	struct mlx5_counter_stats_raw *raw;
 	struct mlx5_counter_stats_raw *raw_hw; /* The raw on HW working. */
@@ -430,6 +430,8 @@ struct mlx5_pools_container {
 	int min_id; /* The minimum counter ID in the pools. */
 	int max_id; /* The maximum counter ID in the pools. */
 	rte_spinlock_t resize_sl; /* The resize lock. */
+	rte_spinlock_t csl; /* The counter free list lock. */
+	struct mlx5_counters counters; /* Free counter list. */
 	struct mlx5_counter_pools pool_list; /* Counter pool list. */
 	struct mlx5_flow_counter_pool **pools; /* Counter pool array. */
 	struct mlx5_counter_stats_mem_mng *mem_mng;
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index ac9af08..3a48b89 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -5888,7 +5888,7 @@ struct mlx5_meter_domains_infos *
 	 * should wait for a new round of query as the new arrived packets
 	 * will not be taken into account.
 	 */
-	rte_atomic64_add(&pool->start_query_gen, 1);
+	pool->query_gen++;
 	ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL -
 					       offset, NULL, NULL,
 					       pool->raw_hw->mem_mng->dm->id,
@@ -5897,7 +5897,6 @@ struct mlx5_meter_domains_infos *
 					       sh->devx_comp,
 					       (uint64_t)(uintptr_t)pool);
 	if (ret) {
-		rte_atomic64_sub(&pool->start_query_gen, 1);
 		DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
 			" %d", pool->min_dcs->id);
 		pool->raw_hw = NULL;
@@ -6002,9 +6001,12 @@ struct mlx5_meter_domains_infos *
 	struct mlx5_flow_counter_pool *pool =
 		(struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
 	struct mlx5_counter_stats_raw *raw_to_free;
+	uint8_t age = !!IS_AGE_POOL(pool);
+	uint8_t query_gen = pool->query_gen ^ 1;
+	struct mlx5_pools_container *cont =
+		MLX5_CNT_CONTAINER(sh, !IS_EXT_POOL(pool), age);
 
 	if (unlikely(status)) {
-		rte_atomic64_sub(&pool->start_query_gen, 1);
 		raw_to_free = pool->raw_hw;
 	} else {
 		raw_to_free = pool->raw;
@@ -6013,12 +6015,14 @@ struct mlx5_meter_domains_infos *
 		rte_spinlock_lock(&pool->sl);
 		pool->raw = pool->raw_hw;
 		rte_spinlock_unlock(&pool->sl);
-		MLX5_ASSERT(rte_atomic64_read(&pool->end_query_gen) + 1 ==
-			    rte_atomic64_read(&pool->start_query_gen));
-		rte_atomic64_set(&pool->end_query_gen,
-				 rte_atomic64_read(&pool->start_query_gen));
 		/* Be sure the new raw counters data is updated in memory. */
 		rte_cio_wmb();
+		if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
+			rte_spinlock_lock(&cont->csl);
+			TAILQ_CONCAT(&cont->counters,
+				     &pool->counters[query_gen], next);
+			rte_spinlock_unlock(&cont->csl);
+		}
 	}
 	LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
 	pool->raw_hw = NULL;
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 9fa8568..6318bd9 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -4340,23 +4340,10 @@ struct field_modify_info modify_tcp[] = {
 	pool->type = 0;
 	pool->type |= (batch ? 0 :  CNT_POOL_TYPE_EXT);
 	pool->type |= (!age ? 0 :  CNT_POOL_TYPE_AGE);
+	pool->query_gen = 0;
 	rte_spinlock_init(&pool->sl);
-	/*
-	 * The generation of the new allocated counters in this pool is 0, 2 in
-	 * the pool generation makes all the counters valid for allocation.
-	 * The start and end query generation protect the counters be released
-	 * between the query and update gap period will not be reallocated
-	 * without the last query finished and stats updated to the memory.
-	 */
-	rte_atomic64_set(&pool->start_query_gen, 0x2);
-	/*
-	 * There's no background query thread for fallback mode, set the
-	 * end_query_gen to the maximum value since no need to wait for
-	 * statistics update.
-	 */
-	rte_atomic64_set(&pool->end_query_gen, priv->counter_fallback ?
-			 INT64_MAX : 0x2);
-	TAILQ_INIT(&pool->counters);
+	TAILQ_INIT(&pool->counters[0]);
+	TAILQ_INIT(&pool->counters[1]);
 	TAILQ_INSERT_HEAD(&cont->pool_list, pool, next);
 	pool->index = n_valid;
 	cont->pools[n_valid] = pool;
@@ -4432,6 +4419,7 @@ struct field_modify_info modify_tcp[] = {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_pools_container *cont;
 	struct mlx5_flow_counter_pool *pool;
+	struct mlx5_counters tmp_tq;
 	struct mlx5_devx_obj *dcs = NULL;
 	struct mlx5_flow_counter *cnt;
 	uint32_t i;
@@ -4457,7 +4445,7 @@ struct field_modify_info modify_tcp[] = {
 						pool, batch, age);
 		i = dcs->id % MLX5_COUNTERS_PER_POOL;
 		cnt = MLX5_POOL_GET_CNT(pool, i);
-		TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
+		cnt->pool = pool;
 		MLX5_GET_POOL_CNT_EXT(pool, i)->dcs = dcs;
 		*cnt_free = cnt;
 		return pool;
@@ -4474,11 +4462,17 @@ struct field_modify_info modify_tcp[] = {
 		mlx5_devx_cmd_destroy(dcs);
 		return NULL;
 	}
-	for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
+	TAILQ_INIT(&tmp_tq);
+	for (i = 1; i < MLX5_COUNTERS_PER_POOL; ++i) {
 		cnt = MLX5_POOL_GET_CNT(pool, i);
-		TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
+		cnt->pool = pool;
+		TAILQ_INSERT_HEAD(&tmp_tq, cnt, next);
 	}
+	rte_spinlock_lock(&cont->csl);
+	TAILQ_CONCAT(&cont->counters, &tmp_tq, next);
+	rte_spinlock_unlock(&cont->csl);
 	*cnt_free = MLX5_POOL_GET_CNT(pool, 0);
+	(*cnt_free)->pool = pool;
 	return pool;
 }
 
@@ -4570,28 +4564,16 @@ struct field_modify_info modify_tcp[] = {
 			return cnt_idx;
 		}
 	}
-	/* Pools which has a free counters are in the start. */
-	TAILQ_FOREACH(pool, &cont->pool_list, next) {
-		/*
-		 * The free counter reset values must be updated between the
-		 * counter release to the counter allocation, so, at least one
-		 * query must be done in this time. ensure it by saving the
-		 * query generation in the release time.
-		 * The free list is sorted according to the generation - so if
-		 * the first one is not updated, all the others are not
-		 * updated too.
-		 */
-		cnt_free = TAILQ_FIRST(&pool->counters);
-		if (cnt_free && cnt_free->query_gen <
-		    rte_atomic64_read(&pool->end_query_gen))
-			break;
-		cnt_free = NULL;
-	}
-	if (!cnt_free) {
-		pool = flow_dv_counter_pool_prepare(dev, &cnt_free, batch, age);
-		if (!pool)
-			return 0;
-	}
+	/* Get free counters from container. */
+	rte_spinlock_lock(&cont->csl);
+	cnt_free = TAILQ_FIRST(&cont->counters);
+	if (cnt_free)
+		TAILQ_REMOVE(&cont->counters, cnt_free, next);
+	rte_spinlock_unlock(&cont->csl);
+	if (!cnt_free && !flow_dv_counter_pool_prepare(dev, &cnt_free,
+						       batch, age))
+		goto err;
+	pool = cnt_free->pool;
 	if (!batch)
 		cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt_free);
 	/* Create a DV counter action only in the first time usage. */
@@ -4610,7 +4592,7 @@ struct field_modify_info modify_tcp[] = {
 					(dcs->obj, offset);
 		if (!cnt_free->action) {
 			rte_errno = errno;
-			return 0;
+			goto err;
 		}
 	}
 	cnt_idx = MLX5_MAKE_CNT_IDX(pool->index,
@@ -4620,7 +4602,7 @@ struct field_modify_info modify_tcp[] = {
 	/* Update the counter reset values. */
 	if (_flow_dv_query_count(dev, cnt_idx, &cnt_free->hits,
 				 &cnt_free->bytes))
-		return 0;
+		goto err;
 	if (cnt_ext) {
 		cnt_ext->shared = shared;
 		cnt_ext->ref_cnt = 1;
@@ -4636,13 +4618,15 @@ struct field_modify_info modify_tcp[] = {
 	if (!priv->counter_fallback && !priv->sh->cmng.query_thread_on)
 		/* Start the asynchronous batch query by the host thread. */
 		mlx5_set_query_alarm(priv->sh);
-	TAILQ_REMOVE(&pool->counters, cnt_free, next);
-	if (TAILQ_EMPTY(&pool->counters)) {
-		/* Move the pool to the end of the container pool list. */
-		TAILQ_REMOVE(&cont->pool_list, pool, next);
-		TAILQ_INSERT_TAIL(&cont->pool_list, pool, next);
-	}
 	return cnt_idx;
+err:
+	if (cnt_free) {
+		cnt_free->pool = pool;
+		rte_spinlock_lock(&cont->csl);
+		TAILQ_INSERT_TAIL(&cont->counters, cnt_free, next);
+		rte_spinlock_unlock(&cont->csl);
+	}
+	return 0;
 }
 
 /**
@@ -4735,15 +4719,23 @@ struct field_modify_info modify_tcp[] = {
 	}
 	if (IS_AGE_POOL(pool))
 		flow_dv_counter_remove_from_age(dev, counter, cnt);
-	/* Put the counter in the end - the last updated one. */
-	TAILQ_INSERT_TAIL(&pool->counters, cnt, next);
+	cnt->pool = pool;
 	/*
-	 * Counters released between query trigger and handler need
-	 * to wait the next round of query. Since the packets arrive
-	 * in the gap period will not be taken into account to the
-	 * old counter.
+	 * Put the counter back to the list to be updated in non-fallback
+	 * mode. Two lists are used alternately: while one is being
+	 * queried, freed counters are added to the other one, selected
+	 * by the pool query_gen value. Once the query finishes, its list
+	 * is concatenated to the global container counter list. The
+	 * lists switch when a query starts, so no lock is needed here:
+	 * the query callback and the release function always operate on
+	 * different lists.
 	 */
-	cnt->query_gen = rte_atomic64_read(&pool->start_query_gen);
+	if (!priv->counter_fallback)
+		TAILQ_INSERT_TAIL(&pool->counters[pool->query_gen], cnt, next);
+	else
+		TAILQ_INSERT_TAIL(&((MLX5_CNT_CONTAINER
+				  (priv->sh, 0, 0))->counters),
+				  cnt, next);
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 150f6bb..6b86437 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -176,7 +176,7 @@
 		pool = cont->pools[pool_idx];
 		if (!pool)
 			continue;
-		cnt = TAILQ_FIRST(&pool->counters);
+		cnt = TAILQ_FIRST(&pool->counters[0]);
 		if (cnt)
 			break;
 	}
@@ -209,7 +209,7 @@
 		pool->type |= CNT_POOL_TYPE_EXT;
 		for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
 			cnt = MLX5_POOL_GET_CNT(pool, i);
-			TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
+			TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
 		}
 		cnt = MLX5_POOL_GET_CNT(pool, 0);
 		cont->pools[n_valid] = pool;
@@ -227,7 +227,7 @@
 	/* Create counter with Verbs. */
 	ret = flow_verbs_counter_create(dev, cnt_ext);
 	if (!ret) {
-		TAILQ_REMOVE(&pool->counters, cnt, next);
+		TAILQ_REMOVE(&pool->counters[0], cnt, next);
 		return MLX5_MAKE_CNT_IDX(pool_idx, i);
 	}
 	/* Some error occurred in Verbs library. */
@@ -261,7 +261,7 @@
 		claim_zero(mlx5_glue->destroy_counters(cnt_ext->cs));
 		cnt_ext->cs = NULL;
 #endif
-		TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
+		TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
 	}
 }
 
-- 
1.8.3.1
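
The release path, sketched the same way and reusing the types from the
sketch above (again with hypothetical names: gen_pool, counter_release,
query_start, query_done). Each pool keeps two alternating free lists
indexed by a one-bit query generation: releases go to the list that is
not being queried, the generation flips when a query is triggered, and
the completion handler moves the previous generation's counters, whose
reset values are now up to date, onto the global container list. Only
that final hand-off takes the container lock, mirroring the patch's
pool->counters[2], the query_gen bitfield, and the TAILQ_CONCAT under
cont->csl.

/* Both lists must be TAILQ_INIT'ed at pool creation, as the patch
 * does for pool->counters[0] and pool->counters[1]. */
struct gen_pool {
	struct counter_list counters[2];	/* Alternating free lists. */
	unsigned int query_gen:1;		/* List taking new releases. */
};

/* Release: the query completion handler works on the other list, so
 * no lock is needed here. */
static void
counter_release(struct gen_pool *pool, struct counter *cnt)
{
	TAILQ_INSERT_TAIL(&pool->counters[pool->query_gen], cnt, next);
}

/* Query trigger: flip the generation; new releases go to the other
 * list while the query is in flight. */
static void
query_start(struct gen_pool *pool)
{
	pool->query_gen ^= 1;
}

/* Query completion: counters released before this query started now
 * have fresh reset values in memory, so publish them globally. */
static void
query_done(struct container *cont, struct gen_pool *pool)
{
	unsigned int done = pool->query_gen ^ 1;

	if (!TAILQ_EMPTY(&pool->counters[done])) {
		pthread_mutex_lock(&cont->csl);
		TAILQ_CONCAT(&cont->counters, &pool->counters[done], next);
		pthread_mutex_unlock(&cont->csl);
	}
}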

