DPDK patches and discussions
 help / color / mirror / Atom feed
From: Suanming Mou <suanmingm@mellanox.com>
To: matan@mellanox.com, viacheslavo@mellanox.com
Cc: dev@dpdk.org, rasland@mellanox.com
Subject: [dpdk-dev] [PATCH v2] net/mlx5: optimize free counter lookup
Date: Thu, 18 Jun 2020 16:12:50 +0800
Message-ID: <1592467970-143747-1-git-send-email-suanmingm@mellanox.com> (raw)
In-Reply-To: <1592466177-141918-1-git-send-email-suanmingm@mellanox.com>

Currently, when allocating a new counter, it is necessary to loop over the
whole container pool list to get a free counter.

In the case with millions of counters allocated, and all the pools
empty, allocating a new counter will still loop over the whole
container pool list first, and only then allocate a new pool to get a
free counter. The cycles spent on the pool list traversal are wasted.

Adding a global free counter list to the container helps to get free
counters more efficiently.

Signed-off-by: Suanming Mou <suanmingm@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---

v2: update the commit title.

This patch should be integrated after https://patches.dpdk.org/cover/71716/

---
 drivers/net/mlx5/mlx5.c            |   2 +
 drivers/net/mlx5/mlx5.h            |  14 ++---
 drivers/net/mlx5/mlx5_flow.c       |  18 ++++---
 drivers/net/mlx5/mlx5_flow_dv.c    | 104 +++++++++++++++++--------------------
 drivers/net/mlx5/mlx5_flow_verbs.c |   8 +--
 5 files changed, 73 insertions(+), 73 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 670a59e..1014b4e 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -462,6 +462,8 @@ struct mlx5_flow_id_pool *
 		sh->cmng.ccont[i].last_pool_idx = POOL_IDX_INVALID;
 		TAILQ_INIT(&sh->cmng.ccont[i].pool_list);
 		rte_spinlock_init(&sh->cmng.ccont[i].resize_sl);
+		TAILQ_INIT(&sh->cmng.ccont[i].counters);
+		rte_spinlock_init(&sh->cmng.ccont[i].csl);
 	}
 }
 
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 3ddae17..d28e84d 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -353,13 +353,14 @@ struct flow_counter_stats {
 	uint64_t bytes;
 };
 
+struct mlx5_flow_counter_pool;
 /* Generic counters information. */
 struct mlx5_flow_counter {
 	TAILQ_ENTRY(mlx5_flow_counter) next;
 	/**< Pointer to the next flow counter structure. */
 	union {
 		uint64_t hits; /**< Reset value of hits packets. */
-		int64_t query_gen; /**< Generation of the last release. */
+		struct mlx5_flow_counter_pool *pool; /**< Counter pool. */
 	};
 	uint64_t bytes; /**< Reset value of bytes. */
 	void *action; /**< Pointer to the dv action. */
@@ -387,16 +388,15 @@ struct mlx5_flow_counter_ext {
 /* Generic counter pool structure - query is in pool resolution. */
 struct mlx5_flow_counter_pool {
 	TAILQ_ENTRY(mlx5_flow_counter_pool) next;
-	struct mlx5_counters counters; /* Free counter list. */
+	struct mlx5_counters counters[2]; /* Free counter list. */
 	union {
 		struct mlx5_devx_obj *min_dcs;
 		rte_atomic64_t a64_dcs;
 	};
 	/* The devx object of the minimum counter ID. */
-	rte_atomic64_t start_query_gen; /* Query start round. */
-	rte_atomic64_t end_query_gen; /* Query end round. */
-	uint32_t index; /* Pool index in container. */
-	uint8_t type; /* Memory type behind the counter array. */
+	uint32_t index:29; /* Pool index in container. */
+	uint32_t type:2; /* Memory type behind the counter array. */
+	volatile uint32_t query_gen:1; /* Query round. */
 	rte_spinlock_t sl; /* The pool lock. */
 	struct mlx5_counter_stats_raw *raw;
 	struct mlx5_counter_stats_raw *raw_hw; /* The raw on HW working. */
@@ -430,6 +430,8 @@ struct mlx5_pools_container {
 	int min_id; /* The minimum counter ID in the pools. */
 	int max_id; /* The maximum counter ID in the pools. */
 	rte_spinlock_t resize_sl; /* The resize lock. */
+	rte_spinlock_t csl; /* The counter free list lock. */
+	struct mlx5_counters counters; /* Free counter list. */
 	struct mlx5_counter_pools pool_list; /* Counter pool list. */
 	struct mlx5_flow_counter_pool **pools; /* Counter pool array. */
 	struct mlx5_counter_stats_mem_mng *mem_mng;
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index ac9af08..3a48b89 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -5888,7 +5888,7 @@ struct mlx5_meter_domains_infos *
 	 * should wait for a new round of query as the new arrived packets
 	 * will not be taken into account.
 	 */
-	rte_atomic64_add(&pool->start_query_gen, 1);
+	pool->query_gen++;
 	ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL -
 					       offset, NULL, NULL,
 					       pool->raw_hw->mem_mng->dm->id,
@@ -5897,7 +5897,6 @@ struct mlx5_meter_domains_infos *
 					       sh->devx_comp,
 					       (uint64_t)(uintptr_t)pool);
 	if (ret) {
-		rte_atomic64_sub(&pool->start_query_gen, 1);
 		DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
 			" %d", pool->min_dcs->id);
 		pool->raw_hw = NULL;
@@ -6002,9 +6001,12 @@ struct mlx5_meter_domains_infos *
 	struct mlx5_flow_counter_pool *pool =
 		(struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
 	struct mlx5_counter_stats_raw *raw_to_free;
+	uint8_t age = !!IS_AGE_POOL(pool);
+	uint8_t query_gen = pool->query_gen ^ 1;
+	struct mlx5_pools_container *cont =
+		MLX5_CNT_CONTAINER(sh, !IS_EXT_POOL(pool), age);
 
 	if (unlikely(status)) {
-		rte_atomic64_sub(&pool->start_query_gen, 1);
 		raw_to_free = pool->raw_hw;
 	} else {
 		raw_to_free = pool->raw;
@@ -6013,12 +6015,14 @@ struct mlx5_meter_domains_infos *
 		rte_spinlock_lock(&pool->sl);
 		pool->raw = pool->raw_hw;
 		rte_spinlock_unlock(&pool->sl);
-		MLX5_ASSERT(rte_atomic64_read(&pool->end_query_gen) + 1 ==
-			    rte_atomic64_read(&pool->start_query_gen));
-		rte_atomic64_set(&pool->end_query_gen,
-				 rte_atomic64_read(&pool->start_query_gen));
 		/* Be sure the new raw counters data is updated in memory. */
 		rte_cio_wmb();
+		if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
+			rte_spinlock_lock(&cont->csl);
+			TAILQ_CONCAT(&cont->counters,
+				     &pool->counters[query_gen], next);
+			rte_spinlock_unlock(&cont->csl);
+		}
 	}
 	LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
 	pool->raw_hw = NULL;
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 9fa8568..6318bd9 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -4340,23 +4340,10 @@ struct field_modify_info modify_tcp[] = {
 	pool->type = 0;
 	pool->type |= (batch ? 0 :  CNT_POOL_TYPE_EXT);
 	pool->type |= (!age ? 0 :  CNT_POOL_TYPE_AGE);
+	pool->query_gen = 0;
 	rte_spinlock_init(&pool->sl);
-	/*
-	 * The generation of the new allocated counters in this pool is 0, 2 in
-	 * the pool generation makes all the counters valid for allocation.
-	 * The start and end query generation protect the counters be released
-	 * between the query and update gap period will not be reallocated
-	 * without the last query finished and stats updated to the memory.
-	 */
-	rte_atomic64_set(&pool->start_query_gen, 0x2);
-	/*
-	 * There's no background query thread for fallback mode, set the
-	 * end_query_gen to the maximum value since no need to wait for
-	 * statistics update.
-	 */
-	rte_atomic64_set(&pool->end_query_gen, priv->counter_fallback ?
-			 INT64_MAX : 0x2);
-	TAILQ_INIT(&pool->counters);
+	TAILQ_INIT(&pool->counters[0]);
+	TAILQ_INIT(&pool->counters[1]);
 	TAILQ_INSERT_HEAD(&cont->pool_list, pool, next);
 	pool->index = n_valid;
 	cont->pools[n_valid] = pool;
@@ -4432,6 +4419,7 @@ struct field_modify_info modify_tcp[] = {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_pools_container *cont;
 	struct mlx5_flow_counter_pool *pool;
+	struct mlx5_counters tmp_tq;
 	struct mlx5_devx_obj *dcs = NULL;
 	struct mlx5_flow_counter *cnt;
 	uint32_t i;
@@ -4457,7 +4445,7 @@ struct field_modify_info modify_tcp[] = {
 						pool, batch, age);
 		i = dcs->id % MLX5_COUNTERS_PER_POOL;
 		cnt = MLX5_POOL_GET_CNT(pool, i);
-		TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
+		cnt->pool = pool;
 		MLX5_GET_POOL_CNT_EXT(pool, i)->dcs = dcs;
 		*cnt_free = cnt;
 		return pool;
@@ -4474,11 +4462,17 @@ struct field_modify_info modify_tcp[] = {
 		mlx5_devx_cmd_destroy(dcs);
 		return NULL;
 	}
-	for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
+	TAILQ_INIT(&tmp_tq);
+	for (i = 1; i < MLX5_COUNTERS_PER_POOL; ++i) {
 		cnt = MLX5_POOL_GET_CNT(pool, i);
-		TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
+		cnt->pool = pool;
+		TAILQ_INSERT_HEAD(&tmp_tq, cnt, next);
 	}
+	rte_spinlock_lock(&cont->csl);
+	TAILQ_CONCAT(&cont->counters, &tmp_tq, next);
+	rte_spinlock_unlock(&cont->csl);
 	*cnt_free = MLX5_POOL_GET_CNT(pool, 0);
+	(*cnt_free)->pool = pool;
 	return pool;
 }
 
@@ -4570,28 +4564,16 @@ struct field_modify_info modify_tcp[] = {
 			return cnt_idx;
 		}
 	}
-	/* Pools which has a free counters are in the start. */
-	TAILQ_FOREACH(pool, &cont->pool_list, next) {
-		/*
-		 * The free counter reset values must be updated between the
-		 * counter release to the counter allocation, so, at least one
-		 * query must be done in this time. ensure it by saving the
-		 * query generation in the release time.
-		 * The free list is sorted according to the generation - so if
-		 * the first one is not updated, all the others are not
-		 * updated too.
-		 */
-		cnt_free = TAILQ_FIRST(&pool->counters);
-		if (cnt_free && cnt_free->query_gen <
-		    rte_atomic64_read(&pool->end_query_gen))
-			break;
-		cnt_free = NULL;
-	}
-	if (!cnt_free) {
-		pool = flow_dv_counter_pool_prepare(dev, &cnt_free, batch, age);
-		if (!pool)
-			return 0;
-	}
+	/* Get free counters from container. */
+	rte_spinlock_lock(&cont->csl);
+	cnt_free = TAILQ_FIRST(&cont->counters);
+	if (cnt_free)
+		TAILQ_REMOVE(&cont->counters, cnt_free, next);
+	rte_spinlock_unlock(&cont->csl);
+	if (!cnt_free && !flow_dv_counter_pool_prepare(dev, &cnt_free,
+						       batch, age))
+		goto err;
+	pool = cnt_free->pool;
 	if (!batch)
 		cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt_free);
 	/* Create a DV counter action only in the first time usage. */
@@ -4610,7 +4592,7 @@ struct field_modify_info modify_tcp[] = {
 					(dcs->obj, offset);
 		if (!cnt_free->action) {
 			rte_errno = errno;
-			return 0;
+			goto err;
 		}
 	}
 	cnt_idx = MLX5_MAKE_CNT_IDX(pool->index,
@@ -4620,7 +4602,7 @@ struct field_modify_info modify_tcp[] = {
 	/* Update the counter reset values. */
 	if (_flow_dv_query_count(dev, cnt_idx, &cnt_free->hits,
 				 &cnt_free->bytes))
-		return 0;
+		goto err;
 	if (cnt_ext) {
 		cnt_ext->shared = shared;
 		cnt_ext->ref_cnt = 1;
@@ -4636,13 +4618,15 @@ struct field_modify_info modify_tcp[] = {
 	if (!priv->counter_fallback && !priv->sh->cmng.query_thread_on)
 		/* Start the asynchronous batch query by the host thread. */
 		mlx5_set_query_alarm(priv->sh);
-	TAILQ_REMOVE(&pool->counters, cnt_free, next);
-	if (TAILQ_EMPTY(&pool->counters)) {
-		/* Move the pool to the end of the container pool list. */
-		TAILQ_REMOVE(&cont->pool_list, pool, next);
-		TAILQ_INSERT_TAIL(&cont->pool_list, pool, next);
-	}
 	return cnt_idx;
+err:
+	if (cnt_free) {
+		cnt_free->pool = pool;
+		rte_spinlock_lock(&cont->csl);
+		TAILQ_INSERT_TAIL(&cont->counters, cnt_free, next);
+		rte_spinlock_unlock(&cont->csl);
+	}
+	return 0;
 }
 
 /**
@@ -4735,15 +4719,23 @@ struct field_modify_info modify_tcp[] = {
 	}
 	if (IS_AGE_POOL(pool))
 		flow_dv_counter_remove_from_age(dev, counter, cnt);
-	/* Put the counter in the end - the last updated one. */
-	TAILQ_INSERT_TAIL(&pool->counters, cnt, next);
+	cnt->pool = pool;
 	/*
-	 * Counters released between query trigger and handler need
-	 * to wait the next round of query. Since the packets arrive
-	 * in the gap period will not be taken into account to the
-	 * old counter.
+	 * Put the counter back to the list to be updated in non-fallback mode.
+	 * Currently, two lists are used alternately: while one is being
+	 * queried, freed counters are added to the other list based on the
+	 * pool query_gen value. After the query finishes, the counters on that
+	 * list are moved to the global container counter list. The lists
+	 * switch roles when a query starts, so no lock is needed as the query
+	 * callback and the release function operate on different lists.
+	 *
 	 */
-	cnt->query_gen = rte_atomic64_read(&pool->start_query_gen);
+	if (!priv->counter_fallback)
+		TAILQ_INSERT_TAIL(&pool->counters[pool->query_gen], cnt, next);
+	else
+		TAILQ_INSERT_TAIL(&((MLX5_CNT_CONTAINER
+				  (priv->sh, 0, 0))->counters),
+				  cnt, next);
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 150f6bb..6b86437 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -176,7 +176,7 @@
 		pool = cont->pools[pool_idx];
 		if (!pool)
 			continue;
-		cnt = TAILQ_FIRST(&pool->counters);
+		cnt = TAILQ_FIRST(&pool->counters[0]);
 		if (cnt)
 			break;
 	}
@@ -209,7 +209,7 @@
 		pool->type |= CNT_POOL_TYPE_EXT;
 		for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
 			cnt = MLX5_POOL_GET_CNT(pool, i);
-			TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
+			TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
 		}
 		cnt = MLX5_POOL_GET_CNT(pool, 0);
 		cont->pools[n_valid] = pool;
@@ -227,7 +227,7 @@
 	/* Create counter with Verbs. */
 	ret = flow_verbs_counter_create(dev, cnt_ext);
 	if (!ret) {
-		TAILQ_REMOVE(&pool->counters, cnt, next);
+		TAILQ_REMOVE(&pool->counters[0], cnt, next);
 		return MLX5_MAKE_CNT_IDX(pool_idx, i);
 	}
 	/* Some error occurred in Verbs library. */
@@ -261,7 +261,7 @@
 		claim_zero(mlx5_glue->destroy_counters(cnt_ext->cs));
 		cnt_ext->cs = NULL;
 #endif
-		TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
+		TAILQ_INSERT_HEAD(&pool->counters[0], cnt, next);
 	}
 }
 
-- 
1.8.3.1


  reply	other threads:[~2020-06-18  8:12 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-18  7:42 [dpdk-dev] [PATCH] net/mlx5: optimize the counter allocate loop Suanming Mou
2020-06-18  8:12 ` Suanming Mou [this message]
2020-06-21 15:10   ` [dpdk-dev] [PATCH v2] net/mlx5: optimize free counter lookup Raslan Darawsheh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1592467970-143747-1-git-send-email-suanmingm@mellanox.com \
    --to=suanmingm@mellanox.com \
    --cc=dev@dpdk.org \
    --cc=matan@mellanox.com \
    --cc=rasland@mellanox.com \
    --cc=viacheslavo@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

DPDK patches and discussions

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://inbox.dpdk.org/dev/0 dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 dev dev/ https://inbox.dpdk.org/dev \
		dev@dpdk.org
	public-inbox-index dev

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.dev


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git