patches for DPDK stable branches
 help / color / mirror / Atom feed
From: Michael Baum <michaelba@nvidia.com>
To: <stable@dpdk.org>
Cc: Matan Azrad <matan@nvidia.com>,
	Viacheslav Ovsiienko <viacheslavo@nvidia.com>,
	Kevin Traynor <ktraynor@redhat.com>
Subject: [PATCH 21.11 1/2] net/mlx5: fix race condition in counter pool resizing
Date: Thu, 24 Nov 2022 09:53:19 +0200	[thread overview]
Message-ID: <20221124075320.2026822-2-michaelba@nvidia.com> (raw)
In-Reply-To: <20221124075320.2026822-1-michaelba@nvidia.com>

[ upstream commit a94e89e47b59ebaf84246bbb34c06e1a004cde8a ]

The counter management structure has an array of counter pools. This
array is invalid at management structure initialization and grows on
demand.

The resizing includes:
1. Allocate memory for the new size.
2. Copy the existing data to the new memory.
3. Move the pointer to the new memory.
4. Free the old memory.

The third step can be performed before the second one in this function,
and the compiler may reorder them that way, but another thread might
then read the pointer before the copying is done and read invalid data
or even crash.

This patch allocates memory for this array once, at management structure
initialization, and limits the number of counters to 16M.

Fixes: 3aa279157fa0 ("net/mlx5: synchronize flow counter pool creation")

Signed-off-by: Michael Baum <michaelba@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
 drivers/net/mlx5/mlx5.c            | 26 ++++++++++++---
 drivers/net/mlx5/mlx5.h            |  5 +--
 drivers/net/mlx5/mlx5_flow.c       | 25 +++++++-------
 drivers/net/mlx5/mlx5_flow_dv.c    | 53 +++++-------------------------
 drivers/net/mlx5/mlx5_flow_verbs.c | 23 +++----------
 5 files changed, 51 insertions(+), 81 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 2234dc7563..f79e71fe1a 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -518,22 +518,37 @@ mlx5_flow_aging_init(struct mlx5_dev_ctx_shared *sh)
  *
  * @param[in] sh
  *   Pointer to mlx5_dev_ctx_shared object to free
+ *
+ * @return
+ *   0 on success, otherwise negative errno value and rte_errno is set.
  */
-static void
+static int
 mlx5_flow_counters_mng_init(struct mlx5_dev_ctx_shared *sh)
 {
+	void *pools;
 	int i;
 
+	pools = mlx5_malloc(MLX5_MEM_ZERO,
+			    sizeof(struct mlx5_flow_counter_pool *) *
+			    MLX5_COUNTER_POOLS_MAX_NUM,
+			    0, SOCKET_ID_ANY);
+	if (!pools) {
+		DRV_LOG(ERR, "Counter management allocation was failed.");
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
 	memset(&sh->cmng, 0, sizeof(sh->cmng));
 	TAILQ_INIT(&sh->cmng.flow_counters);
 	sh->cmng.min_id = MLX5_CNT_BATCH_OFFSET;
 	sh->cmng.max_id = -1;
+	sh->cmng.pools = pools;
 	sh->cmng.last_pool_idx = POOL_IDX_INVALID;
 	rte_spinlock_init(&sh->cmng.pool_update_sl);
 	for (i = 0; i < MLX5_COUNTER_TYPE_MAX; i++) {
 		TAILQ_INIT(&sh->cmng.counters[i]);
 		rte_spinlock_init(&sh->cmng.csl[i]);
 	}
+	return 0;
 }
 
 /**
@@ -591,8 +606,7 @@ mlx5_flow_counters_mng_close(struct mlx5_dev_ctx_shared *sh)
 					claim_zero
 					 (mlx5_flow_os_destroy_flow_action
 					  (cnt->action));
-				if (fallback && MLX5_POOL_GET_CNT
-				    (pool, j)->dcs_when_free)
+				if (fallback && cnt->dcs_when_free)
 					claim_zero(mlx5_devx_cmd_destroy
 						   (cnt->dcs_when_free));
 			}
@@ -1235,8 +1249,12 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
 		if (err)
 			goto error;
 	}
+	err = mlx5_flow_counters_mng_init(sh);
+	if (err) {
+		DRV_LOG(ERR, "Fail to initialize counters manage.");
+		goto error;
+	}
 	mlx5_flow_aging_init(sh);
-	mlx5_flow_counters_mng_init(sh);
 	mlx5_flow_ipool_create(sh, config);
 	/* Add context to the global device list. */
 	LIST_INSERT_HEAD(&mlx5_dev_ctx_list, sh, next);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index e99831a6bb..f14a76f301 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -323,9 +323,10 @@ struct mlx5_lb_ctx {
 	uint16_t refcnt; /* Reference count for representors. */
 };
 
+#define MLX5_COUNTER_POOLS_MAX_NUM (1 << 15)
 #define MLX5_COUNTERS_PER_POOL 512
 #define MLX5_MAX_PENDING_QUERIES 4
-#define MLX5_CNT_CONTAINER_RESIZE 64
+#define MLX5_CNT_MR_ALLOC_BULK 64
 #define MLX5_CNT_SHARED_OFFSET 0x80000000
 #define IS_BATCH_CNT(cnt) (((cnt) & (MLX5_CNT_SHARED_OFFSET - 1)) >= \
 			   MLX5_CNT_BATCH_OFFSET)
@@ -481,7 +482,6 @@ TAILQ_HEAD(mlx5_counter_pools, mlx5_flow_counter_pool);
 /* Counter global management structure. */
 struct mlx5_flow_counter_mng {
 	volatile uint16_t n_valid; /* Number of valid pools. */
-	uint16_t n; /* Number of pools. */
 	uint16_t last_pool_idx; /* Last used pool index */
 	int min_id; /* The minimum counter ID in the pools. */
 	int max_id; /* The maximum counter ID in the pools. */
@@ -550,6 +550,7 @@ struct mlx5_aso_age_action {
 };
 
 #define MLX5_ASO_AGE_ACTIONS_PER_POOL 512
+#define MLX5_ASO_AGE_CONTAINER_RESIZE 64
 
 struct mlx5_aso_age_pool {
 	struct mlx5_devx_obj *flow_hit_aso_obj;
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index df3a132ee4..1418631bc3 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -7955,7 +7955,7 @@ mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
 {
 	struct mlx5_counter_stats_mem_mng *mem_mng;
 	volatile struct flow_counter_stats *raw_data;
-	int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
+	int raws_n = MLX5_CNT_MR_ALLOC_BULK + MLX5_MAX_PENDING_QUERIES;
 	int size = (sizeof(struct flow_counter_stats) *
 			MLX5_COUNTERS_PER_POOL +
 			sizeof(struct mlx5_counter_stats_raw)) * raws_n +
@@ -7993,7 +7993,7 @@ mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
 	}
 	for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
 		LIST_INSERT_HEAD(&sh->cmng.free_stat_raws,
-				 mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
+				 mem_mng->raws + MLX5_CNT_MR_ALLOC_BULK + i,
 				 next);
 	LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
 	sh->cmng.mem_mng = mem_mng;
@@ -8017,14 +8017,13 @@ mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
 {
 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
 	/* Resize statistic memory once used out. */
-	if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) &&
+	if (!(pool->index % MLX5_CNT_MR_ALLOC_BULK) &&
 	    mlx5_flow_create_counter_stat_mem_mng(sh)) {
 		DRV_LOG(ERR, "Cannot resize counter stat mem.");
 		return -1;
 	}
 	rte_spinlock_lock(&pool->sl);
-	pool->raw = cmng->mem_mng->raws + pool->index %
-		    MLX5_CNT_CONTAINER_RESIZE;
+	pool->raw = cmng->mem_mng->raws + pool->index % MLX5_CNT_MR_ALLOC_BULK;
 	rte_spinlock_unlock(&pool->sl);
 	pool->raw_hw = NULL;
 	return 0;
@@ -8066,13 +8065,14 @@ void
 mlx5_flow_query_alarm(void *arg)
 {
 	struct mlx5_dev_ctx_shared *sh = arg;
-	int ret;
-	uint16_t pool_index = sh->cmng.pool_index;
 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
+	uint16_t pool_index = cmng->pool_index;
 	struct mlx5_flow_counter_pool *pool;
 	uint16_t n_valid;
+	int ret;
+
 
-	if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
+	if (cmng->pending_queries >= MLX5_MAX_PENDING_QUERIES)
 		goto set_alarm;
 	rte_spinlock_lock(&cmng->pool_update_sl);
 	pool = cmng->pools[pool_index];
@@ -8084,8 +8084,7 @@ mlx5_flow_query_alarm(void *arg)
 	if (pool->raw_hw)
 		/* There is a pool query in progress. */
 		goto set_alarm;
-	pool->raw_hw =
-		LIST_FIRST(&sh->cmng.free_stat_raws);
+	pool->raw_hw = LIST_FIRST(&cmng->free_stat_raws);
 	if (!pool->raw_hw)
 		/* No free counter statistics raw memory. */
 		goto set_alarm;
@@ -8111,12 +8110,12 @@ mlx5_flow_query_alarm(void *arg)
 		goto set_alarm;
 	}
 	LIST_REMOVE(pool->raw_hw, next);
-	sh->cmng.pending_queries++;
+	cmng->pending_queries++;
 	pool_index++;
 	if (pool_index >= n_valid)
 		pool_index = 0;
 set_alarm:
-	sh->cmng.pool_index = pool_index;
+	cmng->pool_index = pool_index;
 	mlx5_set_query_alarm(sh);
 }
 
@@ -8647,7 +8646,7 @@ mlx5_flow_dev_dump_sh_all(struct rte_eth_dev *dev,
 	}
 
 	/* get counter */
-	MLX5_ASSERT(cmng->n_valid <= cmng->n);
+	MLX5_ASSERT(cmng->n_valid <= MLX5_COUNTER_POOLS_MAX_NUM);
 	max = MLX5_COUNTERS_PER_POOL * cmng->n_valid;
 	for (j = 1; j <= max; j++) {
 		action = NULL;
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 4603736c28..d4df5f8bca 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -5865,7 +5865,7 @@ flow_dv_counter_get_by_idx(struct rte_eth_dev *dev,
 
 	/* Decrease to original index and clear shared bit. */
 	idx = (idx - 1) & (MLX5_CNT_SHARED_OFFSET - 1);
-	MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < cmng->n);
+	MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < MLX5_COUNTER_POOLS_MAX_NUM);
 	pool = cmng->pools[idx / MLX5_COUNTERS_PER_POOL];
 	MLX5_ASSERT(pool);
 	if (ppool)
@@ -5941,39 +5941,6 @@ flow_dv_find_pool_by_id(struct mlx5_flow_counter_mng *cmng, int id)
 	return pool;
 }
 
-/**
- * Resize a counter container.
- *
- * @param[in] dev
- *   Pointer to the Ethernet device structure.
- *
- * @return
- *   0 on success, otherwise negative errno value and rte_errno is set.
- */
-static int
-flow_dv_container_resize(struct rte_eth_dev *dev)
-{
-	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
-	void *old_pools = cmng->pools;
-	uint32_t resize = cmng->n + MLX5_CNT_CONTAINER_RESIZE;
-	uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * resize;
-	void *pools = mlx5_malloc(MLX5_MEM_ZERO, mem_size, 0, SOCKET_ID_ANY);
-
-	if (!pools) {
-		rte_errno = ENOMEM;
-		return -ENOMEM;
-	}
-	if (old_pools)
-		memcpy(pools, old_pools, cmng->n *
-				       sizeof(struct mlx5_flow_counter_pool *));
-	cmng->n = resize;
-	cmng->pools = pools;
-	if (old_pools)
-		mlx5_free(old_pools);
-	return 0;
-}
-
 /**
  * Query a devx flow counter.
  *
@@ -6025,8 +5992,6 @@ _flow_dv_query_count(struct rte_eth_dev *dev, uint32_t counter, uint64_t *pkts,
  *   The devX counter handle.
  * @param[in] age
  *   Whether the pool is for counter that was allocated for aging.
- * @param[in/out] cont_cur
- *   Pointer to the container pointer, it will be update in pool resize.
  *
  * @return
  *   The pool container pointer on success, NULL otherwise and rte_errno is set.
@@ -6038,9 +6003,14 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_flow_counter_pool *pool;
 	struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
-	bool fallback = priv->sh->cmng.counter_fallback;
+	bool fallback = cmng->counter_fallback;
 	uint32_t size = sizeof(*pool);
 
+	if (cmng->n_valid == MLX5_COUNTER_POOLS_MAX_NUM) {
+		DRV_LOG(ERR, "All counter is in used, try again later.");
+		rte_errno = EAGAIN;
+		return NULL;
+	}
 	size += MLX5_COUNTERS_PER_POOL * MLX5_CNT_SIZE;
 	size += (!age ? 0 : MLX5_COUNTERS_PER_POOL * MLX5_AGE_SIZE);
 	pool = mlx5_malloc(MLX5_MEM_ZERO, size, 0, SOCKET_ID_ANY);
@@ -6059,11 +6029,6 @@ flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
 	pool->time_of_last_age_check = MLX5_CURR_TIME_SEC;
 	rte_spinlock_lock(&cmng->pool_update_sl);
 	pool->index = cmng->n_valid;
-	if (pool->index == cmng->n && flow_dv_container_resize(dev)) {
-		mlx5_free(pool);
-		rte_spinlock_unlock(&cmng->pool_update_sl);
-		return NULL;
-	}
 	cmng->pools[pool->index] = pool;
 	cmng->n_valid++;
 	if (unlikely(fallback)) {
@@ -12020,7 +11985,7 @@ flow_dv_aso_age_release(struct rte_eth_dev *dev, uint32_t age_idx)
 }
 
 /**
- * Resize the ASO age pools array by MLX5_CNT_CONTAINER_RESIZE pools.
+ * Resize the ASO age pools array by MLX5_ASO_AGE_CONTAINER_RESIZE pools.
  *
  * @param[in] dev
  *   Pointer to the Ethernet device structure.
@@ -12034,7 +11999,7 @@ flow_dv_aso_age_pools_resize(struct rte_eth_dev *dev)
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_aso_age_mng *mng = priv->sh->aso_age_mng;
 	void *old_pools = mng->pools;
-	uint32_t resize = mng->n + MLX5_CNT_CONTAINER_RESIZE;
+	uint32_t resize = mng->n + MLX5_ASO_AGE_CONTAINER_RESIZE;
 	uint32_t mem_size = sizeof(struct mlx5_aso_age_pool *) * resize;
 	void *pools = mlx5_malloc(MLX5_MEM_ZERO, mem_size, 0, SOCKET_ID_ANY);
 
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index 165786f864..1d8f053d83 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -232,27 +232,14 @@ flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t id __rte_unused)
 			break;
 	}
 	if (!cnt) {
-		struct mlx5_flow_counter_pool **pools;
 		uint32_t size;
 
-		if (n_valid == cmng->n) {
-			/* Resize the container pool array. */
-			size = sizeof(struct mlx5_flow_counter_pool *) *
-				     (n_valid + MLX5_CNT_CONTAINER_RESIZE);
-			pools = mlx5_malloc(MLX5_MEM_ZERO, size, 0,
-					    SOCKET_ID_ANY);
-			if (!pools)
-				return 0;
-			if (n_valid) {
-				memcpy(pools, cmng->pools,
-				       sizeof(struct mlx5_flow_counter_pool *) *
-				       n_valid);
-				mlx5_free(cmng->pools);
-			}
-			cmng->pools = pools;
-			cmng->n += MLX5_CNT_CONTAINER_RESIZE;
+		if (n_valid == MLX5_COUNTER_POOLS_MAX_NUM) {
+			DRV_LOG(ERR, "All counter is in used, try again later.");
+			rte_errno = EAGAIN;
+			return 0;
 		}
-		/* Allocate memory for new pool*/
+		/* Allocate memory for new pool */
 		size = sizeof(*pool) + sizeof(*cnt) * MLX5_COUNTERS_PER_POOL;
 		pool = mlx5_malloc(MLX5_MEM_ZERO, size, 0, SOCKET_ID_ANY);
 		if (!pool)
-- 
2.25.1


  reply	other threads:[~2022-11-24  7:53 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-24  7:53 [PATCH 21.11 0/2] net/mlx5: some backported fixes Michael Baum
2022-11-24  7:53 ` Michael Baum [this message]
2022-11-29 11:50   ` [PATCH 21.11 1/2] net/mlx5: fix race condition in counter pool resizing Kevin Traynor
2022-11-24  7:53 ` [PATCH 21.11 2/2] net/mlx5: fix port event cleaning order Michael Baum

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221124075320.2026822-2-michaelba@nvidia.com \
    --to=michaelba@nvidia.com \
    --cc=ktraynor@redhat.com \
    --cc=matan@nvidia.com \
    --cc=stable@dpdk.org \
    --cc=viacheslavo@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).