patches for DPDK stable branches
 help / color / mirror / Atom feed
* [dpdk-stable] [PATCH] net/mlx5: fix counter container resize
@ 2020-05-12 12:52 Matan Azrad
  2020-05-13 15:27 ` Slava Ovsiienko
  2020-05-14  8:11 ` [dpdk-stable] [dpdk-dev] " Raslan Darawsheh
  0 siblings, 2 replies; 3+ messages in thread
From: Matan Azrad @ 2020-05-12 12:52 UTC (permalink / raw)
  To: dev; +Cc: Viacheslav Ovsiienko, stable

The design of counter container resize used double buffer algorithm in
order to synchronize between the query thread to the control thread.
When the control thread detected resize need, it created new bigger
buffer for the counter pools in a new container and change the container
index atomically.
In case the query thread had not detect the previous resize before a new
one need was detected by the control thread, the control thread returned
EAGAIN to the flow creation API used a COUNT action.

The rte_flow API doesn't allow unblocked commands and doesn't expect to
get EAGAIN error type.

So, when a lot of flows were created between 2 different periodic
queries, 2 different resizes might try to be created and caused EAGAIN
error.
This behavior may blame flow creations.

Change the synchronization way to use lock instead of double buffer
algorithm.

The critical section of this lock is very small, so flow insertion
rate should not be decreased.

Fixes: ebbac312e448 ("net/mlx5: resize a full counter container")
Cc: stable@dpdk.org

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/mlx5.c            |  70 +++++++++++------------
 drivers/net/mlx5/mlx5.h            |  23 +++++---
 drivers/net/mlx5/mlx5_flow.c       |  37 ++++---------
 drivers/net/mlx5/mlx5_flow.h       |   6 --
 drivers/net/mlx5/mlx5_flow_dv.c    | 110 +++++++++++++++----------------------
 drivers/net/mlx5/mlx5_flow_verbs.c |   6 +-
 6 files changed, 104 insertions(+), 148 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 4f704cb..1445809 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -467,13 +467,13 @@ struct mlx5_flow_id_pool *
 static void
 mlx5_flow_counters_mng_init(struct mlx5_ibv_shared *sh)
 {
-	uint8_t i, age;
+	int i;
 
-	sh->cmng.age = 0;
+	memset(&sh->cmng, 0, sizeof(sh->cmng));
 	TAILQ_INIT(&sh->cmng.flow_counters);
-	for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
-		for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i)
-			TAILQ_INIT(&sh->cmng.ccont[i][age].pool_list);
+	for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) {
+		TAILQ_INIT(&sh->cmng.ccont[i].pool_list);
+		rte_spinlock_init(&sh->cmng.ccont[i].resize_sl);
 	}
 }
 
@@ -504,7 +504,7 @@ struct mlx5_flow_id_pool *
 mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh)
 {
 	struct mlx5_counter_stats_mem_mng *mng;
-	uint8_t i, age = 0;
+	int i;
 	int j;
 	int retries = 1024;
 
@@ -515,42 +515,34 @@ struct mlx5_flow_id_pool *
 			break;
 		rte_pause();
 	}
+	for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) {
+		struct mlx5_flow_counter_pool *pool;
+		uint32_t batch = !!(i > 1);
 
-	for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
-		for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i) {
-			struct mlx5_flow_counter_pool *pool;
-			uint32_t batch = !!(i % 2);
-
-			if (!sh->cmng.ccont[i][age].pools)
-				continue;
-			pool = TAILQ_FIRST(&sh->cmng.ccont[i][age].pool_list);
-			while (pool) {
-				if (batch) {
-					if (pool->min_dcs)
-						claim_zero
-						(mlx5_devx_cmd_destroy
-						(pool->min_dcs));
-				}
-				for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
-					if (MLX5_POOL_GET_CNT(pool, j)->action)
-						claim_zero
-						(mlx5_glue->destroy_flow_action
-						 (MLX5_POOL_GET_CNT
-						  (pool, j)->action));
-					if (!batch && MLX5_GET_POOL_CNT_EXT
-					    (pool, j)->dcs)
-						claim_zero(mlx5_devx_cmd_destroy
-							  (MLX5_GET_POOL_CNT_EXT
-							  (pool, j)->dcs));
-				}
-				TAILQ_REMOVE(&sh->cmng.ccont[i][age].pool_list,
-					pool, next);
-				rte_free(pool);
-				pool = TAILQ_FIRST
-					(&sh->cmng.ccont[i][age].pool_list);
+		if (!sh->cmng.ccont[i].pools)
+			continue;
+		pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
+		while (pool) {
+			if (batch && pool->min_dcs)
+				claim_zero(mlx5_devx_cmd_destroy
+							       (pool->min_dcs));
+			for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
+				if (MLX5_POOL_GET_CNT(pool, j)->action)
+					claim_zero
+					 (mlx5_glue->destroy_flow_action
+					  (MLX5_POOL_GET_CNT
+					  (pool, j)->action));
+				if (!batch && MLX5_GET_POOL_CNT_EXT
+				    (pool, j)->dcs)
+					claim_zero(mlx5_devx_cmd_destroy
+						   (MLX5_GET_POOL_CNT_EXT
+						    (pool, j)->dcs));
 			}
-			rte_free(sh->cmng.ccont[i][age].pools);
+			TAILQ_REMOVE(&sh->cmng.ccont[i].pool_list, pool, next);
+			rte_free(pool);
+			pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
 		}
+		rte_free(sh->cmng.ccont[i].pools);
 	}
 	mng = LIST_FIRST(&sh->cmng.mem_mngs);
 	while (mng) {
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1740d4a..d9f5d81 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -227,13 +227,11 @@ struct mlx5_drop {
 #define CNTEXT_SIZE (sizeof(struct mlx5_flow_counter_ext))
 #define AGE_SIZE (sizeof(struct mlx5_age_param))
 #define MLX5_AGING_TIME_DELAY	7
-
 #define CNT_POOL_TYPE_EXT	(1 << 0)
 #define CNT_POOL_TYPE_AGE	(1 << 1)
 #define IS_EXT_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_EXT)
 #define IS_AGE_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_AGE)
 #define MLX_CNT_IS_AGE(counter) ((counter) & MLX5_CNT_AGE_OFFSET ? 1 : 0)
-
 #define MLX5_CNT_LEN(pool) \
 	(CNT_SIZE + \
 	(IS_AGE_POOL(pool) ? AGE_SIZE : 0) + \
@@ -270,6 +268,17 @@ enum {
 	AGE_TMOUT, /* Timeout, wait for rte_flow_get_aged_flows and destroy. */
 };
 
+#define MLX5_CNT_CONTAINER(sh, batch, age) (&(sh)->cmng.ccont \
+					    [(batch) * 2 + (age)])
+
+enum {
+	MLX5_CCONT_TYPE_SINGLE,
+	MLX5_CCONT_TYPE_SINGLE_FOR_AGE,
+	MLX5_CCONT_TYPE_BATCH,
+	MLX5_CCONT_TYPE_BATCH_FOR_AGE,
+	MLX5_CCONT_TYPE_MAX,
+};
+
 /* Counter age parameter. */
 struct mlx5_age_param {
 	rte_atomic16_t state; /**< Age state. */
@@ -313,7 +322,6 @@ struct mlx5_flow_counter_ext {
 	};
 };
 
-
 TAILQ_HEAD(mlx5_counters, mlx5_flow_counter);
 
 /* Generic counter pool structure - query is in pool resolution. */
@@ -358,17 +366,16 @@ struct mlx5_counter_stats_raw {
 struct mlx5_pools_container {
 	rte_atomic16_t n_valid; /* Number of valid pools. */
 	uint16_t n; /* Number of pools. */
+	rte_spinlock_t resize_sl; /* The resize lock. */
 	struct mlx5_counter_pools pool_list; /* Counter pool list. */
 	struct mlx5_flow_counter_pool **pools; /* Counter pool array. */
-	struct mlx5_counter_stats_mem_mng *init_mem_mng;
+	struct mlx5_counter_stats_mem_mng *mem_mng;
 	/* Hold the memory management for the next allocated pools raws. */
 };
 
 /* Counter global management structure. */
 struct mlx5_flow_counter_mng {
-	uint8_t mhi[2][2]; /* master \ host and age \ no age container index. */
-	struct mlx5_pools_container ccont[2 * 2][2];
-	/* master \ host and age \ no age pools container. */
+	struct mlx5_pools_container ccont[MLX5_CCONT_TYPE_MAX];
 	struct mlx5_counters flow_counters; /* Legacy flow counter list. */
 	uint8_t pending_queries;
 	uint8_t batch;
@@ -378,6 +385,7 @@ struct mlx5_flow_counter_mng {
 	LIST_HEAD(mem_mngs, mlx5_counter_stats_mem_mng) mem_mngs;
 	LIST_HEAD(stat_raws, mlx5_counter_stats_raw) free_stat_raws;
 };
+
 #define MLX5_AGE_EVENT_NEW		1
 #define MLX5_AGE_TRIGGER		2
 #define MLX5_AGE_SET(age_info, BIT) \
@@ -393,6 +401,7 @@ struct mlx5_age_info {
 	struct mlx5_counters aged_counters; /* Aged flow counter list. */
 	rte_spinlock_t aged_sl; /* Aged flow counter list lock. */
 };
+
 /* Per port data of shared IB device. */
 struct mlx5_ibv_shared_port {
 	uint32_t ih_port_id;
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 08c7cdf..ae478a5 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -5801,16 +5801,11 @@ struct mlx5_meter_domains_infos *
 static uint32_t
 mlx5_get_all_valid_pool_count(struct mlx5_ibv_shared *sh)
 {
-	uint8_t age, i;
+	int i;
 	uint32_t pools_n = 0;
-	struct mlx5_pools_container *cont;
 
-	for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
-		for (i = 0; i < 2 ; ++i) {
-			cont = MLX5_CNT_CONTAINER(sh, i, 0, age);
-			pools_n += rte_atomic16_read(&cont->n_valid);
-		}
-	}
+	for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i)
+		pools_n += rte_atomic16_read(&sh->cmng.ccont[i].n_valid);
 	return pools_n;
 }
 
@@ -5855,32 +5850,19 @@ struct mlx5_meter_domains_infos *
 	uint8_t age = sh->cmng.age;
 	uint16_t pool_index = sh->cmng.pool_index;
 	struct mlx5_pools_container *cont;
-	struct mlx5_pools_container *mcont;
 	struct mlx5_flow_counter_pool *pool;
+	int cont_loop = MLX5_CCONT_TYPE_MAX;
 
 	if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
 		goto set_alarm;
 next_container:
-	cont = MLX5_CNT_CONTAINER(sh, batch, 1, age);
-	mcont = MLX5_CNT_CONTAINER(sh, batch, 0, age);
-	/* Check if resize was done and need to flip a container. */
-	if (cont != mcont) {
-		if (cont->pools) {
-			/* Clean the old container. */
-			rte_free(cont->pools);
-			memset(cont, 0, sizeof(*cont));
-		}
-		rte_cio_wmb();
-		 /* Flip the host container. */
-		sh->cmng.mhi[batch][age] ^= (uint8_t)2;
-		cont = mcont;
-	}
+	cont = MLX5_CNT_CONTAINER(sh, batch, age);
+	rte_spinlock_lock(&cont->resize_sl);
 	if (!cont->pools) {
-		/* 2 empty containers case is unexpected. */
-		if (unlikely(batch != sh->cmng.batch) &&
-			unlikely(age != sh->cmng.age)) {
+		rte_spinlock_unlock(&cont->resize_sl);
+		/* Check if all the containers are empty. */
+		if (unlikely(--cont_loop == 0))
 			goto set_alarm;
-		}
 		batch ^= 0x1;
 		pool_index = 0;
 		if (batch == 0 && pool_index == 0) {
@@ -5891,6 +5873,7 @@ struct mlx5_meter_domains_infos *
 		goto next_container;
 	}
 	pool = cont->pools[pool_index];
+	rte_spinlock_unlock(&cont->resize_sl);
 	if (pool->raw_hw)
 		/* There is a pool query in progress. */
 		goto set_alarm;
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 7f5e01f..2c96677 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -900,12 +900,6 @@ struct mlx5_flow_driver_ops {
 	mlx5_flow_get_aged_flows_t get_aged_flows;
 };
 
-
-#define MLX5_CNT_CONTAINER(sh, batch, thread, age) (&(sh)->cmng.ccont \
-	[(((sh)->cmng.mhi[batch][age] >> (thread)) & 0x1) * 2 + (batch)][age])
-#define MLX5_CNT_CONTAINER_UNUSED(sh, batch, thread, age) (&(sh)->cmng.ccont \
-	[(~((sh)->cmng.mhi[batch][age] >> (thread)) & 0x1) * 2 + (batch)][age])
-
 /* mlx5_flow.c */
 
 struct mlx5_flow_id_pool *mlx5_flow_id_pool_alloc(uint32_t max_id);
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 4ebb7ce..c46a589 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -4038,7 +4038,7 @@ struct field_modify_info modify_tcp[] = {
 		idx -= MLX5_CNT_BATCH_OFFSET;
 		batch = 1;
 	}
-	cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
+	cont = MLX5_CNT_CONTAINER(priv->sh, batch, age);
 	MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < cont->n);
 	pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL];
 	MLX5_ASSERT(pool);
@@ -4162,69 +4162,55 @@ struct field_modify_info modify_tcp[] = {
  *   Whether the pool is for Aging counter.
  *
  * @return
- *   The new container pointer on success, otherwise NULL and rte_errno is set.
+ *   0 on success, otherwise negative errno value and rte_errno is set.
  */
-static struct mlx5_pools_container *
+static int
 flow_dv_container_resize(struct rte_eth_dev *dev,
 				uint32_t batch, uint32_t age)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_pools_container *cont =
-			MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
-	struct mlx5_pools_container *new_cont =
-			MLX5_CNT_CONTAINER_UNUSED(priv->sh, batch, 0, age);
+	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
+							       age);
 	struct mlx5_counter_stats_mem_mng *mem_mng = NULL;
+	void *old_pools = cont->pools;
 	uint32_t resize = cont->n + MLX5_CNT_CONTAINER_RESIZE;
 	uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) * resize;
-	int i;
+	void *pools = rte_calloc(__func__, 1, mem_size, 0);
 
-	/* Fallback mode has no background thread. Skip the check. */
-	if (!priv->counter_fallback &&
-	    cont != MLX5_CNT_CONTAINER(priv->sh, batch, 1, age)) {
-		/* The last resize still hasn't detected by the host thread. */
-		rte_errno = EAGAIN;
-		return NULL;
-	}
-	new_cont->pools = rte_calloc(__func__, 1, mem_size, 0);
-	if (!new_cont->pools) {
+	if (!pools) {
 		rte_errno = ENOMEM;
-		return NULL;
+		return -ENOMEM;
 	}
-	if (cont->n)
-		memcpy(new_cont->pools, cont->pools, cont->n *
-		       sizeof(struct mlx5_flow_counter_pool *));
+	if (old_pools)
+		memcpy(pools, old_pools, cont->n *
+				       sizeof(struct mlx5_flow_counter_pool *));
 	/*
 	 * Fallback mode query the counter directly, no background query
 	 * resources are needed.
 	 */
 	if (!priv->counter_fallback) {
+		int i;
+
 		mem_mng = flow_dv_create_counter_stat_mem_mng(dev,
-			MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES);
+			  MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES);
 		if (!mem_mng) {
-			rte_free(new_cont->pools);
-			return NULL;
+			rte_free(pools);
+			return -ENOMEM;
 		}
 		for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
 			LIST_INSERT_HEAD(&priv->sh->cmng.free_stat_raws,
 					 mem_mng->raws +
 					 MLX5_CNT_CONTAINER_RESIZE +
 					 i, next);
-	} else {
-		/*
-		 * Release the old container pools directly as no background
-		 * thread helps that.
-		 */
-		rte_free(cont->pools);
 	}
-	new_cont->n = resize;
-	rte_atomic16_set(&new_cont->n_valid, rte_atomic16_read(&cont->n_valid));
-	TAILQ_INIT(&new_cont->pool_list);
-	TAILQ_CONCAT(&new_cont->pool_list, &cont->pool_list, next);
-	new_cont->init_mem_mng = mem_mng;
-	rte_cio_wmb();
-	 /* Flip the master container. */
-	priv->sh->cmng.mhi[batch][age] ^= (uint8_t)1;
-	return new_cont;
+	rte_spinlock_lock(&cont->resize_sl);
+	cont->n = resize;
+	cont->mem_mng = mem_mng;
+	cont->pools = pools;
+	rte_spinlock_unlock(&cont->resize_sl);
+	if (old_pools)
+		rte_free(old_pools);
+	return 0;
 }
 
 /**
@@ -4296,22 +4282,19 @@ struct field_modify_info modify_tcp[] = {
  * @return
  *   The pool container pointer on success, NULL otherwise and rte_errno is set.
  */
-static struct mlx5_pools_container *
+static struct mlx5_flow_counter_pool *
 flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
 		    uint32_t batch, uint32_t age)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_flow_counter_pool *pool;
 	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
-							       0, age);
+							       age);
 	int16_t n_valid = rte_atomic16_read(&cont->n_valid);
 	uint32_t size = sizeof(*pool);
 
-	if (cont->n == n_valid) {
-		cont = flow_dv_container_resize(dev, batch, age);
-		if (!cont)
-			return NULL;
-	}
+	if (cont->n == n_valid && flow_dv_container_resize(dev, batch, age))
+		return NULL;
 	size += MLX5_COUNTERS_PER_POOL * CNT_SIZE;
 	size += (batch ? 0 : MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE);
 	size += (!age ? 0 : MLX5_COUNTERS_PER_POOL * AGE_SIZE);
@@ -4322,8 +4305,8 @@ struct field_modify_info modify_tcp[] = {
 	}
 	pool->min_dcs = dcs;
 	if (!priv->counter_fallback)
-		pool->raw = cont->init_mem_mng->raws + n_valid %
-						     MLX5_CNT_CONTAINER_RESIZE;
+		pool->raw = cont->mem_mng->raws + n_valid %
+						      MLX5_CNT_CONTAINER_RESIZE;
 	pool->raw_hw = NULL;
 	pool->type = 0;
 	pool->type |= (batch ? 0 :  CNT_POOL_TYPE_EXT);
@@ -4351,7 +4334,7 @@ struct field_modify_info modify_tcp[] = {
 	/* Pool initialization must be updated before host thread access. */
 	rte_cio_wmb();
 	rte_atomic16_add(&cont->n_valid, 1);
-	return cont;
+	return pool;
 }
 
 /**
@@ -4375,7 +4358,7 @@ struct field_modify_info modify_tcp[] = {
 	struct mlx5_flow_counter_pool *other;
 	struct mlx5_pools_container *cont;
 
-	cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, (age ^ 0x1));
+	cont = MLX5_CNT_CONTAINER(priv->sh, batch, (age ^ 0x1));
 	other = flow_dv_find_pool_by_id(cont, pool->min_dcs->id);
 	if (!other)
 		return;
@@ -4400,10 +4383,10 @@ struct field_modify_info modify_tcp[] = {
  *   Whether the pool is for counter that was allocated for aging.
  *
  * @return
- *   The counter container pointer and @p cnt_free is set on success,
+ *   The counter pool pointer and @p cnt_free is set on success,
  *   NULL otherwise and rte_errno is set.
  */
-static struct mlx5_pools_container *
+static struct mlx5_flow_counter_pool *
 flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
 			     struct mlx5_flow_counter **cnt_free,
 			     uint32_t batch, uint32_t age)
@@ -4415,7 +4398,7 @@ struct field_modify_info modify_tcp[] = {
 	struct mlx5_flow_counter *cnt;
 	uint32_t i;
 
-	cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
+	cont = MLX5_CNT_CONTAINER(priv->sh, batch, age);
 	if (!batch) {
 		/* bulk_bitmap must be 0 for single counter allocation. */
 		dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0);
@@ -4423,12 +4406,11 @@ struct field_modify_info modify_tcp[] = {
 			return NULL;
 		pool = flow_dv_find_pool_by_id(cont, dcs->id);
 		if (!pool) {
-			cont = flow_dv_pool_create(dev, dcs, batch, age);
-			if (!cont) {
+			pool = flow_dv_pool_create(dev, dcs, batch, age);
+			if (!pool) {
 				mlx5_devx_cmd_destroy(dcs);
 				return NULL;
 			}
-			pool = TAILQ_FIRST(&cont->pool_list);
 		} else if (dcs->id < pool->min_dcs->id) {
 			rte_atomic64_set(&pool->a64_dcs,
 					 (int64_t)(uintptr_t)dcs);
@@ -4440,7 +4422,7 @@ struct field_modify_info modify_tcp[] = {
 		TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
 		MLX5_GET_POOL_CNT_EXT(pool, i)->dcs = dcs;
 		*cnt_free = cnt;
-		return cont;
+		return pool;
 	}
 	/* bulk_bitmap is in 128 counters units. */
 	if (priv->config.hca_attr.flow_counter_bulk_alloc_bitmap & 0x4)
@@ -4449,18 +4431,17 @@ struct field_modify_info modify_tcp[] = {
 		rte_errno = ENODATA;
 		return NULL;
 	}
-	cont = flow_dv_pool_create(dev, dcs, batch, age);
-	if (!cont) {
+	pool = flow_dv_pool_create(dev, dcs, batch, age);
+	if (!pool) {
 		mlx5_devx_cmd_destroy(dcs);
 		return NULL;
 	}
-	pool = TAILQ_FIRST(&cont->pool_list);
 	for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
 		cnt = MLX5_POOL_GET_CNT(pool, i);
 		TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
 	}
 	*cnt_free = MLX5_POOL_GET_CNT(pool, 0);
-	return cont;
+	return pool;
 }
 
 /**
@@ -4534,7 +4515,7 @@ struct field_modify_info modify_tcp[] = {
 	 */
 	uint32_t batch = (group && !shared && !priv->counter_fallback) ? 1 : 0;
 	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, batch,
-							       0, age);
+							       age);
 	uint32_t cnt_idx;
 
 	if (!priv->config.devx) {
@@ -4573,10 +4554,9 @@ struct field_modify_info modify_tcp[] = {
 		cnt_free = NULL;
 	}
 	if (!cnt_free) {
-		cont = flow_dv_counter_pool_prepare(dev, &cnt_free, batch, age);
-		if (!cont)
+		pool = flow_dv_counter_pool_prepare(dev, &cnt_free, batch, age);
+		if (!pool)
 			return 0;
-		pool = TAILQ_FIRST(&cont->pool_list);
 	}
 	if (!batch)
 		cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt_free);
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index c403f72..01eec31 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -56,8 +56,7 @@
 			      struct mlx5_flow_counter_pool **ppool)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0,
-									0);
+	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0);
 	struct mlx5_flow_counter_pool *pool;
 
 	idx--;
@@ -152,8 +151,7 @@
 flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0,
-									0);
+	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv->sh, 0, 0);
 	struct mlx5_flow_counter_pool *pool = NULL;
 	struct mlx5_flow_counter_ext *cnt_ext = NULL;
 	struct mlx5_flow_counter *cnt = NULL;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [dpdk-stable] [PATCH] net/mlx5: fix counter container resize
  2020-05-12 12:52 [dpdk-stable] [PATCH] net/mlx5: fix counter container resize Matan Azrad
@ 2020-05-13 15:27 ` Slava Ovsiienko
  2020-05-14  8:11 ` [dpdk-stable] [dpdk-dev] " Raslan Darawsheh
  1 sibling, 0 replies; 3+ messages in thread
From: Slava Ovsiienko @ 2020-05-13 15:27 UTC (permalink / raw)
  To: Matan Azrad, dev; +Cc: stable

> -----Original Message-----
> From: Matan Azrad <matan@mellanox.com>
> Sent: Tuesday, May 12, 2020 15:52
> To: dev@dpdk.org
> Cc: Slava Ovsiienko <viacheslavo@mellanox.com>; stable@dpdk.org
> Subject: [PATCH] net/mlx5: fix counter container resize
> 
> The design of counter container resize used double buffer algorithm in order
> to synchronize between the query thread to the control thread.
> When the control thread detected resize need, it created new bigger buffer
> for the counter pools in a new container and change the container index
> atomically.
> In case the query thread had not detect the previous resize before a new one
> need was detected by the control thread, the control thread returned
> EAGAIN to the flow creation API used a COUNT action.
> 
> The rte_flow API doesn't allow unblocked commands and doesn't expect to
> get EAGAIN error type.
> 
> So, when a lot of flows were created between 2 different periodic queries, 2
> different resizes might try to be created and caused EAGAIN error.
> This behavior may blame flow creations.
> 
> Change the synchronization way to use lock instead of double buffer
> algorithm.
> 
> The critical section of this lock is very small, so flow insertion rate should not
> be decreased.
> 
> Fixes: ebbac312e448 ("net/mlx5: resize a full counter container")
> Cc: stable@dpdk.org
Acked-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>

> 
> Signed-off-by: Matan Azrad <matan@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5.c            |  70 +++++++++++------------
>  drivers/net/mlx5/mlx5.h            |  23 +++++---
>  drivers/net/mlx5/mlx5_flow.c       |  37 ++++---------
>  drivers/net/mlx5/mlx5_flow.h       |   6 --
>  drivers/net/mlx5/mlx5_flow_dv.c    | 110 +++++++++++++++--------------------
> --
>  drivers/net/mlx5/mlx5_flow_verbs.c |   6 +-
>  6 files changed, 104 insertions(+), 148 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> 4f704cb..1445809 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -467,13 +467,13 @@ struct mlx5_flow_id_pool *  static void
> mlx5_flow_counters_mng_init(struct mlx5_ibv_shared *sh)  {
> -	uint8_t i, age;
> +	int i;
> 
> -	sh->cmng.age = 0;
> +	memset(&sh->cmng, 0, sizeof(sh->cmng));
>  	TAILQ_INIT(&sh->cmng.flow_counters);
> -	for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
> -		for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i)
> -			TAILQ_INIT(&sh->cmng.ccont[i][age].pool_list);
> +	for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) {
> +		TAILQ_INIT(&sh->cmng.ccont[i].pool_list);
> +		rte_spinlock_init(&sh->cmng.ccont[i].resize_sl);
>  	}
>  }
> 
> @@ -504,7 +504,7 @@ struct mlx5_flow_id_pool *
> mlx5_flow_counters_mng_close(struct mlx5_ibv_shared *sh)  {
>  	struct mlx5_counter_stats_mem_mng *mng;
> -	uint8_t i, age = 0;
> +	int i;
>  	int j;
>  	int retries = 1024;
> 
> @@ -515,42 +515,34 @@ struct mlx5_flow_id_pool *
>  			break;
>  		rte_pause();
>  	}
> +	for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i) {
> +		struct mlx5_flow_counter_pool *pool;
> +		uint32_t batch = !!(i > 1);
> 
> -	for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
> -		for (i = 0; i < RTE_DIM(sh->cmng.ccont); ++i) {
> -			struct mlx5_flow_counter_pool *pool;
> -			uint32_t batch = !!(i % 2);
> -
> -			if (!sh->cmng.ccont[i][age].pools)
> -				continue;
> -			pool = TAILQ_FIRST(&sh-
> >cmng.ccont[i][age].pool_list);
> -			while (pool) {
> -				if (batch) {
> -					if (pool->min_dcs)
> -						claim_zero
> -						(mlx5_devx_cmd_destroy
> -						(pool->min_dcs));
> -				}
> -				for (j = 0; j < MLX5_COUNTERS_PER_POOL;
> ++j) {
> -					if (MLX5_POOL_GET_CNT(pool, j)-
> >action)
> -						claim_zero
> -						(mlx5_glue-
> >destroy_flow_action
> -						 (MLX5_POOL_GET_CNT
> -						  (pool, j)->action));
> -					if (!batch &&
> MLX5_GET_POOL_CNT_EXT
> -					    (pool, j)->dcs)
> -
> 	claim_zero(mlx5_devx_cmd_destroy
> -
> (MLX5_GET_POOL_CNT_EXT
> -							  (pool, j)->dcs));
> -				}
> -				TAILQ_REMOVE(&sh-
> >cmng.ccont[i][age].pool_list,
> -					pool, next);
> -				rte_free(pool);
> -				pool = TAILQ_FIRST
> -					(&sh->cmng.ccont[i][age].pool_list);
> +		if (!sh->cmng.ccont[i].pools)
> +			continue;
> +		pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
> +		while (pool) {
> +			if (batch && pool->min_dcs)
> +				claim_zero(mlx5_devx_cmd_destroy
> +							       (pool->min_dcs));
> +			for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
> +				if (MLX5_POOL_GET_CNT(pool, j)->action)
> +					claim_zero
> +					 (mlx5_glue->destroy_flow_action
> +					  (MLX5_POOL_GET_CNT
> +					  (pool, j)->action));
> +				if (!batch && MLX5_GET_POOL_CNT_EXT
> +				    (pool, j)->dcs)
> +					claim_zero(mlx5_devx_cmd_destroy
> +
> (MLX5_GET_POOL_CNT_EXT
> +						    (pool, j)->dcs));
>  			}
> -			rte_free(sh->cmng.ccont[i][age].pools);
> +			TAILQ_REMOVE(&sh->cmng.ccont[i].pool_list, pool,
> next);
> +			rte_free(pool);
> +			pool = TAILQ_FIRST(&sh->cmng.ccont[i].pool_list);
>  		}
> +		rte_free(sh->cmng.ccont[i].pools);
>  	}
>  	mng = LIST_FIRST(&sh->cmng.mem_mngs);
>  	while (mng) {
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> 1740d4a..d9f5d81 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -227,13 +227,11 @@ struct mlx5_drop {  #define CNTEXT_SIZE
> (sizeof(struct mlx5_flow_counter_ext))  #define AGE_SIZE (sizeof(struct
> mlx5_age_param))
>  #define MLX5_AGING_TIME_DELAY	7
> -
>  #define CNT_POOL_TYPE_EXT	(1 << 0)
>  #define CNT_POOL_TYPE_AGE	(1 << 1)
>  #define IS_EXT_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_EXT)
> #define IS_AGE_POOL(pool) (((pool)->type) & CNT_POOL_TYPE_AGE)
> #define MLX_CNT_IS_AGE(counter) ((counter) & MLX5_CNT_AGE_OFFSET ? 1
> : 0)
> -
>  #define MLX5_CNT_LEN(pool) \
>  	(CNT_SIZE + \
>  	(IS_AGE_POOL(pool) ? AGE_SIZE : 0) + \ @@ -270,6 +268,17 @@
> enum {
>  	AGE_TMOUT, /* Timeout, wait for rte_flow_get_aged_flows and
> destroy. */  };
> 
> +#define MLX5_CNT_CONTAINER(sh, batch, age) (&(sh)->cmng.ccont \
> +					    [(batch) * 2 + (age)])
> +
> +enum {
> +	MLX5_CCONT_TYPE_SINGLE,
> +	MLX5_CCONT_TYPE_SINGLE_FOR_AGE,
> +	MLX5_CCONT_TYPE_BATCH,
> +	MLX5_CCONT_TYPE_BATCH_FOR_AGE,
> +	MLX5_CCONT_TYPE_MAX,
> +};
> +
>  /* Counter age parameter. */
>  struct mlx5_age_param {
>  	rte_atomic16_t state; /**< Age state. */ @@ -313,7 +322,6 @@
> struct mlx5_flow_counter_ext {
>  	};
>  };
> 
> -
>  TAILQ_HEAD(mlx5_counters, mlx5_flow_counter);
> 
>  /* Generic counter pool structure - query is in pool resolution. */ @@ -
> 358,17 +366,16 @@ struct mlx5_counter_stats_raw {  struct
> mlx5_pools_container {
>  	rte_atomic16_t n_valid; /* Number of valid pools. */
>  	uint16_t n; /* Number of pools. */
> +	rte_spinlock_t resize_sl; /* The resize lock. */
>  	struct mlx5_counter_pools pool_list; /* Counter pool list. */
>  	struct mlx5_flow_counter_pool **pools; /* Counter pool array. */
> -	struct mlx5_counter_stats_mem_mng *init_mem_mng;
> +	struct mlx5_counter_stats_mem_mng *mem_mng;
>  	/* Hold the memory management for the next allocated pools raws.
> */  };
> 
>  /* Counter global management structure. */  struct
> mlx5_flow_counter_mng {
> -	uint8_t mhi[2][2]; /* master \ host and age \ no age container index.
> */
> -	struct mlx5_pools_container ccont[2 * 2][2];
> -	/* master \ host and age \ no age pools container. */
> +	struct mlx5_pools_container ccont[MLX5_CCONT_TYPE_MAX];
>  	struct mlx5_counters flow_counters; /* Legacy flow counter list. */
>  	uint8_t pending_queries;
>  	uint8_t batch;
> @@ -378,6 +385,7 @@ struct mlx5_flow_counter_mng {
>  	LIST_HEAD(mem_mngs, mlx5_counter_stats_mem_mng)
> mem_mngs;
>  	LIST_HEAD(stat_raws, mlx5_counter_stats_raw) free_stat_raws;  };
> +
>  #define MLX5_AGE_EVENT_NEW		1
>  #define MLX5_AGE_TRIGGER		2
>  #define MLX5_AGE_SET(age_info, BIT) \
> @@ -393,6 +401,7 @@ struct mlx5_age_info {
>  	struct mlx5_counters aged_counters; /* Aged flow counter list. */
>  	rte_spinlock_t aged_sl; /* Aged flow counter list lock. */  };
> +
>  /* Per port data of shared IB device. */  struct mlx5_ibv_shared_port {
>  	uint32_t ih_port_id;
> diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
> index 08c7cdf..ae478a5 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -5801,16 +5801,11 @@ struct mlx5_meter_domains_infos *  static
> uint32_t  mlx5_get_all_valid_pool_count(struct mlx5_ibv_shared *sh)  {
> -	uint8_t age, i;
> +	int i;
>  	uint32_t pools_n = 0;
> -	struct mlx5_pools_container *cont;
> 
> -	for (age = 0; age < RTE_DIM(sh->cmng.ccont[0]); ++age) {
> -		for (i = 0; i < 2 ; ++i) {
> -			cont = MLX5_CNT_CONTAINER(sh, i, 0, age);
> -			pools_n += rte_atomic16_read(&cont->n_valid);
> -		}
> -	}
> +	for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i)
> +		pools_n += rte_atomic16_read(&sh->cmng.ccont[i].n_valid);
>  	return pools_n;
>  }
> 
> @@ -5855,32 +5850,19 @@ struct mlx5_meter_domains_infos *
>  	uint8_t age = sh->cmng.age;
>  	uint16_t pool_index = sh->cmng.pool_index;
>  	struct mlx5_pools_container *cont;
> -	struct mlx5_pools_container *mcont;
>  	struct mlx5_flow_counter_pool *pool;
> +	int cont_loop = MLX5_CCONT_TYPE_MAX;
> 
>  	if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
>  		goto set_alarm;
>  next_container:
> -	cont = MLX5_CNT_CONTAINER(sh, batch, 1, age);
> -	mcont = MLX5_CNT_CONTAINER(sh, batch, 0, age);
> -	/* Check if resize was done and need to flip a container. */
> -	if (cont != mcont) {
> -		if (cont->pools) {
> -			/* Clean the old container. */
> -			rte_free(cont->pools);
> -			memset(cont, 0, sizeof(*cont));
> -		}
> -		rte_cio_wmb();
> -		 /* Flip the host container. */
> -		sh->cmng.mhi[batch][age] ^= (uint8_t)2;
> -		cont = mcont;
> -	}
> +	cont = MLX5_CNT_CONTAINER(sh, batch, age);
> +	rte_spinlock_lock(&cont->resize_sl);
>  	if (!cont->pools) {
> -		/* 2 empty containers case is unexpected. */
> -		if (unlikely(batch != sh->cmng.batch) &&
> -			unlikely(age != sh->cmng.age)) {
> +		rte_spinlock_unlock(&cont->resize_sl);
> +		/* Check if all the containers are empty. */
> +		if (unlikely(--cont_loop == 0))
>  			goto set_alarm;
> -		}
>  		batch ^= 0x1;
>  		pool_index = 0;
>  		if (batch == 0 && pool_index == 0) {
> @@ -5891,6 +5873,7 @@ struct mlx5_meter_domains_infos *
>  		goto next_container;
>  	}
>  	pool = cont->pools[pool_index];
> +	rte_spinlock_unlock(&cont->resize_sl);
>  	if (pool->raw_hw)
>  		/* There is a pool query in progress. */
>  		goto set_alarm;
> diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> index 7f5e01f..2c96677 100644
> --- a/drivers/net/mlx5/mlx5_flow.h
> +++ b/drivers/net/mlx5/mlx5_flow.h
> @@ -900,12 +900,6 @@ struct mlx5_flow_driver_ops {
>  	mlx5_flow_get_aged_flows_t get_aged_flows;  };
> 
> -
> -#define MLX5_CNT_CONTAINER(sh, batch, thread, age) (&(sh)->cmng.ccont
> \
> -	[(((sh)->cmng.mhi[batch][age] >> (thread)) & 0x1) * 2 + (batch)][age])
> -#define MLX5_CNT_CONTAINER_UNUSED(sh, batch, thread, age) (&(sh)-
> >cmng.ccont \
> -	[(~((sh)->cmng.mhi[batch][age] >> (thread)) & 0x1) * 2 +
> (batch)][age])
> -
>  /* mlx5_flow.c */
> 
>  struct mlx5_flow_id_pool *mlx5_flow_id_pool_alloc(uint32_t max_id); diff -
> -git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
> index 4ebb7ce..c46a589 100644
> --- a/drivers/net/mlx5/mlx5_flow_dv.c
> +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> @@ -4038,7 +4038,7 @@ struct field_modify_info modify_tcp[] = {
>  		idx -= MLX5_CNT_BATCH_OFFSET;
>  		batch = 1;
>  	}
> -	cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
> +	cont = MLX5_CNT_CONTAINER(priv->sh, batch, age);
>  	MLX5_ASSERT(idx / MLX5_COUNTERS_PER_POOL < cont->n);
>  	pool = cont->pools[idx / MLX5_COUNTERS_PER_POOL];
>  	MLX5_ASSERT(pool);
> @@ -4162,69 +4162,55 @@ struct field_modify_info modify_tcp[] = {
>   *   Whether the pool is for Aging counter.
>   *
>   * @return
> - *   The new container pointer on success, otherwise NULL and rte_errno is
> set.
> + *   0 on success, otherwise negative errno value and rte_errno is set.
>   */
> -static struct mlx5_pools_container *
> +static int
>  flow_dv_container_resize(struct rte_eth_dev *dev,
>  				uint32_t batch, uint32_t age)
>  {
>  	struct mlx5_priv *priv = dev->data->dev_private;
> -	struct mlx5_pools_container *cont =
> -			MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
> -	struct mlx5_pools_container *new_cont =
> -			MLX5_CNT_CONTAINER_UNUSED(priv->sh, batch, 0,
> age);
> +	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv-
> >sh, batch,
> +							       age);
>  	struct mlx5_counter_stats_mem_mng *mem_mng = NULL;
> +	void *old_pools = cont->pools;
>  	uint32_t resize = cont->n + MLX5_CNT_CONTAINER_RESIZE;
>  	uint32_t mem_size = sizeof(struct mlx5_flow_counter_pool *) *
> resize;
> -	int i;
> +	void *pools = rte_calloc(__func__, 1, mem_size, 0);
> 
> -	/* Fallback mode has no background thread. Skip the check. */
> -	if (!priv->counter_fallback &&
> -	    cont != MLX5_CNT_CONTAINER(priv->sh, batch, 1, age)) {
> -		/* The last resize still hasn't detected by the host thread. */
> -		rte_errno = EAGAIN;
> -		return NULL;
> -	}
> -	new_cont->pools = rte_calloc(__func__, 1, mem_size, 0);
> -	if (!new_cont->pools) {
> +	if (!pools) {
>  		rte_errno = ENOMEM;
> -		return NULL;
> +		return -ENOMEM;
>  	}
> -	if (cont->n)
> -		memcpy(new_cont->pools, cont->pools, cont->n *
> -		       sizeof(struct mlx5_flow_counter_pool *));
> +	if (old_pools)
> +		memcpy(pools, old_pools, cont->n *
> +				       sizeof(struct mlx5_flow_counter_pool
> *));
>  	/*
>  	 * Fallback mode query the counter directly, no background query
>  	 * resources are needed.
>  	 */
>  	if (!priv->counter_fallback) {
> +		int i;
> +
>  		mem_mng = flow_dv_create_counter_stat_mem_mng(dev,
> -			MLX5_CNT_CONTAINER_RESIZE +
> MLX5_MAX_PENDING_QUERIES);
> +			  MLX5_CNT_CONTAINER_RESIZE +
> MLX5_MAX_PENDING_QUERIES);
>  		if (!mem_mng) {
> -			rte_free(new_cont->pools);
> -			return NULL;
> +			rte_free(pools);
> +			return -ENOMEM;
>  		}
>  		for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
>  			LIST_INSERT_HEAD(&priv->sh-
> >cmng.free_stat_raws,
>  					 mem_mng->raws +
>  					 MLX5_CNT_CONTAINER_RESIZE +
>  					 i, next);
> -	} else {
> -		/*
> -		 * Release the old container pools directly as no background
> -		 * thread helps that.
> -		 */
> -		rte_free(cont->pools);
>  	}
> -	new_cont->n = resize;
> -	rte_atomic16_set(&new_cont->n_valid, rte_atomic16_read(&cont-
> >n_valid));
> -	TAILQ_INIT(&new_cont->pool_list);
> -	TAILQ_CONCAT(&new_cont->pool_list, &cont->pool_list, next);
> -	new_cont->init_mem_mng = mem_mng;
> -	rte_cio_wmb();
> -	 /* Flip the master container. */
> -	priv->sh->cmng.mhi[batch][age] ^= (uint8_t)1;
> -	return new_cont;
> +	rte_spinlock_lock(&cont->resize_sl);
> +	cont->n = resize;
> +	cont->mem_mng = mem_mng;
> +	cont->pools = pools;
> +	rte_spinlock_unlock(&cont->resize_sl);
> +	if (old_pools)
> +		rte_free(old_pools);
> +	return 0;
>  }
> 
>  /**
> @@ -4296,22 +4282,19 @@ struct field_modify_info modify_tcp[] = {
>   * @return
>   *   The pool container pointer on success, NULL otherwise and rte_errno is
> set.
>   */
> -static struct mlx5_pools_container *
> +static struct mlx5_flow_counter_pool *
>  flow_dv_pool_create(struct rte_eth_dev *dev, struct mlx5_devx_obj *dcs,
>  		    uint32_t batch, uint32_t age)
>  {
>  	struct mlx5_priv *priv = dev->data->dev_private;
>  	struct mlx5_flow_counter_pool *pool;
>  	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv-
> >sh, batch,
> -							       0, age);
> +							       age);
>  	int16_t n_valid = rte_atomic16_read(&cont->n_valid);
>  	uint32_t size = sizeof(*pool);
> 
> -	if (cont->n == n_valid) {
> -		cont = flow_dv_container_resize(dev, batch, age);
> -		if (!cont)
> -			return NULL;
> -	}
> +	if (cont->n == n_valid && flow_dv_container_resize(dev, batch, age))
> +		return NULL;
>  	size += MLX5_COUNTERS_PER_POOL * CNT_SIZE;
>  	size += (batch ? 0 : MLX5_COUNTERS_PER_POOL * CNTEXT_SIZE);
>  	size += (!age ? 0 : MLX5_COUNTERS_PER_POOL * AGE_SIZE); @@ -
> 4322,8 +4305,8 @@ struct field_modify_info modify_tcp[] = {
>  	}
>  	pool->min_dcs = dcs;
>  	if (!priv->counter_fallback)
> -		pool->raw = cont->init_mem_mng->raws + n_valid %
> -
> MLX5_CNT_CONTAINER_RESIZE;
> +		pool->raw = cont->mem_mng->raws + n_valid %
> +
> MLX5_CNT_CONTAINER_RESIZE;
>  	pool->raw_hw = NULL;
>  	pool->type = 0;
>  	pool->type |= (batch ? 0 :  CNT_POOL_TYPE_EXT); @@ -4351,7
> +4334,7 @@ struct field_modify_info modify_tcp[] = {
>  	/* Pool initialization must be updated before host thread access. */
>  	rte_cio_wmb();
>  	rte_atomic16_add(&cont->n_valid, 1);
> -	return cont;
> +	return pool;
>  }
> 
>  /**
> @@ -4375,7 +4358,7 @@ struct field_modify_info modify_tcp[] = {
>  	struct mlx5_flow_counter_pool *other;
>  	struct mlx5_pools_container *cont;
> 
> -	cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, (age ^ 0x1));
> +	cont = MLX5_CNT_CONTAINER(priv->sh, batch, (age ^ 0x1));
>  	other = flow_dv_find_pool_by_id(cont, pool->min_dcs->id);
>  	if (!other)
>  		return;
> @@ -4400,10 +4383,10 @@ struct field_modify_info modify_tcp[] = {
>   *   Whether the pool is for counter that was allocated for aging.
>   *
>   * @return
> - *   The counter container pointer and @p cnt_free is set on success,
> + *   The counter pool pointer and @p cnt_free is set on success,
>   *   NULL otherwise and rte_errno is set.
>   */
> -static struct mlx5_pools_container *
> +static struct mlx5_flow_counter_pool *
>  flow_dv_counter_pool_prepare(struct rte_eth_dev *dev,
>  			     struct mlx5_flow_counter **cnt_free,
>  			     uint32_t batch, uint32_t age)
> @@ -4415,7 +4398,7 @@ struct field_modify_info modify_tcp[] = {
>  	struct mlx5_flow_counter *cnt;
>  	uint32_t i;
> 
> -	cont = MLX5_CNT_CONTAINER(priv->sh, batch, 0, age);
> +	cont = MLX5_CNT_CONTAINER(priv->sh, batch, age);
>  	if (!batch) {
>  		/* bulk_bitmap must be 0 for single counter allocation. */
>  		dcs = mlx5_devx_cmd_flow_counter_alloc(priv->sh->ctx, 0);
> @@ -4423,12 +4406,11 @@ struct field_modify_info modify_tcp[] = {
>  			return NULL;
>  		pool = flow_dv_find_pool_by_id(cont, dcs->id);
>  		if (!pool) {
> -			cont = flow_dv_pool_create(dev, dcs, batch, age);
> -			if (!cont) {
> +			pool = flow_dv_pool_create(dev, dcs, batch, age);
> +			if (!pool) {
>  				mlx5_devx_cmd_destroy(dcs);
>  				return NULL;
>  			}
> -			pool = TAILQ_FIRST(&cont->pool_list);
>  		} else if (dcs->id < pool->min_dcs->id) {
>  			rte_atomic64_set(&pool->a64_dcs,
>  					 (int64_t)(uintptr_t)dcs);
> @@ -4440,7 +4422,7 @@ struct field_modify_info modify_tcp[] = {
>  		TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
>  		MLX5_GET_POOL_CNT_EXT(pool, i)->dcs = dcs;
>  		*cnt_free = cnt;
> -		return cont;
> +		return pool;
>  	}
>  	/* bulk_bitmap is in 128 counters units. */
>  	if (priv->config.hca_attr.flow_counter_bulk_alloc_bitmap & 0x4)
> @@ -4449,18 +4431,17 @@ struct field_modify_info modify_tcp[] = {
>  		rte_errno = ENODATA;
>  		return NULL;
>  	}
> -	cont = flow_dv_pool_create(dev, dcs, batch, age);
> -	if (!cont) {
> +	pool = flow_dv_pool_create(dev, dcs, batch, age);
> +	if (!pool) {
>  		mlx5_devx_cmd_destroy(dcs);
>  		return NULL;
>  	}
> -	pool = TAILQ_FIRST(&cont->pool_list);
>  	for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
>  		cnt = MLX5_POOL_GET_CNT(pool, i);
>  		TAILQ_INSERT_HEAD(&pool->counters, cnt, next);
>  	}
>  	*cnt_free = MLX5_POOL_GET_CNT(pool, 0);
> -	return cont;
> +	return pool;
>  }
> 
>  /**
> @@ -4534,7 +4515,7 @@ struct field_modify_info modify_tcp[] = {
>  	 */
>  	uint32_t batch = (group && !shared && !priv->counter_fallback) ? 1 :
> 0;
>  	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv-
> >sh, batch,
> -							       0, age);
> +							       age);
>  	uint32_t cnt_idx;
> 
>  	if (!priv->config.devx) {
> @@ -4573,10 +4554,9 @@ struct field_modify_info modify_tcp[] = {
>  		cnt_free = NULL;
>  	}
>  	if (!cnt_free) {
> -		cont = flow_dv_counter_pool_prepare(dev, &cnt_free,
> batch, age);
> -		if (!cont)
> +		pool = flow_dv_counter_pool_prepare(dev, &cnt_free,
> batch, age);
> +		if (!pool)
>  			return 0;
> -		pool = TAILQ_FIRST(&cont->pool_list);
>  	}
>  	if (!batch)
>  		cnt_ext = MLX5_CNT_TO_CNT_EXT(pool, cnt_free); diff --git
> a/drivers/net/mlx5/mlx5_flow_verbs.c
> b/drivers/net/mlx5/mlx5_flow_verbs.c
> index c403f72..01eec31 100644
> --- a/drivers/net/mlx5/mlx5_flow_verbs.c
> +++ b/drivers/net/mlx5/mlx5_flow_verbs.c
> @@ -56,8 +56,7 @@
>  			      struct mlx5_flow_counter_pool **ppool)  {
>  	struct mlx5_priv *priv = dev->data->dev_private;
> -	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv-
> >sh, 0, 0,
> -									0);
> +	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv-
> >sh, 0,
> +0);
>  	struct mlx5_flow_counter_pool *pool;
> 
>  	idx--;
> @@ -152,8 +151,7 @@
>  flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t
> id)  {
>  	struct mlx5_priv *priv = dev->data->dev_private;
> -	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv-
> >sh, 0, 0,
> -									0);
> +	struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(priv-
> >sh, 0,
> +0);
>  	struct mlx5_flow_counter_pool *pool = NULL;
>  	struct mlx5_flow_counter_ext *cnt_ext = NULL;
>  	struct mlx5_flow_counter *cnt = NULL;
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [dpdk-stable] [dpdk-dev] [PATCH] net/mlx5: fix counter container resize
  2020-05-12 12:52 [dpdk-stable] [PATCH] net/mlx5: fix counter container resize Matan Azrad
  2020-05-13 15:27 ` Slava Ovsiienko
@ 2020-05-14  8:11 ` Raslan Darawsheh
  1 sibling, 0 replies; 3+ messages in thread
From: Raslan Darawsheh @ 2020-05-14  8:11 UTC (permalink / raw)
  To: Matan Azrad, dev; +Cc: Slava Ovsiienko, stable

Hi,

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Matan Azrad
> Sent: Tuesday, May 12, 2020 3:52 PM
> To: dev@dpdk.org
> Cc: Slava Ovsiienko <viacheslavo@mellanox.com>; stable@dpdk.org
> Subject: [dpdk-dev] [PATCH] net/mlx5: fix counter container resize
> 
> The design of counter container resize used double buffer algorithm in
> order to synchronize between the query thread to the control thread.
> When the control thread detected resize need, it created new bigger
> buffer for the counter pools in a new container and change the container
> index atomically.
> In case the query thread had not detect the previous resize before a new
> one need was detected by the control thread, the control thread returned
> EAGAIN to the flow creation API used a COUNT action.
> 
> The rte_flow API doesn't allow unblocked commands and doesn't expect to
> get EAGAIN error type.
> 
> So, when a lot of flows were created between 2 different periodic
> queries, 2 different resizes might try to be created and caused EAGAIN
> error.
> This behavior may blame flow creations.
> 
> Change the synchronization way to use lock instead of double buffer
> algorithm.
> 
> The critical section of this lock is very small, so flow insertion
> rate should not be decreased.
> 
> Fixes: ebbac312e448 ("net/mlx5: resize a full counter container")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Matan Azrad <matan@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5.c            |  70 +++++++++++------------
>  drivers/net/mlx5/mlx5.h            |  23 +++++---
>  drivers/net/mlx5/mlx5_flow.c       |  37 ++++---------
>  drivers/net/mlx5/mlx5_flow.h       |   6 --
>  drivers/net/mlx5/mlx5_flow_dv.c    | 110 +++++++++++++++------------------
> ----
>  drivers/net/mlx5/mlx5_flow_verbs.c |   6 +-
>  6 files changed, 104 insertions(+), 148 deletions(-)
> 

Patch applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-05-14  8:11 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-12 12:52 [dpdk-stable] [PATCH] net/mlx5: fix counter container resize Matan Azrad
2020-05-13 15:27 ` Slava Ovsiienko
2020-05-14  8:11 ` [dpdk-stable] [dpdk-dev] " Raslan Darawsheh

patches for DPDK stable branches

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://inbox.dpdk.org/stable/0 stable/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 stable stable/ https://inbox.dpdk.org/stable \
		stable@dpdk.org
	public-inbox-index stable

Example config snippet for mirrors.
Newsgroup available over NNTP:
	nntp://inbox.dpdk.org/inbox.dpdk.stable


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git