DPDK patches and discussions
From: Dariusz Sosnowski <dsosnowski@nvidia.com>
To: Viacheslav Ovsiienko <viacheslavo@nvidia.com>,
	Ori Kam <orika@nvidia.com>,  Suanming Mou <suanmingm@nvidia.com>,
	Matan Azrad <matan@nvidia.com>, "Xiaoyu Min" <jackmin@nvidia.com>
Cc: <dev@dpdk.org>, <stable@dpdk.org>, Bing Zhao <bingz@nvidia.com>
Subject: [PATCH] net/mlx5: fix counter cache starvation
Date: Wed, 28 Feb 2024 20:06:06 +0100	[thread overview]
Message-ID: <20240228190607.187958-1-dsosnowski@nvidia.com> (raw)

The mlx5 PMD maintains a global counter pool and per-queue counter caches,
which are used to allocate COUNT flow action objects.
Whenever an empty cache is accessed, it is replenished
with a pre-defined number of counters taken from the global pool.

If the number of configured counters was small enough,
caches associated with some queues could be starved,
because all available counters had already been fetched into the caches of other queues.

This patch fixes that by disabling the per-queue cache at runtime
if the number of configured counters is not sufficient
to populate all per-queue caches and thus avoid such starvation.
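
For illustration only (not part of the patch), below is a minimal standalone
sketch of the enable-cache decision: the cache is kept only when the requested
counters can populate every per-queue cache. The simplified structures and the
sample values (4 queues, 256 cache slots per queue) are assumptions and do not
mirror the full driver types:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Simplified stand-ins for the pool and cache configuration. */
    struct pool_cfg  { uint32_t request_num; };           /* counters requested */
    struct cache_cfg { uint32_t q_num; uint32_t size; };  /* queues, slots per queue */

    /* Enable cache only if requested counters can fill every per-queue cache. */
    static bool
    should_enable_cache(const struct pool_cfg *p, const struct cache_cfg *c)
    {
        return p->request_num >= (uint64_t)c->q_num * c->size;
    }

    int
    main(void)
    {
        struct cache_cfg c = { .q_num = 4, .size = 256 };
        struct pool_cfg small = { .request_num = 512 };   /* 512 < 1024  -> cache disabled */
        struct pool_cfg large = { .request_num = 4096 };  /* 4096 >= 1024 -> cache enabled */

        printf("small pool: cache %s\n", should_enable_cache(&small, &c) ? "on" : "off");
        printf("large pool: cache %s\n", should_enable_cache(&large, &c) ? "on" : "off");
        return 0;
    }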

Fixes: 4d368e1da3a4 ("net/mlx5: support flow counter action for HWS")
Cc: jackmin@nvidia.com
Cc: stable@dpdk.org

Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
Acked-by: Ori Kam <orika@nvidia.com>
Acked-by: Bing Zhao <bingz@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow_hw.c |  6 +--
 drivers/net/mlx5/mlx5_hws_cnt.c | 72 ++++++++++++++++++++++++---------
 drivers/net/mlx5/mlx5_hws_cnt.h | 25 +++++++++---
 3 files changed, 74 insertions(+), 29 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index f778fd0698..8ba3b3321e 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -3117,8 +3117,7 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 				break;
 			/* Fall-through. */
 		case RTE_FLOW_ACTION_TYPE_COUNT:
-			/* If the port is engaged in resource sharing, do not use queue cache. */
-			cnt_queue = mlx5_hws_cnt_is_pool_shared(priv) ? NULL : &queue;
+			cnt_queue = mlx5_hws_cnt_get_queue(priv, &queue);
 			ret = mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &cnt_id, age_idx);
 			if (ret != 0)
 				return ret;
@@ -3757,8 +3756,7 @@ flow_hw_age_count_release(struct mlx5_priv *priv, uint32_t queue,
 		}
 		return;
 	}
-	/* If the port is engaged in resource sharing, do not use queue cache. */
-	cnt_queue = mlx5_hws_cnt_is_pool_shared(priv) ? NULL : &queue;
+	cnt_queue = mlx5_hws_cnt_get_queue(priv, &queue);
 	/* Put the counter first to reduce the race risk in BG thread. */
 	mlx5_hws_cnt_pool_put(priv->hws_cpool, cnt_queue, &flow->cnt_id);
 	flow->cnt_id = 0;
diff --git a/drivers/net/mlx5/mlx5_hws_cnt.c b/drivers/net/mlx5/mlx5_hws_cnt.c
index a3bea94811..c31f2f380b 100644
--- a/drivers/net/mlx5/mlx5_hws_cnt.c
+++ b/drivers/net/mlx5/mlx5_hws_cnt.c
@@ -340,6 +340,55 @@ mlx5_hws_cnt_pool_deinit(struct mlx5_hws_cnt_pool * const cntp)
 	mlx5_free(cntp);
 }
 
+static bool
+mlx5_hws_cnt_should_enable_cache(const struct mlx5_hws_cnt_pool_cfg *pcfg,
+				 const struct mlx5_hws_cache_param *ccfg)
+{
+	/*
+	 * Enable cache if and only if there are enough counters requested
+	 * to populate all of the caches.
+	 */
+	return pcfg->request_num >= ccfg->q_num * ccfg->size;
+}
+
+static struct mlx5_hws_cnt_pool_caches *
+mlx5_hws_cnt_cache_init(const struct mlx5_hws_cnt_pool_cfg *pcfg,
+			const struct mlx5_hws_cache_param *ccfg)
+{
+	struct mlx5_hws_cnt_pool_caches *cache;
+	char mz_name[RTE_MEMZONE_NAMESIZE];
+	uint32_t qidx;
+
+	/* If the counter pool is big enough, set up the counter pool cache. */
+	cache = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO,
+			sizeof(*cache) +
+			sizeof(((struct mlx5_hws_cnt_pool_caches *)0)->qcache[0])
+				* ccfg->q_num, 0, SOCKET_ID_ANY);
+	if (cache == NULL)
+		return NULL;
+	/* Store the necessary cache parameters. */
+	cache->fetch_sz = ccfg->fetch_sz;
+	cache->preload_sz = ccfg->preload_sz;
+	cache->threshold = ccfg->threshold;
+	cache->q_num = ccfg->q_num;
+	for (qidx = 0; qidx < ccfg->q_num; qidx++) {
+		snprintf(mz_name, sizeof(mz_name), "%s_qc/%x", pcfg->name, qidx);
+		cache->qcache[qidx] = rte_ring_create(mz_name, ccfg->size,
+				SOCKET_ID_ANY,
+				RING_F_SP_ENQ | RING_F_SC_DEQ |
+				RING_F_EXACT_SZ);
+		if (cache->qcache[qidx] == NULL)
+			goto error;
+	}
+	return cache;
+
+error:
+	while (qidx--)
+		rte_ring_free(cache->qcache[qidx]);
+	mlx5_free(cache);
+	return NULL;
+}
+
 static struct mlx5_hws_cnt_pool *
 mlx5_hws_cnt_pool_init(struct mlx5_dev_ctx_shared *sh,
 		       const struct mlx5_hws_cnt_pool_cfg *pcfg,
@@ -348,7 +397,6 @@ mlx5_hws_cnt_pool_init(struct mlx5_dev_ctx_shared *sh,
 	char mz_name[RTE_MEMZONE_NAMESIZE];
 	struct mlx5_hws_cnt_pool *cntp;
 	uint64_t cnt_num = 0;
-	uint32_t qidx;
 
 	MLX5_ASSERT(pcfg);
 	MLX5_ASSERT(ccfg);
@@ -360,17 +408,6 @@ mlx5_hws_cnt_pool_init(struct mlx5_dev_ctx_shared *sh,
 	cntp->cfg = *pcfg;
 	if (cntp->cfg.host_cpool)
 		return cntp;
-	cntp->cache = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO,
-			sizeof(*cntp->cache) +
-			sizeof(((struct mlx5_hws_cnt_pool_caches *)0)->qcache[0])
-				* ccfg->q_num, 0, SOCKET_ID_ANY);
-	if (cntp->cache == NULL)
-		goto error;
-	 /* store the necessary cache parameters. */
-	cntp->cache->fetch_sz = ccfg->fetch_sz;
-	cntp->cache->preload_sz = ccfg->preload_sz;
-	cntp->cache->threshold = ccfg->threshold;
-	cntp->cache->q_num = ccfg->q_num;
 	if (pcfg->request_num > sh->hws_max_nb_counters) {
 		DRV_LOG(ERR, "Counter number %u "
 			"is greater than the maximum supported (%u).",
@@ -418,13 +455,10 @@ mlx5_hws_cnt_pool_init(struct mlx5_dev_ctx_shared *sh,
 		DRV_LOG(ERR, "failed to create reuse list ring");
 		goto error;
 	}
-	for (qidx = 0; qidx < ccfg->q_num; qidx++) {
-		snprintf(mz_name, sizeof(mz_name), "%s_qc/%x", pcfg->name, qidx);
-		cntp->cache->qcache[qidx] = rte_ring_create(mz_name, ccfg->size,
-				SOCKET_ID_ANY,
-				RING_F_SP_ENQ | RING_F_SC_DEQ |
-				RING_F_EXACT_SZ);
-		if (cntp->cache->qcache[qidx] == NULL)
+	/* Allocate counter cache only if needed. */
+	if (mlx5_hws_cnt_should_enable_cache(pcfg, ccfg)) {
+		cntp->cache = mlx5_hws_cnt_cache_init(pcfg, ccfg);
+		if (cntp->cache == NULL)
 			goto error;
 	}
 	/* Initialize the time for aging-out calculation. */
diff --git a/drivers/net/mlx5/mlx5_hws_cnt.h b/drivers/net/mlx5/mlx5_hws_cnt.h
index 585b5a83ad..e00596088f 100644
--- a/drivers/net/mlx5/mlx5_hws_cnt.h
+++ b/drivers/net/mlx5/mlx5_hws_cnt.h
@@ -557,19 +557,32 @@ mlx5_hws_cnt_pool_get(struct mlx5_hws_cnt_pool *cpool, uint32_t *queue,
 }
 
 /**
- * Check if counter pool allocated for HWS is shared between ports.
+ * Decide if the given queue can be used to perform counter allocation/deallocation
+ * based on counter configuration.
  *
  * @param[in] priv
  *   Pointer to the port private data structure.
+ * @param[in] queue
+ *   Pointer to the queue index.
  *
  * @return
- *   True if counter pools is shared between ports. False otherwise.
+ *   @p queue if the cache related to the queue can be used, NULL otherwise.
  */
-static __rte_always_inline bool
-mlx5_hws_cnt_is_pool_shared(struct mlx5_priv *priv)
+static __rte_always_inline uint32_t *
+mlx5_hws_cnt_get_queue(struct mlx5_priv *priv, uint32_t *queue)
 {
-	return priv && priv->hws_cpool &&
-	    (priv->shared_refcnt || priv->hws_cpool->cfg.host_cpool != NULL);
+	if (priv && priv->hws_cpool) {
+		/* Do not use queue cache if counter pool is shared. */
+		if (priv->shared_refcnt || priv->hws_cpool->cfg.host_cpool != NULL)
+			return NULL;
+		/* Do not use queue cache if counter cache is disabled. */
+		if (priv->hws_cpool->cache == NULL)
+			return NULL;
+		return queue;
+	}
+	/* This case should not be reached if counter pool was successfully configured. */
+	MLX5_ASSERT(false);
+	return NULL;
 }
 
 static __rte_always_inline unsigned int
-- 
2.39.2


Thread overview: 2+ messages
2024-02-28 19:06 Dariusz Sosnowski [this message]
2024-02-29 13:35 ` Raslan Darawsheh
