* [PATCH22.11 v1] net/mlx5: fix age checking crash
From: Rongwei Liu @ 2025-10-28 8:16 UTC
To: stable, matan, viacheslavo, orika, suanmingm, thomas; +Cc: michaelba
When aging is configured, a background thread periodically
queries all the counters in the pool.

Meanwhile, per-queue flow insertion/deletion/update modifies
the counter pool too. This introduces a race between resetting
a counter's in_used and age_idx fields during flow deletion and
reading them in the background thread.

To resolve it, all the racy members of the counter structure
are packed into a single uint32_t and accessed atomically.

To avoid the query generation value occasionally aliasing a
valid age_idx, query_gen_when_free is moved out of the union.
The total structure size is kept the same.
Fixes: 04a4de756e14 ("net/mlx5: support flow age action with HWS")
Cc: michaelba@nvidia.com
Cc: stable@dpdk.org
Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
---
drivers/net/mlx5/mlx5_flow_hw.c | 5 +-
drivers/net/mlx5/mlx5_hws_cnt.c | 8 +--
drivers/net/mlx5/mlx5_hws_cnt.h | 124 +++++++++++++++++++-------------
3 files changed, 82 insertions(+), 55 deletions(-)
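
For context, the core of the fix is a common lock-free pattern: all
fields that the data path and the aging thread race on are packed into
one 32-bit word, so a single atomic load always returns a consistent
snapshot and a single atomic store publishes all fields at once. Below
is a minimal, self-contained sketch of that pattern with illustrative
names (the real definitions are in mlx5_hws_cnt.h in the diff that
follows; the patch itself also relaxes some of these memory orderings
where the surrounding code already provides ordering):

#include <stdint.h>

union cnt_state {
        uint32_t data;
        struct {
                uint32_t in_used:1;  /* counter owned by a flow */
                uint32_t share:1;    /* used as an indirect action */
                uint32_t age_idx:24; /* AGE parameter index, 0 if none */
        };
};

/* Writer (flow create/destroy path): publish all fields in one store. */
static inline void
cnt_state_set(union cnt_state *st, uint32_t in_used, uint32_t share,
              uint32_t age_idx)
{
        union cnt_state tmp = { .data = 0 };

        tmp.in_used = !!in_used;
        tmp.share = !!share;
        tmp.age_idx = age_idx;
        __atomic_store_n(&st->data, tmp.data, __ATOMIC_RELEASE);
}

/* Reader (background aging thread): one atomic load, consistent view. */
static inline uint32_t
cnt_state_read_age(union cnt_state *st, uint32_t *in_used)
{
        union cnt_state tmp;

        tmp.data = __atomic_load_n(&st->data, __ATOMIC_ACQUIRE);
        if (in_used != NULL)
                *in_used = tmp.in_used;
        return tmp.age_idx;
}

Because the whole state lives in one aligned uint32_t, the aging thread
can never observe a half-updated combination of in_used and age_idx,
which is the crash this patch fixes.
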
diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index 5fbbd487de..df02bd237c 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -1933,7 +1933,7 @@ flow_hw_shared_action_construct(struct rte_eth_dev *dev, uint32_t queue,
return -1;
if (action_flags & MLX5_FLOW_ACTION_COUNT) {
cnt_queue = mlx5_hws_cnt_get_queue(priv, &queue);
- if (mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &age_cnt, idx) < 0)
+ if (mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &age_cnt, idx, 1) < 0)
return -1;
flow->cnt_id = age_cnt;
param->nb_cnts++;
@@ -2321,7 +2321,8 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
/* Fall-through. */
case RTE_FLOW_ACTION_TYPE_COUNT:
cnt_queue = mlx5_hws_cnt_get_queue(priv, &queue);
- ret = mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &cnt_id, age_idx);
+ ret = mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &cnt_id,
+ age_idx, 0);
if (ret != 0) {
rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ACTION,
action, "Failed to allocate flow counter");
diff --git a/drivers/net/mlx5/mlx5_hws_cnt.c b/drivers/net/mlx5/mlx5_hws_cnt.c
index 6a0c371cd9..956ee5a172 100644
--- a/drivers/net/mlx5/mlx5_hws_cnt.c
+++ b/drivers/net/mlx5/mlx5_hws_cnt.c
@@ -72,8 +72,8 @@ __mlx5_hws_cnt_svc(struct mlx5_dev_ctx_shared *sh,
uint32_t ret __rte_unused;
reset_cnt_num = rte_ring_count(reset_list);
- cpool->query_gen++;
mlx5_aso_cnt_query(sh, cpool);
+ __atomic_store_n(&cpool->query_gen, cpool->query_gen + 1, __ATOMIC_RELEASE);
zcdr.n1 = 0;
zcdu.n1 = 0;
ret = rte_ring_enqueue_zc_burst_elem_start(reuse_list,
@@ -143,14 +143,14 @@ mlx5_hws_aging_check(struct mlx5_priv *priv, struct mlx5_hws_cnt_pool *cpool)
uint32_t nb_alloc_cnts = mlx5_hws_cnt_pool_get_size(cpool);
uint16_t expected1 = HWS_AGE_CANDIDATE;
uint16_t expected2 = HWS_AGE_CANDIDATE_INSIDE_RING;
- uint32_t i;
+ uint32_t i, age_idx, in_use;
cpool->time_of_last_age_check = curr_time;
for (i = 0; i < nb_alloc_cnts; ++i) {
- uint32_t age_idx = cpool->pool[i].age_idx;
uint64_t hits;
- if (!cpool->pool[i].in_used || age_idx == 0)
+ mlx5_hws_cnt_get_all(&cpool->pool[i], &in_use, NULL, &age_idx);
+ if (!in_use || age_idx == 0)
continue;
param = mlx5_ipool_get(age_info->ages_ipool, age_idx);
if (unlikely(param == NULL)) {
diff --git a/drivers/net/mlx5/mlx5_hws_cnt.h b/drivers/net/mlx5/mlx5_hws_cnt.h
index 72751f3330..115586fced 100644
--- a/drivers/net/mlx5/mlx5_hws_cnt.h
+++ b/drivers/net/mlx5/mlx5_hws_cnt.h
@@ -42,33 +42,36 @@ struct mlx5_hws_cnt_dcs_mng {
struct mlx5_hws_cnt_dcs dcs[MLX5_HWS_CNT_DCS_NUM];
};
-struct mlx5_hws_cnt {
- struct flow_counter_stats reset;
- bool in_used; /* Indicator whether this counter in used or in pool. */
- union {
- struct {
- uint32_t share:1;
- /*
- * share will be set to 1 when this counter is used as
- * indirect action.
- */
- uint32_t age_idx:24;
- /*
- * When this counter uses for aging, it save the index
- * of AGE parameter. For pure counter (without aging)
- * this index is zero.
- */
- };
- /* This struct is only meaningful when user own this counter. */
- uint32_t query_gen_when_free;
+union mlx5_hws_cnt_state {
+ uint32_t data;
+ struct {
+ uint32_t in_used:1;
+ /* Indicator whether this counter is in use or in the pool. */
+ uint32_t share:1;
+ /*
+ * share will be set to 1 when this counter is used as
+ * an indirect action.
+ */
+ uint32_t age_idx:24;
/*
- * When PMD own this counter (user put back counter to PMD
- * counter pool, i.e), this field recorded value of counter
- * pools query generation at time user release the counter.
+ * When this counter is used for aging, it stores the index
+ * of the AGE parameter. Otherwise, this index is zero.
*/
};
};
+struct mlx5_hws_cnt {
+ struct flow_counter_stats reset;
+ union mlx5_hws_cnt_state cnt_state;
+ /* Only meaningful while the user owns this counter. */
+ uint32_t query_gen_when_free;
+ /*
+ * When the PMD owns this counter (i.e. the user has put it
+ * back into the PMD counter pool), this field records the
+ * pool's query generation at the time the user released it.
+ */
+};
+
struct mlx5_hws_cnt_raw_data_mng {
struct flow_counter_stats *raw;
struct mlx5_pmd_mr mr;
@@ -179,6 +182,42 @@ mlx5_hws_cnt_id_valid(cnt_id_t cnt_id)
MLX5_INDIRECT_ACTION_TYPE_COUNT ? true : false;
}
+static __rte_always_inline void
+mlx5_hws_cnt_set_age_idx(struct mlx5_hws_cnt *cnt, uint32_t value)
+{
+ union mlx5_hws_cnt_state cnt_state;
+
+ cnt_state.data = __atomic_load_n(&cnt->cnt_state.data, __ATOMIC_ACQUIRE);
+ cnt_state.age_idx = value;
+ __atomic_store_n(&cnt->cnt_state.data, cnt_state.data, __ATOMIC_RELEASE);
+}
+
+static __rte_always_inline void
+mlx5_hws_cnt_set_all(struct mlx5_hws_cnt *cnt, uint32_t in_used, uint32_t share, uint32_t age_idx)
+{
+ union mlx5_hws_cnt_state cnt_state;
+
+ cnt_state.in_used = !!in_used;
+ cnt_state.share = !!share;
+ cnt_state.age_idx = age_idx;
+ __atomic_store_n(&cnt->cnt_state.data, cnt_state.data, __ATOMIC_RELAXED);
+}
+
+static __rte_always_inline void
+mlx5_hws_cnt_get_all(struct mlx5_hws_cnt *cnt, uint32_t *in_used, uint32_t *share,
+ uint32_t *age_idx)
+{
+ union mlx5_hws_cnt_state cnt_state;
+
+ cnt_state.data = __atomic_load_n(&cnt->cnt_state.data, __ATOMIC_ACQUIRE);
+ if (in_used != NULL)
+ *in_used = cnt_state.in_used;
+ if (share != NULL)
+ *share = cnt_state.share;
+ if (age_idx != NULL)
+ *age_idx = cnt_state.age_idx;
+}
+
/**
* Generate Counter id from internal index.
*
@@ -402,8 +441,7 @@ mlx5_hws_cnt_pool_put(struct mlx5_hws_cnt_pool *cpool, uint32_t *queue,
uint32_t iidx;
iidx = mlx5_hws_cnt_iidx(cpool, *cnt_id);
- MLX5_ASSERT(cpool->pool[iidx].in_used);
- cpool->pool[iidx].in_used = false;
+ mlx5_hws_cnt_set_all(&cpool->pool[iidx], 0, 0, 0);
cpool->pool[iidx].query_gen_when_free =
__atomic_load_n(&cpool->query_gen, __ATOMIC_RELAXED);
if (likely(queue != NULL))
@@ -459,7 +497,7 @@ mlx5_hws_cnt_pool_put(struct mlx5_hws_cnt_pool *cpool, uint32_t *queue,
*/
static __rte_always_inline int
mlx5_hws_cnt_pool_get(struct mlx5_hws_cnt_pool *cpool, uint32_t *queue,
- cnt_id_t *cnt_id, uint32_t age_idx)
+ cnt_id_t *cnt_id, uint32_t age_idx, uint32_t shared)
{
unsigned int ret;
struct rte_ring_zc_data zcdc = {0};
@@ -486,9 +524,7 @@ mlx5_hws_cnt_pool_get(struct mlx5_hws_cnt_pool *cpool, uint32_t *queue,
__hws_cnt_query_raw(cpool, *cnt_id,
&cpool->pool[iidx].reset.hits,
&cpool->pool[iidx].reset.bytes);
- MLX5_ASSERT(!cpool->pool[iidx].in_used);
- cpool->pool[iidx].in_used = true;
- cpool->pool[iidx].age_idx = age_idx;
+ mlx5_hws_cnt_set_all(&cpool->pool[iidx], 1, shared, age_idx);
return 0;
}
ret = rte_ring_dequeue_zc_burst_elem_start(qcache, sizeof(cnt_id_t), 1,
@@ -526,10 +562,7 @@ mlx5_hws_cnt_pool_get(struct mlx5_hws_cnt_pool *cpool, uint32_t *queue,
__hws_cnt_query_raw(cpool, *cnt_id, &cpool->pool[iidx].reset.hits,
&cpool->pool[iidx].reset.bytes);
rte_ring_dequeue_zc_elem_finish(qcache, 1);
- cpool->pool[iidx].share = 0;
- MLX5_ASSERT(!cpool->pool[iidx].in_used);
- cpool->pool[iidx].in_used = true;
- cpool->pool[iidx].age_idx = age_idx;
+ mlx5_hws_cnt_set_all(&cpool->pool[iidx], 1, shared, age_idx);
return 0;
}
@@ -582,23 +615,12 @@ static __rte_always_inline int
mlx5_hws_cnt_shared_get(struct mlx5_hws_cnt_pool *cpool, cnt_id_t *cnt_id,
uint32_t age_idx)
{
- int ret;
- uint32_t iidx;
-
- ret = mlx5_hws_cnt_pool_get(cpool, NULL, cnt_id, age_idx);
- if (ret != 0)
- return ret;
- iidx = mlx5_hws_cnt_iidx(cpool, *cnt_id);
- cpool->pool[iidx].share = 1;
- return 0;
+ return mlx5_hws_cnt_pool_get(cpool, NULL, cnt_id, age_idx, 1);
}
static __rte_always_inline void
mlx5_hws_cnt_shared_put(struct mlx5_hws_cnt_pool *cpool, cnt_id_t *cnt_id)
{
- uint32_t iidx = mlx5_hws_cnt_iidx(cpool, *cnt_id);
-
- cpool->pool[iidx].share = 0;
mlx5_hws_cnt_pool_put(cpool, NULL, cnt_id);
}
@@ -606,8 +628,10 @@ static __rte_always_inline bool
mlx5_hws_cnt_is_shared(struct mlx5_hws_cnt_pool *cpool, cnt_id_t cnt_id)
{
uint32_t iidx = mlx5_hws_cnt_iidx(cpool, cnt_id);
+ uint32_t share;
- return cpool->pool[iidx].share ? true : false;
+ mlx5_hws_cnt_get_all(&cpool->pool[iidx], NULL, &share, NULL);
+ return !!share;
}
static __rte_always_inline void
@@ -616,17 +640,19 @@ mlx5_hws_cnt_age_set(struct mlx5_hws_cnt_pool *cpool, cnt_id_t cnt_id,
{
uint32_t iidx = mlx5_hws_cnt_iidx(cpool, cnt_id);
- MLX5_ASSERT(cpool->pool[iidx].share);
- cpool->pool[iidx].age_idx = age_idx;
+ MLX5_ASSERT(cpool->pool[iidx].cnt_state.share);
+ mlx5_hws_cnt_set_age_idx(&cpool->pool[iidx], age_idx);
}
static __rte_always_inline uint32_t
mlx5_hws_cnt_age_get(struct mlx5_hws_cnt_pool *cpool, cnt_id_t cnt_id)
{
uint32_t iidx = mlx5_hws_cnt_iidx(cpool, cnt_id);
+ uint32_t age_idx, share;
- MLX5_ASSERT(cpool->pool[iidx].share);
- return cpool->pool[iidx].age_idx;
+ mlx5_hws_cnt_get_all(&cpool->pool[iidx], NULL, &share, &age_idx);
+ MLX5_ASSERT(share);
+ return age_idx;
}
static __rte_always_inline cnt_id_t
--
2.27.0
* Re: [PATCH22.11 v1] net/mlx5: fix age checking crash
From: Luca Boccassi @ 2025-10-28 12:13 UTC
To: Rongwei Liu
Cc: stable, matan, viacheslavo, orika, suanmingm, thomas, michaelba
On Tue, 28 Oct 2025 at 08:17, Rongwei Liu <rongweil@nvidia.com> wrote:
>
> [...]
>
> Fixes: 04a4de756e14 ("net/mlx5: support flow age action with HWS")
> Cc: michaelba@nvidia.com
> Cc: stable@dpdk.org
> Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
Hi,
I cannot seem to find this patch on the main branch.
Was it merged as a different commit?
* Re: [PATCH22.11 v1] net/mlx5: fix age checking crash
From: Thomas Monjalon @ 2025-10-28 13:10 UTC
To: Rongwei Liu, Luca Boccassi
Cc: stable, matan, viacheslavo, orika, suanmingm, michaelba
28/10/2025 13:13, Luca Boccassi:
> [...]
>
> Hi,
>
> I cannot seem to find this patch on the main branch.
> Was it merged as a different commit?
It seems Rongwei didn't reuse the title that was updated while merging into main:
https://git.dpdk.org/dpdk/commit/?id=820ca7361bb
Note that the new title is more precise; please use it:
net/mlx5: fix flow aging race condition
* Re: [PATCH22.11 v1] net/mlx5: fix age checking crash
From: Luca Boccassi @ 2025-10-28 14:48 UTC
To: Thomas Monjalon
Cc: Rongwei Liu, stable, matan, viacheslavo, orika, suanmingm, michaelba
On Tue, 28 Oct 2025 at 13:10, Thomas Monjalon <thomas@monjalon.net> wrote:
>
> 28/10/2025 13:13, Luca Boccassi:
> > [...]
> >
> > Hi,
> >
> > I cannot seem to find this patch on the main branch.
> > Was it merged as a different commit?
>
> It seems Rongwei didn't reuse the title that was updated while merging into main:
>
> https://git.dpdk.org/dpdk/commit/?id=820ca7361bb
>
> Note that the new title is more precise; please use it:
> net/mlx5: fix flow aging race condition
Sounds good, thanks
* Re: [PATCH22.11 v1] net/mlx5: fix age checking crash
From: rongwei liu @ 2025-10-29 1:48 UTC
To: Luca Boccassi, Thomas Monjalon
Cc: stable, matan, viacheslavo, orika, suanmingm, michaelba
Thanks Thomas.
Hi Luca, I just noticed that the patch title was changed when it was merged.
Sorry for the confusion.
On 2025/10/28 22:48, Luca Boccassi wrote:
> On Tue, 28 Oct 2025 at 13:10, Thomas Monjalon <thomas@monjalon.net> wrote:
>>
>> 28/10/2025 13:13, Luca Boccassi:
>>> [...]
>>>
>>> Hi,
>>>
>>> I cannot seem to find this patch on the main branch.
>>> Was it merged as a different commit?
>>
>> It seems Rongwei didn't reuse the title that was updated while merging into main:
>>
>> https://git.dpdk.org/dpdk/commit/?id=820ca7361bb
>>
>> Note that the new title is more precise; please use it:
>> net/mlx5: fix flow aging race condition
>
> Sounds good, thanks