patches for DPDK stable branches
 help / color / mirror / Atom feed
* [PATCH 22.11] net/mlx5: fix async flow create error handling
@ 2024-04-03  8:38 Dariusz Sosnowski
  2024-04-03 11:15 ` Luca Boccassi
  0 siblings, 1 reply; 5+ messages in thread
From: Dariusz Sosnowski @ 2024-04-03  8:38 UTC (permalink / raw)
  To: Matan Azrad, Viacheslav Ovsiienko, Suanming Mou; +Cc: stable, Ori Kam

[ upstream commit 5ecc8df4fad3411a53c20406f99b59dc736a6d1e ]

Whenever processing of an asynchronous flow rule create operation failed
after some dynamic flow actions had already been allocated,
these actions were not freed in the error handling path.
That behavior led to leaks, e.g., RSS/QUEUE action objects were leaked,
which triggered assertions during device cleanup.

This patch adds flow rule cleanup handling in case of an error
during async flow rule creation.

Fixes: 3a2f674b6aa8 ("net/mlx5: add queue and RSS HW steering action")
Cc: stable@dpdk.org

Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
Acked-by: Ori Kam <orika@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow_hw.c | 71 +++++++++++++++++++++++----------
 1 file changed, 51 insertions(+), 20 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index f4e125667f..87d29ec0da 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -72,6 +72,10 @@ flow_hw_set_vlan_vid_construct(struct rte_eth_dev *dev,
 static __rte_always_inline uint32_t flow_hw_tx_tag_regc_mask(struct rte_eth_dev *dev);
 static __rte_always_inline uint32_t flow_hw_tx_tag_regc_value(struct rte_eth_dev *dev);
 
+static void
+flow_hw_age_count_release(struct mlx5_priv *priv, uint32_t queue, struct rte_flow_hw *flow,
+			  struct rte_flow_error *error);
+
 const struct mlx5_flow_driver_ops mlx5_flow_hw_drv_ops;
 
 /* DR action flags with different table. */
@@ -2082,6 +2086,30 @@ flow_hw_modify_field_construct(struct mlx5_hw_q_job *job,
 	return 0;
 }
 
+/**
+ * Release any actions allocated for the flow rule during actions construction.
+ *
+ * @param[in] flow
+ *   Pointer to flow structure.
+ */
+static void
+flow_hw_release_actions(struct rte_eth_dev *dev,
+			uint32_t queue,
+			struct rte_flow_hw *flow)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_mtr_pool *pool = priv->hws_mpool;
+
+	if (flow->fate_type == MLX5_FLOW_FATE_JUMP)
+		flow_hw_jump_release(dev, flow->jump);
+	else if (flow->fate_type == MLX5_FLOW_FATE_QUEUE)
+		mlx5_hrxq_obj_release(dev, flow->hrxq);
+	if (mlx5_hws_cnt_id_valid(flow->cnt_id))
+		flow_hw_age_count_release(priv, queue, flow, NULL);
+	if (flow->mtr_id)
+		mlx5_ipool_free(pool->idx_pool, flow->mtr_id);
+}
+
 /**
  * Construct flow action array.
  *
@@ -2191,7 +2219,7 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 					(dev, queue, action, table, it_idx,
 					 at->action_flags, job->flow,
 					 &rule_acts[act_data->action_dst]))
-				return -1;
+				goto error;
 			break;
 		case RTE_FLOW_ACTION_TYPE_VOID:
 			break;
@@ -2211,7 +2239,7 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 			jump = flow_hw_jump_action_register
 				(dev, &table->cfg, jump_group, NULL);
 			if (!jump)
-				return -1;
+				goto error;
 			rule_acts[act_data->action_dst].action =
 			(!!attr.group) ? jump->hws_action : jump->root_action;
 			job->flow->jump = jump;
@@ -2223,7 +2251,7 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 					ft_flag,
 					action);
 			if (!hrxq)
-				return -1;
+				goto error;
 			rule_acts[act_data->action_dst].action = hrxq->action;
 			job->flow->hrxq = hrxq;
 			job->flow->fate_type = MLX5_FLOW_FATE_QUEUE;
@@ -2233,19 +2261,19 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 			if (flow_hw_shared_action_get
 				(dev, act_data, item_flags,
 				 &rule_acts[act_data->action_dst]))
-				return -1;
+				goto error;
 			break;
 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
 			enc_item = ((const struct rte_flow_action_vxlan_encap *)
 				   action->conf)->definition;
 			if (flow_dv_convert_encap_data(enc_item, buf, &encap_len, NULL))
-				return -1;
+				goto error;
 			break;
 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
 			enc_item = ((const struct rte_flow_action_nvgre_encap *)
 				   action->conf)->definition;
 			if (flow_dv_convert_encap_data(enc_item, buf, &encap_len, NULL))
-				return -1;
+				goto error;
 			break;
 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
 			raw_encap_data =
@@ -2267,12 +2295,12 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 								     hw_acts,
 								     action);
 			if (ret)
-				return -1;
+				goto error;
 			break;
 		case RTE_FLOW_ACTION_TYPE_REPRESENTED_PORT:
 			port_action = action->conf;
 			if (!priv->hw_vport[port_action->port_id])
-				return -1;
+				goto error;
 			rule_acts[act_data->action_dst].action =
 					priv->hw_vport[port_action->port_id];
 			break;
@@ -2287,7 +2315,7 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 			jump = flow_hw_jump_action_register
 				(dev, &table->cfg, aso_mtr->fm.group, NULL);
 			if (!jump)
-				return -1;
+				goto error;
 			MLX5_ASSERT
 				(!rule_acts[act_data->action_dst + 1].action);
 			rule_acts[act_data->action_dst + 1].action =
@@ -2296,7 +2324,7 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 			job->flow->jump = jump;
 			job->flow->fate_type = MLX5_FLOW_FATE_JUMP;
 			if (mlx5_aso_mtr_wait(priv->sh, MLX5_HW_INV_QUEUE, aso_mtr))
-				return -1;
+				goto error;
 			break;
 		case RTE_FLOW_ACTION_TYPE_AGE:
 			age = action->conf;
@@ -2311,7 +2339,7 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 							     job->flow->idx,
 							     error);
 			if (age_idx == 0)
-				return -rte_errno;
+				goto error;
 			job->flow->age_idx = age_idx;
 			if (at->action_flags & MLX5_FLOW_ACTION_INDIRECT_COUNT)
 				/*
@@ -2325,7 +2353,7 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 			cnt_queue = mlx5_hws_cnt_get_queue(priv, &queue);
 			ret = mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &cnt_id, age_idx);
 			if (ret != 0)
-				return ret;
+				goto error;
 			ret = mlx5_hws_cnt_pool_get_action_offset
 				(priv->hws_cpool,
 				 cnt_id,
@@ -2333,7 +2361,7 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 				 &rule_acts[act_data->action_dst].counter.offset
 				 );
 			if (ret != 0)
-				return ret;
+				goto error;
 			job->flow->cnt_id = cnt_id;
 			break;
 		case MLX5_RTE_FLOW_ACTION_TYPE_COUNT:
@@ -2344,7 +2372,7 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 				 &rule_acts[act_data->action_dst].counter.offset
 				 );
 			if (ret != 0)
-				return ret;
+				goto error;
 			job->flow->cnt_id = act_data->shared_counter.id;
 			break;
 		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
@@ -2352,7 +2380,7 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 				 ((uint32_t)(uintptr_t)action->conf);
 			if (flow_hw_ct_compile(dev, queue, ct_idx,
 					       &rule_acts[act_data->action_dst]))
-				return -1;
+				goto error;
 			break;
 		case MLX5_RTE_FLOW_ACTION_TYPE_METER_MARK:
 			mtr_id = act_data->shared_meter.id &
@@ -2360,7 +2388,7 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 			/* Find ASO object. */
 			aso_mtr = mlx5_ipool_get(pool->idx_pool, mtr_id);
 			if (!aso_mtr)
-				return -1;
+				goto error;
 			rule_acts[act_data->action_dst].action =
 							pool->action;
 			rule_acts[act_data->action_dst].aso_meter.offset =
@@ -2378,7 +2406,7 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 				act_data->action_dst, action,
 				rule_acts, &job->flow->mtr_id, MLX5_HW_INV_QUEUE);
 			if (ret != 0)
-				return ret;
+				goto error;
 			break;
 		default:
 			break;
@@ -2411,6 +2439,11 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 	if (mlx5_hws_cnt_id_valid(hw_acts->cnt_id))
 		job->flow->cnt_id = hw_acts->cnt_id;
 	return 0;
+
+error:
+	flow_hw_release_actions(dev, queue, job->flow);
+	rte_errno = EINVAL;
+	return -rte_errno;
 }
 
 static const struct rte_flow_item *
@@ -2553,10 +2586,8 @@ flow_hw_async_flow_create(struct rte_eth_dev *dev,
 	if (flow_hw_actions_construct(dev, job,
 				      &table->ats[action_template_index],
 				      pattern_template_index, actions,
-				      rule_acts, queue, error)) {
-		rte_errno = EINVAL;
+				      rule_acts, queue, error))
 		goto free;
-	}
 	rule_items = flow_hw_get_rule_items(dev, table, items,
 					    pattern_template_index, job);
 	if (!rule_items)
-- 
2.39.2


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 22.11] net/mlx5: fix async flow create error handling
  2024-04-03  8:38 [PATCH 22.11] net/mlx5: fix async flow create error handling Dariusz Sosnowski
@ 2024-04-03 11:15 ` Luca Boccassi
  2024-04-03 13:50   ` Dariusz Sosnowski
  0 siblings, 1 reply; 5+ messages in thread
From: Luca Boccassi @ 2024-04-03 11:15 UTC (permalink / raw)
  To: Dariusz Sosnowski, Matan Azrad, Viacheslav Ovsiienko, Suanming Mou
  Cc: stable, Ori Kam

On Wed, 2024-04-03 at 10:38 +0200, Dariusz Sosnowski wrote:
> [ upstream commit 5ecc8df4fad3411a53c20406f99b59dc736a6d1e ]
> 
> Whenever processing of asynchronous flow rule create operation failed,
> but after some dynamic flow actions had already been allocated,
> these actions were not freed during error handling flow.
> That behavior lead to leaks e.g., RSS/QUEUE action objects were leaked
> which triggered assertions during device cleanup.
> 
> This patch adds flow rule cleanup handling in case of an error
> during async flow rule creation.
> 
> Fixes: 3a2f674b6aa8 ("net/mlx5: add queue and RSS HW steering action")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
> Acked-by: Ori Kam <orika@nvidia.com>
> 

This patch does not apply on stable/22.11, how was it tested?

-- 
Kind regards,
Luca Boccassi

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH 22.11] net/mlx5: fix async flow create error handling
  2024-04-03 11:15 ` Luca Boccassi
@ 2024-04-03 13:50   ` Dariusz Sosnowski
  2024-04-03 15:34     ` Luca Boccassi
  0 siblings, 1 reply; 5+ messages in thread
From: Dariusz Sosnowski @ 2024-04-03 13:50 UTC (permalink / raw)
  To: Luca Boccassi; +Cc: stable, Ori Kam, Matan Azrad, Slava Ovsiienko, Suanming Mou

> -----Original Message-----
> From: Luca Boccassi <bluca@debian.org>
> Sent: Wednesday, April 3, 2024 13:16
> To: Dariusz Sosnowski <dsosnowski@nvidia.com>; Matan Azrad
> <matan@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>; Suanming
> Mou <suanmingm@nvidia.com>
> Cc: stable@dpdk.org; Ori Kam <orika@nvidia.com>
> Subject: Re: [PATCH 22.11] net/mlx5: fix async flow create error handling
> 
> External email: Use caution opening links or attachments
> 
> 
> On Wed, 2024-04-03 at 10:38 +0200, Dariusz Sosnowski wrote:
> > [ upstream commit 5ecc8df4fad3411a53c20406f99b59dc736a6d1e ]
> >
> > Whenever processing of asynchronous flow rule create operation failed,
> > but after some dynamic flow actions had already been allocated, these
> > actions were not freed during error handling flow.
> > That behavior lead to leaks e.g., RSS/QUEUE action objects were leaked
> > which triggered assertions during device cleanup.
> >
> > This patch adds flow rule cleanup handling in case of an error during
> > async flow rule creation.
> >
> > Fixes: 3a2f674b6aa8 ("net/mlx5: add queue and RSS HW steering action")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
> > Acked-by: Ori Kam <orika@nvidia.com>
> >
> 
> This patch does not apply on stable/22.11, how was it tested?

The backports were tested today by applying on 22.11 branch on git@github.com:bluca/dpdk-stable.git

Could you please try applying mlx5 patches sent by me in the following order?

1. "net/mlx5: fix flow configure validation"
2. "net/mlx5: fix rollback on failed flow configure"
3. "net/mlx5: fix async flow create error handling"

There might be some conflicts between these patches; maybe that's the reason for the failure. I'm sorry for not mentioning this.
I retested applying them on top of commit efe1d783f474, in that order, and they applied correctly on my side.

Best regards,
Dariusz Sosnowski

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 22.11] net/mlx5: fix async flow create error handling
  2024-04-03 13:50   ` Dariusz Sosnowski
@ 2024-04-03 15:34     ` Luca Boccassi
  2024-04-03 18:09       ` Dariusz Sosnowski
  0 siblings, 1 reply; 5+ messages in thread
From: Luca Boccassi @ 2024-04-03 15:34 UTC (permalink / raw)
  To: Dariusz Sosnowski
  Cc: stable, Ori Kam, Matan Azrad, Slava Ovsiienko, Suanming Mou

On Wed, 3 Apr 2024 at 14:50, Dariusz Sosnowski <dsosnowski@nvidia.com> wrote:
>
> > -----Original Message-----
> > From: Luca Boccassi <bluca@debian.org>
> > Sent: Wednesday, April 3, 2024 13:16
> > To: Dariusz Sosnowski <dsosnowski@nvidia.com>; Matan Azrad
> > <matan@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>; Suanming
> > Mou <suanmingm@nvidia.com>
> > Cc: stable@dpdk.org; Ori Kam <orika@nvidia.com>
> > Subject: Re: [PATCH 22.11] net/mlx5: fix async flow create error handling
> >
> > External email: Use caution opening links or attachments
> >
> >
> > On Wed, 2024-04-03 at 10:38 +0200, Dariusz Sosnowski wrote:
> > > [ upstream commit 5ecc8df4fad3411a53c20406f99b59dc736a6d1e ]
> > >
> > > Whenever processing of asynchronous flow rule create operation failed,
> > > but after some dynamic flow actions had already been allocated, these
> > > actions were not freed during error handling flow.
> > > That behavior lead to leaks e.g., RSS/QUEUE action objects were leaked
> > > which triggered assertions during device cleanup.
> > >
> > > This patch adds flow rule cleanup handling in case of an error during
> > > async flow rule creation.
> > >
> > > Fixes: 3a2f674b6aa8 ("net/mlx5: add queue and RSS HW steering action")
> > > Cc: stable@dpdk.org
> > >
> > > Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
> > > Acked-by: Ori Kam <orika@nvidia.com>
> > >
> >
> > This patch does not apply on stable/22.11, how was it tested?
>
> The backports were tested today by applying on 22.11 branch on git@github.com:bluca/dpdk-stable.git
>
> Could you please try applying mlx5 patches sent by me in the following order?
>
> 1. "net/mlx5: fix flow configure validation"
> 2. "net/mlx5: fix rollback on failed flow configure"
> 3. "net/mlx5: fix async flow create error handling"
>
> There might be some conflicts between these patches, maybe that's the reason for failure. I'm sorry about not mentioning this.
> I retested applying them on commit efe1d783f474, in that order and it applied correctly on my side.

Thanks, that works, my email client showed me the 3rd one as the first
mail, so when downloading the mbox with all 3 it failed. Next time
please use a series so that you know the order you meant is the one we
see too.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH 22.11] net/mlx5: fix async flow create error handling
  2024-04-03 15:34     ` Luca Boccassi
@ 2024-04-03 18:09       ` Dariusz Sosnowski
  0 siblings, 0 replies; 5+ messages in thread
From: Dariusz Sosnowski @ 2024-04-03 18:09 UTC (permalink / raw)
  To: Luca Boccassi; +Cc: stable, Ori Kam, Matan Azrad, Slava Ovsiienko, Suanming Mou

> -----Original Message-----
> From: Luca Boccassi <bluca@debian.org>
> Sent: Wednesday, April 3, 2024 17:34
> To: Dariusz Sosnowski <dsosnowski@nvidia.com>
> Cc: stable@dpdk.org; Ori Kam <orika@nvidia.com>; Matan Azrad
> <matan@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>; Suanming
> Mou <suanmingm@nvidia.com>
> Subject: Re: [PATCH 22.11] net/mlx5: fix async flow create error handling
> 
> External email: Use caution opening links or attachments
> 
> 
> On Wed, 3 Apr 2024 at 14:50, Dariusz Sosnowski <dsosnowski@nvidia.com>
> wrote:
> >
> > > -----Original Message-----
> > > From: Luca Boccassi <bluca@debian.org>
> > > Sent: Wednesday, April 3, 2024 13:16
> > > To: Dariusz Sosnowski <dsosnowski@nvidia.com>; Matan Azrad
> > > <matan@nvidia.com>; Slava Ovsiienko <viacheslavo@nvidia.com>;
> > > Suanming Mou <suanmingm@nvidia.com>
> > > Cc: stable@dpdk.org; Ori Kam <orika@nvidia.com>
> > > Subject: Re: [PATCH 22.11] net/mlx5: fix async flow create error
> > > handling
> > >
> > > External email: Use caution opening links or attachments
> > >
> > >
> > > On Wed, 2024-04-03 at 10:38 +0200, Dariusz Sosnowski wrote:
> > > > [ upstream commit 5ecc8df4fad3411a53c20406f99b59dc736a6d1e ]
> > > >
> > > > Whenever processing of asynchronous flow rule create operation
> > > > failed, but after some dynamic flow actions had already been
> > > > allocated, these actions were not freed during error handling flow.
> > > > That behavior lead to leaks e.g., RSS/QUEUE action objects were
> > > > leaked which triggered assertions during device cleanup.
> > > >
> > > > This patch adds flow rule cleanup handling in case of an error
> > > > during async flow rule creation.
> > > >
> > > > Fixes: 3a2f674b6aa8 ("net/mlx5: add queue and RSS HW steering
> > > > action")
> > > > Cc: stable@dpdk.org
> > > >
> > > > Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
> > > > Acked-by: Ori Kam <orika@nvidia.com>
> > > >
> > >
> > > This patch does not apply on stable/22.11, how was it tested?
> >
> > The backports were tested today by applying on 22.11 branch on
> > git@github.com:bluca/dpdk-stable.git
> >
> > Could you please try applying mlx5 patches sent by me in the following
> order?
> >
> > 1. "net/mlx5: fix flow configure validation"
> > 2. "net/mlx5: fix rollback on failed flow configure"
> > 3. "net/mlx5: fix async flow create error handling"
> >
> > There might be some conflicts between these patches, maybe that's the
> reason for failure. I'm sorry about not mentioning this.
> > I retested applying them on commit efe1d783f474, in that order and it
> applied correctly on my side.
> 
> Thanks, that works, my email client showed me the 3rd one as the first mail,
> so when downloading the mbox with all 3 it failed. Next time please use a
> series so that you know the order you meant is the one we see too.

Of course, will do. Thank you.

Best regards,
Dariusz Sosnowski

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-04-03 18:09 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-03  8:38 [PATCH 22.11] net/mlx5: fix async flow create error handling Dariusz Sosnowski
2024-04-03 11:15 ` Luca Boccassi
2024-04-03 13:50   ` Dariusz Sosnowski
2024-04-03 15:34     ` Luca Boccassi
2024-04-03 18:09       ` Dariusz Sosnowski

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).