patches for DPDK stable branches
* [PATCH] net/mlx5: fix the Rx queue control management
@ 2024-11-04 16:15 Bing Zhao
  2024-11-13 13:38 ` Raslan Darawsheh
From: Bing Zhao @ 2024-11-04 16:15 UTC
  To: dsosnowski, viacheslavo, dev, rasland; +Cc: orika, suanmingm, matan, stable

With the shared Rx queue feature introduced, the control and private
Rx queue structures are decoupled; each control structure can be
shared by multiple queues across all representors inside a domain.

So it should be managed only by the shared context, not by the
private data of any single device. The previous workaround used a
flag to record the owner (allocator) of the structure and handled it
only at that owning device's closing stage.

The proper solution is to add a reference count to each control
structure and free it only when the last reference is dropped, which
eliminates the use-after-free (UAF) issue.
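
To illustrate the scheme in isolation, here is a minimal sketch using
plain C11 atomics rather than the driver's rte_atomic_* wrappers; the
names ctrl, ctrl_new, ctrl_attach and ctrl_detach are invented for
this example and do not exist in the driver:

	#include <stdatomic.h>
	#include <stdlib.h>

	struct ctrl {
		atomic_uint ctrl_ref; /* One reference per attached queue. */
		/* ... shared queue state ... */
	};

	static struct ctrl *
	ctrl_new(void)
	{
		struct ctrl *c = calloc(1, sizeof(*c));

		if (c != NULL)
			atomic_init(&c->ctrl_ref, 1); /* Creator holds one ref. */
		return c;
	}

	/* Each queue attaching to the shared structure takes a reference. */
	static void
	ctrl_attach(struct ctrl *c)
	{
		atomic_fetch_add_explicit(&c->ctrl_ref, 1,
					  memory_order_relaxed);
	}

	/*
	 * fetch_sub returns the previous value, so only the caller that
	 * drops the last reference frees the structure; no other owner
	 * can touch freed memory afterwards.
	 */
	static void
	ctrl_detach(struct ctrl *c)
	{
		if (atomic_fetch_sub_explicit(&c->ctrl_ref, 1,
					      memory_order_relaxed) == 1)
			free(c);
	}

Relaxed ordering in the sketch mirrors the patch below, where the
reference updates happen on the serialized queue setup/release
control path.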

Fixes: f957ac996435 ("net/mlx5: workaround list management of Rx queue control")
Fixes: bcc220cb57d7 ("net/mlx5: fix shared Rx queue list management")
CC: stable@dpdk.org

Signed-off-by: Bing Zhao <bingz@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Acked-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
---
 drivers/net/mlx5/mlx5.h      |  1 -
 drivers/net/mlx5/mlx5_flow.c |  4 ++--
 drivers/net/mlx5/mlx5_rx.h   |  3 +--
 drivers/net/mlx5/mlx5_rxq.c  | 20 ++++++++++----------
 4 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index b6be4646ef..6db02da3b8 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1978,7 +1978,6 @@ struct mlx5_priv {
 	uint32_t ctrl_flows; /* Control flow rules. */
 	rte_spinlock_t flow_list_lock;
 	struct mlx5_obj_ops obj_ops; /* HW objects operations. */
-	LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. */
 	LIST_HEAD(rxqobj, mlx5_rxq_obj) rxqsobj; /* Verbs/DevX Rx queues. */
 	struct mlx5_list *hrxqs; /* Hash Rx queues. */
 	LIST_HEAD(txq, mlx5_txq_ctrl) txqsctrl; /* DPDK Tx queues. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 9c43201e05..acae2bb063 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1648,13 +1648,13 @@ flow_rxq_mark_flag_set(struct rte_eth_dev *dev)
 			    opriv->domain_id != priv->domain_id ||
 			    opriv->mark_enabled)
 				continue;
-			LIST_FOREACH(rxq_ctrl, &opriv->rxqsctrl, next) {
+			LIST_FOREACH(rxq_ctrl, &opriv->sh->shared_rxqs, share_entry) {
 				rxq_ctrl->rxq.mark = 1;
 			}
 			opriv->mark_enabled = 1;
 		}
 	} else {
-		LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
+		LIST_FOREACH(rxq_ctrl, &priv->sh->shared_rxqs, share_entry) {
 			rxq_ctrl->rxq.mark = 1;
 		}
 		priv->mark_enabled = 1;
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index 9bcb43b007..da7c448948 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -151,13 +151,13 @@ struct __rte_cache_aligned mlx5_rxq_data {
 /* RX queue control descriptor. */
 struct mlx5_rxq_ctrl {
 	struct mlx5_rxq_data rxq; /* Data path structure. */
-	LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */
 	LIST_HEAD(priv, mlx5_rxq_priv) owners; /* Owner rxq list. */
 	struct mlx5_rxq_obj *obj; /* Verbs/DevX elements. */
 	struct mlx5_dev_ctx_shared *sh; /* Shared context. */
 	bool is_hairpin; /* Whether RxQ type is Hairpin. */
 	unsigned int socket; /* CPU socket ID for allocations. */
 	LIST_ENTRY(mlx5_rxq_ctrl) share_entry; /* Entry in shared RXQ list. */
+	RTE_ATOMIC(uint32_t) ctrl_ref; /* Reference counter. */
 	uint32_t share_group; /* Group ID of shared RXQ. */
 	uint16_t share_qid; /* Shared RxQ ID in group. */
 	unsigned int started:1; /* Whether (shared) RXQ has been started. */
@@ -173,7 +173,6 @@ struct mlx5_rxq_ctrl {
 /* RX queue private data. */
 struct mlx5_rxq_priv {
 	uint16_t idx; /* Queue index. */
-	bool possessor; /* Shared rxq_ctrl allocated for the 1st time. */
 	RTE_ATOMIC(uint32_t) refcnt; /* Reference counter. */
 	struct mlx5_rxq_ctrl *ctrl; /* Shared Rx Queue. */
 	LIST_ENTRY(mlx5_rxq_priv) owner_entry; /* Entry in shared rxq_ctrl. */
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 5eac224b76..d437835b73 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -946,7 +946,6 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			rte_errno = ENOMEM;
 			return -rte_errno;
 		}
-		rxq->possessor = true;
 	}
 	rxq->priv = priv;
 	rxq->idx = idx;
@@ -954,6 +953,7 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	/* Join owner list. */
 	LIST_INSERT_HEAD(&rxq_ctrl->owners, rxq, owner_entry);
 	rxq->ctrl = rxq_ctrl;
+	rte_atomic_fetch_add_explicit(&rxq_ctrl->ctrl_ref, 1, rte_memory_order_relaxed);
 	mlx5_rxq_ref(dev, idx);
 	DRV_LOG(DEBUG, "port %u adding Rx queue %u to list",
 		dev->data->port_id, idx);
@@ -1970,9 +1970,9 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		tmpl->rxq.shared = 1;
 		tmpl->share_group = conf->share_group;
 		tmpl->share_qid = conf->share_qid;
-		LIST_INSERT_HEAD(&priv->sh->shared_rxqs, tmpl, share_entry);
 	}
-	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
+	LIST_INSERT_HEAD(&priv->sh->shared_rxqs, tmpl, share_entry);
+	rte_atomic_store_explicit(&tmpl->ctrl_ref, 1, rte_memory_order_relaxed);
 	return tmpl;
 error:
 	mlx5_mr_btree_free(&tmpl->rxq.mr_ctrl.cache_bh);
@@ -2024,9 +2024,9 @@ mlx5_rxq_hairpin_new(struct rte_eth_dev *dev, struct mlx5_rxq_priv *rxq,
 	tmpl->rxq.mr_ctrl.cache_bh = (struct mlx5_mr_btree) { 0 };
 	tmpl->rxq.idx = idx;
 	rxq->hairpin_conf = *hairpin_conf;
-	rxq->possessor = true;
 	mlx5_rxq_ref(dev, idx);
-	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
+	LIST_INSERT_HEAD(&priv->sh->shared_rxqs, tmpl, share_entry);
+	rte_atomic_store_explicit(&tmpl->ctrl_ref, 1, rte_memory_order_relaxed);
 	return tmpl;
 }
 
@@ -2292,16 +2292,16 @@ mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx)
 					RTE_ETH_QUEUE_STATE_STOPPED;
 		}
 	} else { /* Refcnt zero, closing device. */
-		if (rxq->possessor)
-			LIST_REMOVE(rxq_ctrl, next);
 		LIST_REMOVE(rxq, owner_entry);
 		if (LIST_EMPTY(&rxq_ctrl->owners)) {
 			if (!rxq_ctrl->is_hairpin)
 				mlx5_mr_btree_free
 					(&rxq_ctrl->rxq.mr_ctrl.cache_bh);
-			if (rxq_ctrl->rxq.shared)
+			if (rte_atomic_fetch_sub_explicit(&rxq_ctrl->ctrl_ref, 1,
+			    rte_memory_order_relaxed) == 1) {
 				LIST_REMOVE(rxq_ctrl, share_entry);
-			mlx5_free(rxq_ctrl);
+				mlx5_free(rxq_ctrl);
+			}
 		}
 		dev->data->rx_queues[idx] = NULL;
 		mlx5_free(rxq);
@@ -2326,7 +2326,7 @@ mlx5_rxq_verify(struct rte_eth_dev *dev)
 	struct mlx5_rxq_ctrl *rxq_ctrl;
 	int ret = 0;
 
-	LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
+	LIST_FOREACH(rxq_ctrl, &priv->sh->shared_rxqs, share_entry) {
 		DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced",
 			dev->data->port_id, rxq_ctrl->rxq.idx);
 		++ret;
-- 
2.34.1



* Re: [PATCH] net/mlx5: fix the Rx queue control management
  2024-11-04 16:15 [PATCH] net/mlx5: fix the Rx queue control management Bing Zhao
@ 2024-11-13 13:38 ` Raslan Darawsheh
From: Raslan Darawsheh @ 2024-11-13 13:38 UTC
  To: Bing Zhao, Dariusz Sosnowski, Slava Ovsiienko, dev
  Cc: Ori Kam, Suanming Mou, Matan Azrad, stable

Hi,

From: Bing Zhao <bingz@nvidia.com>
Sent: Monday, November 4, 2024 6:15 PM
To: Dariusz Sosnowski; Slava Ovsiienko; dev@dpdk.org; Raslan Darawsheh
Cc: Ori Kam; Suanming Mou; Matan Azrad; stable@dpdk.org
Subject: [PATCH] net/mlx5: fix the Rx queue control management


Patch applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh

