From: Gregory Etelson <getelson@nvidia.com>
To: <dev@dpdk.org>
Cc: <getelson@nvidia.com>, <mkashani@nvidia.com>,
	<rasland@nvidia.com>,
	Viacheslav Ovsiienko <viacheslavo@nvidia.com>, <stable@dpdk.org>,
	"Dariusz Sosnowski" <dsosnowski@nvidia.com>,
	Bing Zhao <bingz@nvidia.com>, Ori Kam <orika@nvidia.com>,
	Suanming Mou <suanmingm@nvidia.com>,
	Matan Azrad <matan@nvidia.com>, Xueming Li <xuemingl@nvidia.com>
Subject: [PATCH 2/3] net/mlx5: fix control flow leakage for external SQ
Date: Wed, 29 Oct 2025 17:57:09 +0200
Message-ID: <20251029155711.169580-2-getelson@nvidia.com>
In-Reply-To: <20251029155711.169580-1-getelson@nvidia.com>

From: Viacheslav Ovsiienko <viacheslavo@nvidia.com>

The private API rte_pmd_mlx5_external_sq_enable() allows an
application to create a Send Queue (SQ) on its own and then
enable it for use as an "external SQ".
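
For illustration, a minimal sketch of the enable path (assuming
<rte_errno.h>, <rte_pmd_mlx5.h> and <stdio.h> are included; the
SQ creation helper my_create_devx_sq() is hypothetical, only the
rte_pmd_mlx5_external_sq_enable() call is the actual API):

	/* The application creates its own SQ, e.g. via DevX;
	 * the helper below is hypothetical. */
	uint32_t sq_num = my_create_devx_sq(port_id);

	/* Ask the PMD to install the implicit "external SQ" flows. */
	if (rte_pmd_mlx5_external_sq_enable(port_id, sq_num) < 0)
		printf("external SQ enable failed: %s\n",
		       rte_strerror(rte_errno));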

On this enabling call, some implicit flows are created to provide
compliant SQ behavior - copying the metadata register, forwarding
queue-originated packets to the correct VF, etc.

These implicit flows are marked as "external" and are not cleaned
up on device start and stop. Moreover, the PMD does not know
whether an external SQ is still in use by the application, so it
cannot perform an implicit cleanup.

As a result, over multiple device start/stop cycles the
application re-creates and re-enables many external SQs, causing
the implicit flow tables to overflow.

To resolve this issue, the rte_pmd_mlx5_external_sq_disable()
API is provided. It allows the application to notify the PMD
that an external SQ is no longer in use, so that the related
implicit flows can be removed.
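
As a hedged sketch of the intended pairing (the destroy helper
my_destroy_devx_sq() is hypothetical; the disable call is the API
added by this patch):

	/* Before destroying the SQ, let the PMD remove the implicit
	 * flows so the control flow tables do not keep growing. */
	if (rte_pmd_mlx5_external_sq_disable(port_id, sq_num) < 0)
		printf("external SQ disable failed: %s\n",
		       rte_strerror(rte_errno));
	my_destroy_devx_sq(sq_num); /* hypothetical helper */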

Fixes: 26e1eaf2dac4 ("net/mlx5: support device control for E-Switch default rule")
Cc: stable@dpdk.org

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Acked-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.h    |  12 ++--
 drivers/net/mlx5/mlx5_flow_hw.c | 106 +++++++++++++++++++++++++++++++-
 drivers/net/mlx5/mlx5_trigger.c |   2 +-
 drivers/net/mlx5/mlx5_txq.c     |  55 +++++++++++++++--
 drivers/net/mlx5/rte_pmd_mlx5.h |  18 ++++++
 5 files changed, 181 insertions(+), 12 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 07d2f4185c..adfe84ef54 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -3580,12 +3580,16 @@ int mlx5_flow_hw_flush_ctrl_flows(struct rte_eth_dev *dev);
 int mlx5_flow_hw_esw_create_sq_miss_flow(struct rte_eth_dev *dev,
 					 uint32_t sqn, bool external);
 int mlx5_flow_hw_esw_destroy_sq_miss_flow(struct rte_eth_dev *dev,
-					  uint32_t sqn);
+					  uint32_t sqn, bool external);
 int mlx5_flow_hw_esw_create_default_jump_flow(struct rte_eth_dev *dev);
 int mlx5_flow_hw_create_tx_default_mreg_copy_flow(struct rte_eth_dev *dev,
-						  uint32_t sqn,
-						  bool external);
-int mlx5_flow_hw_tx_repr_matching_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external);
+						  uint32_t sqn, bool external);
+int mlx5_flow_hw_destroy_tx_default_mreg_copy_flow(struct rte_eth_dev *dev,
+						   uint32_t sqn, bool external);
+int mlx5_flow_hw_create_tx_repr_matching_flow(struct rte_eth_dev *dev,
+					      uint32_t sqn, bool external);
+int mlx5_flow_hw_destroy_tx_repr_matching_flow(struct rte_eth_dev *dev,
+					       uint32_t sqn, bool external);
 int mlx5_flow_hw_lacp_rx_flow(struct rte_eth_dev *dev);
 int mlx5_flow_actions_validate(struct rte_eth_dev *dev,
 		const struct rte_flow_actions_template_attr *attr,
diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index d945c88eb0..eb3dcce59d 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -15897,7 +15897,7 @@ flow_hw_is_matching_sq_miss_flow(struct mlx5_ctrl_flow_entry *cf,
 }
 
 int
-mlx5_flow_hw_esw_destroy_sq_miss_flow(struct rte_eth_dev *dev, uint32_t sqn)
+mlx5_flow_hw_esw_destroy_sq_miss_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external)
 {
 	uint16_t port_id = dev->data->port_id;
 	uint16_t proxy_port_id = dev->data->port_id;
@@ -15924,7 +15924,8 @@ mlx5_flow_hw_esw_destroy_sq_miss_flow(struct rte_eth_dev *dev, uint32_t sqn)
 	    !proxy_priv->hw_ctrl_fdb->hw_esw_sq_miss_root_tbl ||
 	    !proxy_priv->hw_ctrl_fdb->hw_esw_sq_miss_tbl)
 		return 0;
-	cf = LIST_FIRST(&proxy_priv->hw_ctrl_flows);
+	cf = external ? LIST_FIRST(&proxy_priv->hw_ext_ctrl_flows) :
+			LIST_FIRST(&proxy_priv->hw_ctrl_flows);
 	while (cf != NULL) {
 		cf_next = LIST_NEXT(cf, next);
 		if (flow_hw_is_matching_sq_miss_flow(cf, dev, sqn)) {
@@ -16058,8 +16059,58 @@ mlx5_flow_hw_create_tx_default_mreg_copy_flow(struct rte_eth_dev *dev, uint32_t
 					items, 0, copy_reg_action, 0, &flow_info, external);
 }
 
+static bool
+flow_hw_is_matching_tx_mreg_copy_flow(struct mlx5_ctrl_flow_entry *cf,
+				      struct rte_eth_dev *dev,
+				      uint32_t sqn)
+{
+	if (cf->owner_dev != dev)
+		return false;
+	if (cf->info.type == MLX5_CTRL_FLOW_TYPE_TX_META_COPY && cf->info.tx_repr_sq == sqn)
+		return true;
+	return false;
+}
+
+int
+mlx5_flow_hw_destroy_tx_default_mreg_copy_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external)
+{
+	uint16_t port_id = dev->data->port_id;
+	uint16_t proxy_port_id = dev->data->port_id;
+	struct rte_eth_dev *proxy_dev;
+	struct mlx5_priv *proxy_priv;
+	struct mlx5_ctrl_flow_entry *cf;
+	struct mlx5_ctrl_flow_entry *cf_next;
+	int ret;
+
+	ret = rte_flow_pick_transfer_proxy(port_id, &proxy_port_id, NULL);
+	if (ret) {
+		DRV_LOG(ERR, "Unable to pick transfer proxy port for port %u. Transfer proxy "
+			     "port must be present for default SQ miss flow rules to exist.",
+			     port_id);
+		return ret;
+	}
+	proxy_dev = &rte_eth_devices[proxy_port_id];
+	proxy_priv = proxy_dev->data->dev_private;
+	if (!proxy_priv->dr_ctx ||
+	    !proxy_priv->hw_ctrl_fdb ||
+	    !proxy_priv->hw_ctrl_fdb->hw_tx_meta_cpy_tbl)
+		return 0;
+	cf = external ? LIST_FIRST(&proxy_priv->hw_ext_ctrl_flows) :
+			LIST_FIRST(&proxy_priv->hw_ctrl_flows);
+	while (cf != NULL) {
+		cf_next = LIST_NEXT(cf, next);
+		if (flow_hw_is_matching_tx_mreg_copy_flow(cf, dev, sqn)) {
+			claim_zero(flow_hw_destroy_ctrl_flow(proxy_dev, cf->flow));
+			LIST_REMOVE(cf, next);
+			mlx5_free(cf);
+		}
+		cf = cf_next;
+	}
+	return 0;
+}
+
 int
-mlx5_flow_hw_tx_repr_matching_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external)
+mlx5_flow_hw_create_tx_repr_matching_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	struct mlx5_rte_flow_item_sq sq_spec = {
@@ -16116,6 +16167,55 @@ mlx5_flow_hw_tx_repr_matching_flow(struct rte_eth_dev *dev, uint32_t sqn, bool e
 					items, 0, actions, 0, &flow_info, external);
 }
 
+static bool
+flow_hw_is_tx_matching_repr_matching_flow(struct mlx5_ctrl_flow_entry *cf,
+					  struct rte_eth_dev *dev,
+					  uint32_t sqn)
+{
+	if (cf->owner_dev != dev)
+		return false;
+	if (cf->info.type == MLX5_CTRL_FLOW_TYPE_TX_REPR_MATCH && cf->info.tx_repr_sq == sqn)
+		return true;
+	return false;
+}
+
+int
+mlx5_flow_hw_destroy_tx_repr_matching_flow(struct rte_eth_dev *dev, uint32_t sqn, bool external)
+{
+	uint16_t port_id = dev->data->port_id;
+	uint16_t proxy_port_id = dev->data->port_id;
+	struct rte_eth_dev *proxy_dev;
+	struct mlx5_priv *proxy_priv;
+	struct mlx5_ctrl_flow_entry *cf;
+	struct mlx5_ctrl_flow_entry *cf_next;
+	int ret;
+
+	ret = rte_flow_pick_transfer_proxy(port_id, &proxy_port_id, NULL);
+	if (ret) {
+		DRV_LOG(ERR, "Unable to pick transfer proxy port for port %u. Transfer proxy "
+			     "port must be present for default SQ miss flow rules to exist.",
+			     port_id);
+		return ret;
+	}
+	proxy_dev = &rte_eth_devices[proxy_port_id];
+	proxy_priv = proxy_dev->data->dev_private;
+	if (!proxy_priv->dr_ctx ||
+	    !proxy_priv->hw_tx_repr_tagging_tbl)
+		return 0;
+	cf = external ? LIST_FIRST(&proxy_priv->hw_ext_ctrl_flows) :
+			LIST_FIRST(&proxy_priv->hw_ctrl_flows);
+	while (cf != NULL) {
+		cf_next = LIST_NEXT(cf, next);
+		if (flow_hw_is_tx_matching_repr_matching_flow(cf, dev, sqn)) {
+			claim_zero(flow_hw_destroy_ctrl_flow(proxy_dev, cf->flow));
+			LIST_REMOVE(cf, next);
+			mlx5_free(cf);
+		}
+		cf = cf_next;
+	}
+	return 0;
+}
+
 int
 mlx5_flow_hw_lacp_rx_flow(struct rte_eth_dev *dev)
 {
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index e6acb56d4d..6acf398ccc 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -1622,7 +1622,7 @@ mlx5_traffic_enable_hws(struct rte_eth_dev *dev)
 			}
 		}
 		if (config->dv_esw_en && config->repr_matching) {
-			if (mlx5_flow_hw_tx_repr_matching_flow(dev, queue, false)) {
+			if (mlx5_flow_hw_create_tx_repr_matching_flow(dev, queue, false)) {
 				mlx5_txq_release(dev, i);
 				goto error;
 			}
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 834ca541d5..1d258f979c 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -1433,7 +1433,7 @@ rte_pmd_mlx5_external_sq_enable(uint16_t port_id, uint32_t sq_num)
 	priv = dev->data->dev_private;
 	if ((!priv->representor && !priv->master) ||
 	    !priv->sh->config.dv_esw_en) {
-		DRV_LOG(ERR, "Port %u must be represetnor or master port in E-Switch mode.",
+		DRV_LOG(ERR, "Port %u must be representor or master port in E-Switch mode.",
 			port_id);
 		rte_errno = EINVAL;
 		return -rte_errno;
@@ -1454,9 +1454,9 @@ rte_pmd_mlx5_external_sq_enable(uint16_t port_id, uint32_t sq_num)
 		}
 
 		if (priv->sh->config.repr_matching &&
-		    mlx5_flow_hw_tx_repr_matching_flow(dev, sq_num, true)) {
+		    mlx5_flow_hw_create_tx_repr_matching_flow(dev, sq_num, true)) {
 			if (sq_miss_created)
-				mlx5_flow_hw_esw_destroy_sq_miss_flow(dev, sq_num);
+				mlx5_flow_hw_esw_destroy_sq_miss_flow(dev, sq_num, true);
 			return -rte_errno;
 		}
 
@@ -1464,7 +1464,7 @@ rte_pmd_mlx5_external_sq_enable(uint16_t port_id, uint32_t sq_num)
 		    priv->sh->config.dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS &&
 		    mlx5_flow_hw_create_tx_default_mreg_copy_flow(dev, sq_num, true)) {
 			if (sq_miss_created)
-				mlx5_flow_hw_esw_destroy_sq_miss_flow(dev, sq_num);
+				mlx5_flow_hw_esw_destroy_sq_miss_flow(dev, sq_num, true);
 			return -rte_errno;
 		}
 		return 0;
@@ -1478,6 +1478,53 @@ rte_pmd_mlx5_external_sq_enable(uint16_t port_id, uint32_t sq_num)
 	return -rte_errno;
 }
 
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_pmd_mlx5_external_sq_disable, 25.11)
+int
+rte_pmd_mlx5_external_sq_disable(uint16_t port_id, uint32_t sq_num)
+{
+	struct rte_eth_dev *dev;
+	struct mlx5_priv *priv;
+
+	if (rte_eth_dev_is_valid_port(port_id) < 0) {
+		DRV_LOG(ERR, "There is no Ethernet device for port %u.",
+			port_id);
+		rte_errno = ENODEV;
+		return -rte_errno;
+	}
+	dev = &rte_eth_devices[port_id];
+	priv = dev->data->dev_private;
+	if ((!priv->representor && !priv->master) ||
+	    !priv->sh->config.dv_esw_en) {
+		DRV_LOG(ERR, "Port %u must be representor or master port in E-Switch mode.",
+			port_id);
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	if (sq_num == 0) {
+		DRV_LOG(ERR, "Invalid SQ number.");
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+#ifdef HAVE_MLX5_HWS_SUPPORT
+	if (priv->sh->config.dv_flow_en == 2) {
+		if (priv->sh->config.fdb_def_rule &&
+		    mlx5_flow_hw_esw_destroy_sq_miss_flow(dev, sq_num, true))
+			return -rte_errno;
+		if (priv->sh->config.repr_matching &&
+		    mlx5_flow_hw_destroy_tx_repr_matching_flow(dev, sq_num, true))
+			return -rte_errno;
+		if (!priv->sh->config.repr_matching &&
+		    priv->sh->config.dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS &&
+		    mlx5_flow_hw_destroy_tx_default_mreg_copy_flow(dev, sq_num, true))
+			return -rte_errno;
+		return 0;
+	}
+#endif
+	/* Not supported for software steering. */
+	rte_errno = ENOTSUP;
+	return -rte_errno;
+}
+
 /**
  * Set the Tx queue dynamic timestamp (mask and offset)
  *
diff --git a/drivers/net/mlx5/rte_pmd_mlx5.h b/drivers/net/mlx5/rte_pmd_mlx5.h
index 4d4821afae..31f99e7a78 100644
--- a/drivers/net/mlx5/rte_pmd_mlx5.h
+++ b/drivers/net/mlx5/rte_pmd_mlx5.h
@@ -484,6 +484,24 @@ typedef void (*rte_pmd_mlx5_driver_event_callback_t)(uint16_t port_id,
 		const void *opaque);
 
 
+/**
+ * Disable traffic for external SQ. Should be invoked by application
+ * before destroying the external SQ.
+ *
+ * @param[in] port_id
+ *   The port identifier of the Ethernet device.
+ * @param[in] sq_num
+ *   SQ HW number.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ *   Possible values for rte_errno:
+ *   - EINVAL - invalid sq_number or port type.
+ *   - ENODEV - there is no Ethernet device for this port id.
+ */
+__rte_experimental
+int rte_pmd_mlx5_external_sq_disable(uint16_t port_id, uint32_t sq_num);
+
 /**
  * Register mlx5 driver event callback.
  *
-- 
2.51.0

