* [PATCH 1/4] drivers: fix flow devarg handling for future HW
@ 2026-01-12 7:26 Maayan Kashani
2026-01-12 7:26 ` [PATCH 2/4] net/mlx5: fix default memzone requirements in HWS Maayan Kashani
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Maayan Kashani @ 2026-01-12 7:26 UTC (permalink / raw)
To: stable
Cc: mkashani, rasland, Dariusz Sosnowski, Viacheslav Ovsiienko,
Bing Zhao, Ori Kam, Suanming Mou, Matan Azrad
SWS (software steering) will be disabled on future hardware generations.
This fix checks the SWS capability bits and returns a relevant error
when SWS is requested but not supported.
Update the defaults for the dv_flow_en and allow_duplicate_pattern
devargs accordingly:
- The default dv_flow_en devarg value is chosen based on whether the
  NIC supports SW steering and/or HW steering (sketched after the
  separator below).
- If DV flow is not supported and allow_duplicate_pattern is set by
  the user, forcibly disable it and emit a clear log message.
This change improves reliability by ensuring that only valid
configurations are applied, and provides clear feedback to the user
when a fallback is triggered.
Fixes: 1b55eeb7b76f ("common/mlx5: add ConnectX-9 SuperNIC")
Cc: stable@dpdk.org
Signed-off-by: Maayan Kashani <mkashani@nvidia.com>
---
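For illustration, the default selection described above boils down to
the following standalone sketch. The helper names and capability
booleans are placeholders for this note, not the PMD's internal
symbols; the logic in mlx5_shared_dev_ctx_args_config() below is the
authoritative version.

#include <stdbool.h>
#include <stdio.h>

/*
 * Mirrors the documented defaults: 1 (DV/SW steering) if SWS is
 * supported, 2 (WQE based HW steering) if only HWS is supported,
 * 0 (legacy Verbs flow offloading) otherwise.
 */
static int
pick_dv_flow_en_default(bool sws_supported, bool hws_supported)
{
        if (sws_supported)
                return 1;
        if (hws_supported)
                return 2;
        return 0;
}

static int
pick_allow_duplicate_pattern_default(int dv_flow_en)
{
        /* HWS mode does not support duplicate patterns. */
        return dv_flow_en == 2 ? 0 : 1;
}

int
main(void)
{
        /* Future HW: no SWS capability, HWS available. */
        int dv_flow_en = pick_dv_flow_en_default(false, true);

        printf("dv_flow_en=%d allow_duplicate_pattern=%d\n",
               dv_flow_en,
               pick_allow_duplicate_pattern_default(dv_flow_en));
        return 0;
}

The defaults can still be overridden explicitly through devargs,
e.g. dv_flow_en=1 in the device probe string on HW that supports SWS.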
doc/guides/nics/mlx5.rst | 11 +++--
drivers/common/mlx5/mlx5_devx_cmds.c | 18 +++++++
drivers/common/mlx5/mlx5_devx_cmds.h | 6 +++
drivers/common/mlx5/mlx5_prm.h | 14 ++++--
drivers/net/mlx5/mlx5.c | 71 +++++++++++++++++++++++++++-
5 files changed, 111 insertions(+), 9 deletions(-)
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 904a0ac3584..0b123461a30 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -698,8 +698,11 @@ for an additional list of options shared with other mlx5 drivers.
Value 2 enables the WQE based hardware steering.
In this mode, only queue-based flow management is supported.
- It is configured by default to 1 (DV flow steering) if supported.
- Otherwise, the value is 0 which indicates legacy Verbs flow offloading.
+ By default, the PMD will set this value according to capability.
+ If DV flow steering is supported, it will be set to 1.
+ If DV flow steering is not supported and HW steering is supported,
+ then it will be set to 2.
+ Otherwise, it will be set to 0.
- ``dv_esw_en`` parameter [int]
@@ -834,8 +837,10 @@ for an additional list of options shared with other mlx5 drivers.
- 1. Allow insertion of rules with the same pattern items.
In this case, all rules are inserted but only the first rule takes effect,
the next rule takes effect only if the previous rules are deleted.
+ This option is not supported in :ref:`HWS mode <mlx5_hws>`.
+ If this option is set to 1 in HWS mode, it will be set to 0.
- By default, the PMD will set this value to 1.
+ By default, the PMD will set this value according to capability.
.. _mlx5_net_stats:
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index d885a95c04e..d12ebf8487d 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -1304,6 +1304,18 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
rx_reg |= ((0xff & reg_c_8_15) << 8);
attr->set_reg_c &= (rx_reg & tx_reg);
+ attr->rx_sw_owner_v2 = MLX5_GET(flow_table_nic_cap, hcattr,
+ flow_table_properties_nic_receive.sw_owner_v2);
+ if (!attr->rx_sw_owner_v2)
+ attr->rx_sw_owner = MLX5_GET(flow_table_nic_cap, hcattr,
+ flow_table_properties_nic_receive.sw_owner);
+
+ attr->tx_sw_owner_v2 = MLX5_GET(flow_table_nic_cap, hcattr,
+ flow_table_properties_nic_transmit.sw_owner_v2);
+ if (!attr->tx_sw_owner_v2)
+ attr->tx_sw_owner = MLX5_GET(flow_table_nic_cap, hcattr,
+ flow_table_properties_nic_transmit.sw_owner);
+
#undef GET_RX_REG_X_BITS
#undef GET_TX_REG_X_BITS
}
@@ -1456,6 +1468,12 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
reg_c_8_15 = MLX5_GET(flow_table_esw_cap, hcattr,
ft_field_support_2_esw_fdb.metadata_reg_c_8_15);
attr->set_reg_c &= ((0xff & reg_c_8_15) << 8) | esw_reg;
+
+ attr->esw_sw_owner_v2 = MLX5_GET(flow_table_esw_cap, hcattr,
+ flow_table_properties_nic_esw_fdb.sw_owner_v2);
+ if (!attr->esw_sw_owner_v2)
+ attr->esw_sw_owner = MLX5_GET(flow_table_esw_cap, hcattr,
+ flow_table_properties_nic_esw_fdb.sw_owner);
}
return 0;
error:
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index f6e8afbf395..da50fc686cd 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -337,6 +337,12 @@ struct mlx5_hca_attr {
uint8_t max_header_modify_pattern_length;
uint64_t system_image_guid;
uint32_t log_max_conn_track_offload:5;
+ uint8_t rx_sw_owner:1;
+ uint8_t rx_sw_owner_v2:1;
+ uint8_t tx_sw_owner:1;
+ uint8_t tx_sw_owner_v2:1;
+ uint8_t esw_sw_owner:1;
+ uint8_t esw_sw_owner_v2:1;
};
/* LAG Context. */
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 411c35f456e..ba33336e588 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -1597,9 +1597,13 @@ enum {
#define MLX5_HCA_FLEX_GTPU_DW_0_ENABLED (1UL << 18)
#define MLX5_HCA_FLEX_GTPU_TEID_ENABLED (1UL << 19)
-/* The device steering logic format. */
-#define MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 0x0
-#define MLX5_STEERING_LOGIC_FORMAT_CONNECTX_6DX 0x1
+/* The device steering logic format version. */
+enum {
+ MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 = 0,
+ MLX5_STEERING_LOGIC_FORMAT_CONNECTX_6DX = 1,
+ MLX5_STEERING_LOGIC_FORMAT_CONNECTX_7 = 2,
+ MLX5_STEERING_LOGIC_FORMAT_CONNECTX_8 = 3,
+};
struct mlx5_ifc_cmd_hca_cap_bits {
u8 access_other_hca_roce[0x1];
@@ -2348,7 +2352,9 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
};
struct mlx5_ifc_flow_table_esw_cap_bits {
- u8 reserved_at_0[0x800];
+ u8 reserved_at_0[0x200];
+ struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb;
+ u8 reserved_at_400[0x400];
struct mlx5_ifc_ft_fields_support_bits ft_header_modify_esw_fdb;
u8 reserved_at_C00[0x800];
struct mlx5_ifc_ft_fields_support_2_bits
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index bde4d01e488..c78054eb3c5 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1441,6 +1441,45 @@ mlx5_dev_args_check_handler(const char *key, const char *val, void *opaque)
return 0;
}
+static bool
+mlx5_hws_is_supported(struct mlx5_dev_ctx_shared *sh)
+{
+ return (sh->cdev->config.devx &&
+ sh->cdev->config.hca_attr.wqe_based_flow_table_sup);
+}
+
+static bool
+mlx5_sws_is_any_supported(struct mlx5_dev_ctx_shared *sh)
+{
+ struct mlx5_common_device *cdev = sh->cdev;
+ struct mlx5_hca_attr *hca_attr = &cdev->config.hca_attr;
+
+ if (hca_attr->rx_sw_owner_v2 || hca_attr->rx_sw_owner)
+ return true;
+
+ if (hca_attr->tx_sw_owner_v2 || hca_attr->tx_sw_owner)
+ return true;
+
+ if (hca_attr->eswitch_manager && (hca_attr->esw_sw_owner_v2 || hca_attr->esw_sw_owner))
+ return true;
+
+ return false;
+}
+
+static bool
+mlx5_kvargs_is_used(struct mlx5_kvargs_ctrl *mkvlist, const char *key)
+{
+ const struct rte_kvargs_pair *pair;
+ uint32_t i;
+
+ for (i = 0; i < mkvlist->kvlist->count; ++i) {
+ pair = &mkvlist->kvlist->pairs[i];
+ if (strcmp(pair->key, key) == 0 && mkvlist->is_used[i])
+ return true;
+ }
+ return false;
+}
+
/**
* Parse user device parameters and adjust them according to device
* capabilities.
@@ -1481,6 +1520,8 @@ mlx5_shared_dev_ctx_args_config(struct mlx5_dev_ctx_shared *sh,
int ret = 0;
size_t alignment = rte_mem_page_size();
uint32_t max_queue_umem_size = MLX5_WQE_SIZE * mlx5_dev_get_max_wq_size(sh);
+ bool hws_is_supported = mlx5_hws_is_supported(sh);
+ bool sws_is_supported = mlx5_sws_is_any_supported(sh);
if (alignment == (size_t)-1) {
alignment = (1 << MLX5_LOG_PAGE_SIZE);
@@ -1491,9 +1532,15 @@ mlx5_shared_dev_ctx_args_config(struct mlx5_dev_ctx_shared *sh,
memset(config, 0, sizeof(*config));
config->vf_nl_en = 1;
config->dv_esw_en = 1;
- config->dv_flow_en = 1;
+ if (!sws_is_supported && hws_is_supported)
+ config->dv_flow_en = 2;
+ else
+ config->dv_flow_en = 1;
config->decap_en = 1;
- config->allow_duplicate_pattern = 1;
+ if (config->dv_flow_en == 2)
+ config->allow_duplicate_pattern = 0;
+ else
+ config->allow_duplicate_pattern = 1;
config->fdb_def_rule = 1;
config->cnt_svc.cycle_time = MLX5_CNT_SVC_CYCLE_TIME_DEFAULT;
config->cnt_svc.service_core = rte_get_main_lcore();
@@ -1513,6 +1560,26 @@ mlx5_shared_dev_ctx_args_config(struct mlx5_dev_ctx_shared *sh,
DRV_LOG(WARNING, "DV flow is not supported.");
config->dv_flow_en = 0;
}
+ /* Inform user if DV flow is not supported. */
+ if (config->dv_flow_en == 1 && !sws_is_supported && hws_is_supported) {
+ DRV_LOG(WARNING, "DV flow is not supported. Changing to HWS mode.");
+ config->dv_flow_en = 2;
+ }
+ /* Handle allow_duplicate_pattern based on final dv_flow_en mode.
+ * HWS mode (dv_flow_en=2) doesn't support duplicate patterns.
+ * Warn only if user explicitly requested an incompatible setting.
+ */
+ bool allow_dup_pattern_set = mkvlist != NULL &&
+ mlx5_kvargs_is_used(mkvlist, MLX5_ALLOW_DUPLICATE_PATTERN);
+ if (config->dv_flow_en == 2) {
+ if (config->allow_duplicate_pattern == 1 && allow_dup_pattern_set)
+ DRV_LOG(WARNING, "Duplicate pattern is not supported with HWS. Disabling it.");
+ config->allow_duplicate_pattern = 0;
+ } else if (!allow_dup_pattern_set) {
+ /* Non-HWS mode: set default to 1 only if not explicitly set by user */
+ config->allow_duplicate_pattern = 1;
+ }
+
if (config->dv_esw_en && !sh->dev_cap.dv_esw_en) {
DRV_LOG(DEBUG, "E-Switch DV flow is not supported.");
config->dv_esw_en = 0;
--
2.21.0
* [PATCH 2/4] net/mlx5: fix default memzone requirements in HWS
2026-01-12 7:26 [PATCH 1/4] drivers: fix flow devarg handling for future HW Maayan Kashani
@ 2026-01-12 7:26 ` Maayan Kashani
2026-01-12 7:26 ` [PATCH 3/4] net/mlx5: fix internal HWS pattern template creation Maayan Kashani
2026-01-12 7:26 ` [PATCH 4/4] net/mlx5: fix redundant control rules in promiscuous mode Maayan Kashani
2 siblings, 0 replies; 5+ messages in thread
From: Maayan Kashani @ 2026-01-12 7:26 UTC (permalink / raw)
To: stable
Cc: mkashani, rasland, Dariusz Sosnowski, Viacheslav Ovsiienko,
Bing Zhao, Ori Kam, Suanming Mou, Matan Azrad
From: Dariusz Sosnowski <dsosnowski@nvidia.com>
Commit [1] changed the default flow engine selection
in the mlx5 PMD to accommodate new NIC generations.
Whenever the underlying device does not support SWS (e.g., ConnectX-9
or untrusted VFs/SFs) but does support HWS,
the default flow engine is HWS (dv_flow_en=2), which also supports
the sync flow API.
This behavior change affected memory usage whenever
SFs are probed by DPDK. In the default HWS configuration supporting
the sync flow API (i.e., without calling rte_flow_configure()),
the mlx5 PMD allocated 4 rte_ring objects per port:
- indir_iq and indir_cq - for handling indirect action completions.
- flow_transfer_pending and flow_transfer_completed - for handling
  template table resizing.
This did not happen previously with SWS as the default flow engine.
Since a dedicated memzone is allocated for each rte_ring object,
this led to exhaustion of the default memzone limit
on setups with ~1K SFs to probe.
It resulted in the following error on port start:
EAL: memzone_reserve_aligned_thread_unsafe():
Number of requested memzone segments exceeds maximum 2560
RING: Cannot reserve memory
mlx5_net: Failed to start port 998 mlx5_core.sf.998:
fail to configure port
Since template table resizing is allowed if and only if
the async flow API was configured, 2 of the aforementioned rings
are never used in the default sync flow API configuration.
This patch removes the allocation of the flow_transfer_pending and
flow_transfer_completed rings in the default sync flow API
configuration of the mlx5 PMD, reducing memzone usage and allowing
DPDK probing to succeed on setups with ~1K SFs.
[1] commit d1ac7b6c64d9
("net/mlx5: update flow devargs handling for future HW")
Fixes: 27d171b88031 ("net/mlx5: abstract flow action and enable reconfigure")
Cc: stable@dpdk.org
Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
---
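For scale, a standalone arithmetic sketch of the memzone usage,
assuming one dedicated memzone per rte_ring (per the commit message)
and the 2560-segment limit quoted in the error log. Ring counts per
port are taken from this patch; other memzone consumers on a real
system make exhaustion happen even earlier.

#include <stdio.h>

int
main(void)
{
        const unsigned int sfs = 1000;      /* ~1K SFs probed */
        const unsigned int mz_limit = 2560; /* limit from the error log */

        /* One dedicated memzone per rte_ring. */
        printf("4 rings/port (before): %u memzones, limit %u\n",
               4 * sfs, mz_limit);
        printf("2 rings/port (after):  %u memzones, limit %u\n",
               2 * sfs, mz_limit);
        return 0;
}

With 4 rings per port, 1000 SFs need 4000 memzones for rings alone,
well above the 2560 limit; 2 rings per port brings it back under.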
drivers/net/mlx5/mlx5_flow_hw.c | 86 ++++++++++++++++++++++++++-------
1 file changed, 68 insertions(+), 18 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index 98483abc7fc..1dada2e7cef 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -4483,6 +4483,9 @@ mlx5_hw_pull_flow_transfer_comp(struct rte_eth_dev *dev,
struct mlx5_priv *priv = dev->data->dev_private;
struct rte_ring *ring = priv->hw_q[queue].flow_transfer_completed;
+ if (ring == NULL)
+ return 0;
+
size = RTE_MIN(rte_ring_count(ring), n_res);
for (i = 0; i < size; i++) {
res[i].status = RTE_FLOW_OP_SUCCESS;
@@ -4714,8 +4717,9 @@ __flow_hw_push_action(struct rte_eth_dev *dev,
struct mlx5_hw_q *hw_q = &priv->hw_q[queue];
mlx5_hw_push_queue(hw_q->indir_iq, hw_q->indir_cq);
- mlx5_hw_push_queue(hw_q->flow_transfer_pending,
- hw_q->flow_transfer_completed);
+ if (hw_q->flow_transfer_pending != NULL && hw_q->flow_transfer_completed != NULL)
+ mlx5_hw_push_queue(hw_q->flow_transfer_pending,
+ hw_q->flow_transfer_completed);
if (!priv->shared_host) {
if (priv->hws_ctpool)
mlx5_aso_push_wqe(priv->sh,
@@ -11889,6 +11893,60 @@ mlx5_hwq_ring_create(uint16_t port_id, uint32_t queue, uint32_t size, const char
RING_F_SP_ENQ | RING_F_SC_DEQ | RING_F_EXACT_SZ);
}
+static int
+flow_hw_queue_setup_rings(struct rte_eth_dev *dev,
+ uint16_t queue,
+ uint32_t queue_size,
+ bool nt_mode)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+
+ /* HWS queue info container must be already allocated. */
+ MLX5_ASSERT(priv->hw_q != NULL);
+
+ /* Notice ring name length is limited. */
+ priv->hw_q[queue].indir_cq = mlx5_hwq_ring_create
+ (dev->data->port_id, queue, queue_size, "indir_act_cq");
+ if (!priv->hw_q[queue].indir_cq) {
+ DRV_LOG(ERR, "port %u failed to allocate indir_act_cq ring for HWS",
+ dev->data->port_id);
+ return -ENOMEM;
+ }
+
+ priv->hw_q[queue].indir_iq = mlx5_hwq_ring_create
+ (dev->data->port_id, queue, queue_size, "indir_act_iq");
+ if (!priv->hw_q[queue].indir_iq) {
+ DRV_LOG(ERR, "port %u failed to allocate indir_act_iq ring for HWS",
+ dev->data->port_id);
+ return -ENOMEM;
+ }
+
+ /*
+ * Sync flow API does not require rings used for table resize handling,
+ * because these rings are only used through async flow APIs.
+ */
+ if (nt_mode)
+ return 0;
+
+ priv->hw_q[queue].flow_transfer_pending = mlx5_hwq_ring_create
+ (dev->data->port_id, queue, queue_size, "tx_pending");
+ if (!priv->hw_q[queue].flow_transfer_pending) {
+ DRV_LOG(ERR, "port %u failed to allocate tx_pending ring for HWS",
+ dev->data->port_id);
+ return -ENOMEM;
+ }
+
+ priv->hw_q[queue].flow_transfer_completed = mlx5_hwq_ring_create
+ (dev->data->port_id, queue, queue_size, "tx_done");
+ if (!priv->hw_q[queue].flow_transfer_completed) {
+ DRV_LOG(ERR, "port %u failed to allocate tx_done ring for HWS",
+ dev->data->port_id);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
static int
flow_hw_validate_attributes(const struct rte_flow_port_attr *port_attr,
uint16_t nb_queue,
@@ -12057,22 +12115,8 @@ __flow_hw_configure(struct rte_eth_dev *dev,
&priv->hw_q[i].job[_queue_attr[i]->size];
for (j = 0; j < _queue_attr[i]->size; j++)
priv->hw_q[i].job[j] = &job[j];
- /* Notice ring name length is limited. */
- priv->hw_q[i].indir_cq = mlx5_hwq_ring_create
- (dev->data->port_id, i, _queue_attr[i]->size, "indir_act_cq");
- if (!priv->hw_q[i].indir_cq)
- goto err;
- priv->hw_q[i].indir_iq = mlx5_hwq_ring_create
- (dev->data->port_id, i, _queue_attr[i]->size, "indir_act_iq");
- if (!priv->hw_q[i].indir_iq)
- goto err;
- priv->hw_q[i].flow_transfer_pending = mlx5_hwq_ring_create
- (dev->data->port_id, i, _queue_attr[i]->size, "tx_pending");
- if (!priv->hw_q[i].flow_transfer_pending)
- goto err;
- priv->hw_q[i].flow_transfer_completed = mlx5_hwq_ring_create
- (dev->data->port_id, i, _queue_attr[i]->size, "tx_done");
- if (!priv->hw_q[i].flow_transfer_completed)
+
+ if (flow_hw_queue_setup_rings(dev, i, _queue_attr[i]->size, nt_mode) < 0)
goto err;
}
dr_ctx_attr.pd = priv->sh->cdev->pd;
@@ -15440,6 +15484,12 @@ flow_hw_update_resized(struct rte_eth_dev *dev, uint32_t queue,
};
MLX5_ASSERT(hw_flow->flags & MLX5_FLOW_HW_FLOW_FLAG_MATCHER_SELECTOR);
+ /*
+ * Update resized can be called only through async flow API.
+ * These rings are allocated if and only if async flow API was configured.
+ */
+ MLX5_ASSERT(priv->hw_q[queue].flow_transfer_completed != NULL);
+ MLX5_ASSERT(priv->hw_q[queue].flow_transfer_pending != NULL);
/**
* mlx5dr_matcher_resize_rule_move() accepts original table matcher -
* the one that was used BEFORE table resize.
--
2.21.0
* [PATCH 3/4] net/mlx5: fix internal HWS pattern template creation
2026-01-12 7:26 [PATCH 1/4] drivers: fix flow devarg handling for future HW Maayan Kashani
2026-01-12 7:26 ` [PATCH 2/4] net/mlx5: fix default memzone requirements in HWS Maayan Kashani
@ 2026-01-12 7:26 ` Maayan Kashani
2026-01-12 7:26 ` [PATCH 4/4] net/mlx5: fix redundant control rules in promiscuous mode Maayan Kashani
2 siblings, 0 replies; 5+ messages in thread
From: Maayan Kashani @ 2026-01-12 7:26 UTC (permalink / raw)
To: stable
Cc: mkashani, rasland, Gregory Etelson, Dariusz Sosnowski,
Viacheslav Ovsiienko, Bing Zhao, Ori Kam, Suanming Mou,
Matan Azrad
After basic verifications, HWS pattern template creation tries to
build a table with the tested items to check whether the pattern is
valid.
The time consumed by that table creation can be critical for
applications that require fast PMD initialization.
This patch separates pattern templates into internal and external
ones. Internal templates are created by the PMD, are considered safe,
and can skip some validations.
Pattern templates provided by applications are still fully validated.
Fixes: a190f25e6a93 ("net/mlx5: improve pattern template validation")
Cc: stable@dpdk.org
Signed-off-by: Gregory Etelson <getelson@nvidia.com>
Signed-off-by: Maayan Kashani <mkashani@nvidia.com>
---
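The change follows a thin-wrapper pattern; below is a minimal sketch
with generic placeholder names (the real entry points are
flow_hw_pattern_template_create() and
flow_hw_external_pattern_template_create() in the diff that follows):

#include <stdbool.h>
#include <stddef.h>

struct pattern_template; /* opaque handle, for illustration only */

static int
full_validation(void)
{
        /* Costly path: build a test table with the tested items. */
        return 0;
}

/* Common creation path. Driver-internal callers pass external=false
 * for trusted, driver-generated templates and skip the costly check.
 */
static struct pattern_template *
template_create(bool external)
{
        if (external && full_validation() != 0)
                return NULL;
        /* ... allocate and register the template ... */
        return NULL; /* sketch only */
}

/* Wrapper exported through the flow ops table for applications. */
static struct pattern_template *
external_template_create(void)
{
        return template_create(true);
}

int
main(void)
{
        (void)external_template_create(); /* full validation runs */
        (void)template_create(false);     /* validation skipped */
        return 0;
}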
drivers/net/mlx5/mlx5_flow_hw.c | 35 +++++++++++++++++++++++----------
1 file changed, 25 insertions(+), 10 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index 1dada2e7cef..4af654967ca 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -9110,6 +9110,7 @@ static struct rte_flow_pattern_template *
flow_hw_pattern_template_create(struct rte_eth_dev *dev,
const struct rte_flow_pattern_template_attr *attr,
const struct rte_flow_item items[],
+ bool external,
struct rte_flow_error *error)
{
struct mlx5_priv *priv = dev->data->dev_private;
@@ -9264,9 +9265,11 @@ flow_hw_pattern_template_create(struct rte_eth_dev *dev,
}
}
rte_atomic_fetch_add_explicit(&it->refcnt, 1, rte_memory_order_relaxed);
- rc = pattern_template_validate(dev, &it, 1, error);
- if (rc)
- goto error;
+ if (external) {
+ rc = pattern_template_validate(dev, &it, 1, error);
+ if (rc)
+ goto error;
+ }
LIST_INSERT_HEAD(&priv->flow_hw_itt, it, next);
return it;
error:
@@ -9285,6 +9288,16 @@ flow_hw_pattern_template_create(struct rte_eth_dev *dev,
return NULL;
}
+static struct rte_flow_pattern_template *
+flow_hw_external_pattern_template_create
+ (struct rte_eth_dev *dev,
+ const struct rte_flow_pattern_template_attr *attr,
+ const struct rte_flow_item items[],
+ struct rte_flow_error *error)
+{
+ return flow_hw_pattern_template_create(dev, attr, items, true, error);
+}
+
/**
* Destroy flow item template.
*
@@ -9890,7 +9903,7 @@ flow_hw_create_tx_repr_sq_pattern_tmpl(struct rte_eth_dev *dev, struct rte_flow_
},
};
- return flow_hw_pattern_template_create(dev, &attr, items, error);
+ return flow_hw_pattern_template_create(dev, &attr, items, false, error);
}
static __rte_always_inline uint32_t
@@ -10182,7 +10195,7 @@ flow_hw_create_ctrl_esw_mgr_pattern_template(struct rte_eth_dev *dev,
},
};
- return flow_hw_pattern_template_create(dev, &attr, items, error);
+ return flow_hw_pattern_template_create(dev, &attr, items, false, error);
}
/**
@@ -10236,7 +10249,7 @@ flow_hw_create_ctrl_regc_sq_pattern_template(struct rte_eth_dev *dev,
},
};
- return flow_hw_pattern_template_create(dev, &attr, items, error);
+ return flow_hw_pattern_template_create(dev, &attr, items, false, error);
}
/**
@@ -10273,7 +10286,7 @@ flow_hw_create_ctrl_port_pattern_template(struct rte_eth_dev *dev,
},
};
- return flow_hw_pattern_template_create(dev, &attr, items, error);
+ return flow_hw_pattern_template_create(dev, &attr, items, false, error);
}
/*
@@ -10309,7 +10322,8 @@ flow_hw_create_lacp_rx_pattern_template(struct rte_eth_dev *dev, struct rte_flow
.type = RTE_FLOW_ITEM_TYPE_END,
},
};
- return flow_hw_pattern_template_create(dev, &pa_attr, eth_all, error);
+ return flow_hw_pattern_template_create(dev, &pa_attr, eth_all,
+ false, error);
}
/**
@@ -11553,7 +11567,7 @@ flow_hw_create_ctrl_rx_pattern_template
{ .type = RTE_FLOW_ITEM_TYPE_END }
};
- return flow_hw_pattern_template_create(dev, &attr, items, NULL);
+ return flow_hw_pattern_template_create(dev, &attr, items, false, NULL);
}
int
@@ -15574,6 +15588,7 @@ flow_hw_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
return 0;
}
+
const struct mlx5_flow_driver_ops mlx5_flow_hw_drv_ops = {
.list_create = flow_hw_list_create,
.list_destroy = flow_hw_list_destroy,
@@ -15581,7 +15596,7 @@ const struct mlx5_flow_driver_ops mlx5_flow_hw_drv_ops = {
.info_get = flow_hw_info_get,
.configure = flow_hw_configure,
.pattern_validate = flow_hw_pattern_validate,
- .pattern_template_create = flow_hw_pattern_template_create,
+ .pattern_template_create = flow_hw_external_pattern_template_create,
.pattern_template_destroy = flow_hw_pattern_template_destroy,
.actions_validate = flow_hw_actions_validate,
.actions_template_create = flow_hw_actions_template_create,
--
2.21.0
* [PATCH 4/4] net/mlx5: fix redundant control rules in promiscuous mode
2026-01-12 7:26 [PATCH 1/4] drivers: fix flow devarg handling for future HW Maayan Kashani
2026-01-12 7:26 ` [PATCH 2/4] net/mlx5: fix default memzone requirements in HWS Maayan Kashani
2026-01-12 7:26 ` [PATCH 3/4] net/mlx5: fix internal HWS pattern template creation Maayan Kashani
@ 2026-01-12 7:26 ` Maayan Kashani
2 siblings, 0 replies; 5+ messages in thread
From: Maayan Kashani @ 2026-01-12 7:26 UTC (permalink / raw)
To: stable
Cc: mkashani, rasland, Dariusz Sosnowski, Viacheslav Ovsiienko,
Bing Zhao, Ori Kam, Suanming Mou, Matan Azrad
When promiscuous mode is enabled, the device receives all traffic
regardless of the destination MAC address. Previously, the code set
both the promiscuous flag and the DMAC/multicast control flow rules,
which is redundant.
This patch makes the DMAC and multicast/broadcast control flow
rules conditional on not being in promiscuous mode. When promiscuous
mode is enabled, only the MLX5_CTRL_PROMISCUOUS flag is set.
Fixes: 9fa7c1cddb85 ("net/mlx5: create control flow rules with HWS")
Cc: stable@dpdk.org
Signed-off-by: Maayan Kashani <mkashani@nvidia.com>
---
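The resulting flag selection, as a standalone truth-table sketch.
The bit values are illustrative placeholders, not the PMD's actual
MLX5_CTRL_* definitions, and the unchanged VLAN filter handling is
omitted.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative bit values; the real enum lives in the PMD. */
#define CTRL_PROMISCUOUS     (1u << 0)
#define CTRL_ALL_MULTICAST   (1u << 1)
#define CTRL_BROADCAST       (1u << 2)
#define CTRL_IPV4_MULTICAST  (1u << 3)
#define CTRL_IPV6_MULTICAST  (1u << 4)
#define CTRL_DMAC            (1u << 5)

static uint32_t
ctrl_flags(bool promisc, bool all_mcast)
{
        uint32_t flags = 0;

        if (promisc)
                return CTRL_PROMISCUOUS; /* no DMAC/multicast rules */
        if (all_mcast)
                flags |= CTRL_ALL_MULTICAST;
        else
                flags |= CTRL_BROADCAST | CTRL_IPV4_MULTICAST |
                         CTRL_IPV6_MULTICAST;
        return flags | CTRL_DMAC;
}

int
main(void)
{
        printf("promisc:   0x%x\n", ctrl_flags(true, false));
        printf("all_mcast: 0x%x\n", ctrl_flags(false, true));
        printf("default:   0x%x\n", ctrl_flags(false, false));
        return 0;
}

Returning early in the promiscuous case is what removes the
redundant DMAC/multicast rules.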
drivers/net/mlx5/mlx5_trigger.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 028844e45d6..b38ba9022ea 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -1682,13 +1682,17 @@ mlx5_traffic_enable_hws(struct rte_eth_dev *dev)
dev->data->port_id, -ret);
goto error;
}
- if (dev->data->promiscuous)
+ if (dev->data->promiscuous) {
flags |= MLX5_CTRL_PROMISCUOUS;
- if (dev->data->all_multicast)
- flags |= MLX5_CTRL_ALL_MULTICAST;
- else
- flags |= MLX5_CTRL_BROADCAST | MLX5_CTRL_IPV4_MULTICAST | MLX5_CTRL_IPV6_MULTICAST;
- flags |= MLX5_CTRL_DMAC;
+ } else {
+ if (dev->data->all_multicast)
+ flags |= MLX5_CTRL_ALL_MULTICAST;
+ else
+ flags |= (MLX5_CTRL_BROADCAST |
+ MLX5_CTRL_IPV4_MULTICAST |
+ MLX5_CTRL_IPV6_MULTICAST);
+ flags |= MLX5_CTRL_DMAC;
+ }
if (priv->vlan_filter_n)
flags |= MLX5_CTRL_VLAN_FILTER;
return mlx5_flow_hw_ctrl_flows(dev, flags);
--
2.21.0