* [PATCH 1/4] net/mlx5: add driver event callbacks
2025-08-26 11:45 [PATCH 0/4] [25.11] net/mlx5: add driver event and steering toggle APIs Maayan Kashani
@ 2025-08-26 11:45 ` Maayan Kashani
2025-08-26 11:45 ` [PATCH 2/4] net/mlx5: move eCPRI release function to flex Maayan Kashani
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Maayan Kashani @ 2025-08-26 11:45 UTC (permalink / raw)
To: dev
Cc: mkashani, dsosnowski, rasland, Viacheslav Ovsiienko, Bing Zhao,
Ori Kam, Suanming Mou, Matan Azrad
From: Dariusz Sosnowski <dsosnowski@nvidia.com>
mlx5 PMD is a bifurcated driver,
which means that instead of communicating with HW directly
using UIO or VFIO, the driver uses existing kernel and
userspace APIs for that purpose.
One specific area of this usage is creation and configuration
of Rx and Tx queues. This is achieved through mlx5dv_devx_*
family of APIs exposed by rdma-core.
They allow userspace processes to access NIC FW securely.
It is theoretically possible for other libraries or
applications built on top of rdma-core to use the Rx and Tx queues
created by DPDK, for example in HW offloading set up outside of DPDK.
For instance, such a library or application could install an offloaded
flow rule which directs traffic to a DPDK queue.
This use case cannot be achieved right now, because neither DPDK
nor mlx5 PMD exposes the identifiers of the Rx and Tx queues
created through DevX.
This patch addresses this use case
by adding new functions to the mlx5 PMD private API:
- rte_pmd_mlx5_driver_event_cb_register()
- rte_pmd_mlx5_driver_event_cb_unregister()
These allow external users to register custom callbacks,
which will be called whenever mlx5 PMD performs
some operation (driver event) on a managed HW object
allocated through DevX; a usage sketch follows the event list below.
At the moment the following driver events are supported:
- Rx queue creation,
- Rx queue destruction,
- Tx queue creation,
- Tx queue destruction.
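For illustration, a minimal registration sketch is shown below. This is
hypothetical application-side code (the callback and helper names are made
up), assuming only the API introduced by this patch:

    #include <stdio.h>
    #include <rte_pmd_mlx5.h>

    /* Print basic information about every reported driver event. */
    static void
    app_driver_event_cb(uint16_t port_id,
                        const struct rte_pmd_mlx5_driver_event_cb_info *info,
                        const void *opaque)
    {
        (void)opaque;
        if (info->event == RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_RXQ_CREATE)
            /* queue_info is only provided for Rx queue creation events. */
            printf("port %u: Rx queue %u created, HW id %u, %s\n",
                   port_id, info->queue.dpdk_queue_id,
                   info->queue.hw_queue_id, info->queue.queue_info);
        else
            printf("port %u: event %d on queue %u (HW id %u)\n",
                   port_id, (int)info->event,
                   info->queue.dpdk_queue_id, info->queue.hw_queue_id);
    }

    static void
    app_register_driver_event_cb(void)
    {
        /*
         * May be called before or after rte_eal_init(); for ports that are
         * already started, the callback is invoked immediately for all
         * existing queues.
         */
        if (rte_pmd_mlx5_driver_event_cb_register(app_driver_event_cb, NULL) != 0)
            printf("failed to register mlx5 driver event callback\n");
    }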
Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
---
drivers/net/mlx5/meson.build | 1 +
drivers/net/mlx5/mlx5_devx.c | 17 ++
drivers/net/mlx5/mlx5_driver_event.c | 300 +++++++++++++++++++++++++++
drivers/net/mlx5/mlx5_driver_event.h | 18 ++
drivers/net/mlx5/rte_pmd_mlx5.h | 136 ++++++++++++
5 files changed, 472 insertions(+)
create mode 100644 drivers/net/mlx5/mlx5_driver_event.c
create mode 100644 drivers/net/mlx5/mlx5_driver_event.h
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index f16fe181930..d67fb0969f8 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -18,6 +18,7 @@ headers = files('rte_pmd_mlx5.h')
sources = files(
'mlx5.c',
'mlx5_devx.c',
+ 'mlx5_driver_event.c',
'mlx5_ethdev.c',
'mlx5_flow.c',
'mlx5_flow_aso.c',
diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c
index 10bd93c29a4..673c9f3902f 100644
--- a/drivers/net/mlx5/mlx5_devx.c
+++ b/drivers/net/mlx5/mlx5_devx.c
@@ -20,6 +20,7 @@
#include "mlx5.h"
#include "mlx5_common_os.h"
+#include "mlx5_driver_event.h"
#include "mlx5_tx.h"
#include "mlx5_rx.h"
#include "mlx5_utils.h"
@@ -239,6 +240,12 @@ mlx5_rxq_devx_obj_release(struct mlx5_rxq_priv *rxq)
if (rxq_obj == NULL)
return;
+ /*
+ * Notify external users that Rx queue will be destroyed.
+ * Skip notification for queues that are not started and for the internal drop queue.
+ */
+ if (rxq->ctrl->started && rxq != rxq->priv->drop_queue.rxq)
+ mlx5_driver_event_notify_rxq_destroy(rxq);
if (rxq_obj->rxq_ctrl->is_hairpin) {
if (rxq_obj->rq == NULL)
return;
@@ -612,6 +619,8 @@ mlx5_rxq_obj_hairpin_new(struct mlx5_rxq_priv *rxq)
return -rte_errno;
}
create_rq_set_state:
+ /* Notify external users that Rx queue was created. */
+ mlx5_driver_event_notify_rxq_create(rxq);
priv->dev_data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_HAIRPIN;
return 0;
}
@@ -691,6 +700,8 @@ mlx5_rxq_devx_obj_new(struct mlx5_rxq_priv *rxq)
}
rxq_ctrl->wqn = rxq->devx_rq.rq->id;
}
+ /* Notify external users that Rx queue was created. */
+ mlx5_driver_event_notify_rxq_create(rxq);
priv->dev_data->rx_queue_state[rxq->idx] = RTE_ETH_QUEUE_STATE_STARTED;
return 0;
error:
@@ -1440,6 +1451,8 @@ mlx5_txq_obj_hairpin_new(struct rte_eth_dev *dev, uint16_t idx)
rte_errno = errno;
return -rte_errno;
}
+ /* Notify external users that Tx queue was created. */
+ mlx5_driver_event_notify_txq_create(txq_ctrl);
return 0;
}
@@ -1670,6 +1683,8 @@ mlx5_txq_devx_obj_new(struct rte_eth_dev *dev, uint16_t idx)
priv->consec_tx_mem.cq_cur_off += txq_data->cq_mem_len;
ppriv->uar_table[txq_data->idx] = sh->tx_uar.bf_db;
dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
+ /* Notify external users that Tx queue was created. */
+ mlx5_driver_event_notify_txq_create(txq_ctrl);
return 0;
error:
ret = rte_errno; /* Save rte_errno before cleanup. */
@@ -1689,6 +1704,8 @@ void
mlx5_txq_devx_obj_release(struct mlx5_txq_obj *txq_obj)
{
MLX5_ASSERT(txq_obj);
+ /* Notify external users that Tx queue will be destroyed. */
+ mlx5_driver_event_notify_txq_destroy(txq_obj->txq_ctrl);
if (txq_obj->txq_ctrl->is_hairpin) {
if (txq_obj->sq) {
claim_zero(mlx5_devx_cmd_destroy(txq_obj->sq));
diff --git a/drivers/net/mlx5/mlx5_driver_event.c b/drivers/net/mlx5/mlx5_driver_event.c
new file mode 100644
index 00000000000..cad1f875180
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_driver_event.c
@@ -0,0 +1,300 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 NVIDIA Corporation & Affiliates
+ */
+
+#include "mlx5_driver_event.h"
+
+#include <sys/queue.h>
+
+#include <eal_export.h>
+
+#include "mlx5.h"
+#include "mlx5_rx.h"
+#include "mlx5_tx.h"
+#include "rte_pmd_mlx5.h"
+
+/*
+ * Macro defining the longest possible "queue_info" string generated as part of a driver event
+ * callback. Used to derive the correct size of the static buffer, so dynamic allocation
+ * can be skipped during the callback.
+ */
+#define MAX_QUEUE_INFO ( \
+ "lro_timeout=" RTE_STR(UINT32_MAX) "," \
+ "max_lro_msg_size=" RTE_STR(UINT32_MAX) "," \
+ "td=" RTE_STR(UINT32_MAX) "," \
+ "lpbk=1")
+
+static char queue_info_buf[sizeof(MAX_QUEUE_INFO)];
+
+struct registered_cb {
+ LIST_ENTRY(registered_cb) list;
+ rte_pmd_mlx5_driver_event_callback_t cb;
+ const void *opaque;
+};
+
+LIST_HEAD(, registered_cb) cb_list_head = LIST_HEAD_INITIALIZER(cb_list_head);
+
+static const char *
+generate_rx_queue_info(struct mlx5_rxq_priv *rxq)
+{
+ struct mlx5_priv *priv = rxq->priv;
+ uint32_t max_lro_msg_size = 0;
+ uint32_t lro_timeout = 0;
+ uint32_t lpbk = 0;
+ uint32_t td = 0;
+ int ret __rte_unused;
+
+ if (rxq->ctrl->rxq.lro) {
+ lro_timeout = priv->config.lro_timeout;
+ max_lro_msg_size = priv->max_lro_msg_size / MLX5_LRO_SEG_CHUNK_SIZE;
+ }
+
+ if (rxq->ctrl->is_hairpin)
+ td = priv->sh->td->id;
+ else
+ td = priv->sh->tdn;
+
+ lpbk = !!priv->dev_data->dev_conf.lpbk_mode;
+
+ ret = snprintf(queue_info_buf, sizeof(queue_info_buf),
+ "lro_timeout=%u,max_lro_msg_size=%u,td=%u,lpbk=%u",
+ lro_timeout, max_lro_msg_size, td, lpbk);
+ /*
+ * queue_info_buf is set up to accommodate maximum possible values.
+ * As a result, snprintf should always succeed here.
+ */
+ MLX5_ASSERT(ret >= 0);
+
+ return queue_info_buf;
+}
+
+static void
+fill_rxq_info(struct mlx5_rxq_priv *rxq,
+ struct rte_pmd_mlx5_driver_event_cb_queue_info *queue,
+ enum rte_pmd_mlx5_driver_event_cb_type event)
+{
+ /* It is assumed that the port is started, so all control structs should be initialized. */
+ MLX5_ASSERT(rxq != NULL);
+ MLX5_ASSERT(rxq->ctrl != NULL);
+
+ queue->dpdk_queue_id = rxq->idx;
+ if (rxq->ctrl->is_hairpin) {
+ MLX5_ASSERT(rxq->ctrl->obj != NULL && rxq->ctrl->obj->rq != NULL);
+ queue->hw_queue_id = rxq->ctrl->obj->rq->id;
+ } else {
+ MLX5_ASSERT(rxq->devx_rq.rq != NULL);
+ queue->hw_queue_id = rxq->devx_rq.rq->id;
+ }
+ if (event == RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_RXQ_CREATE)
+ queue->queue_info = generate_rx_queue_info(rxq);
+}
+
+static void
+notify_rxq_event(struct mlx5_rxq_priv *rxq,
+ enum rte_pmd_mlx5_driver_event_cb_type event)
+{
+ struct rte_pmd_mlx5_driver_event_cb_info cb_info = {
+ .event = event,
+ };
+ struct registered_cb *r;
+ uint16_t port_id;
+
+ MLX5_ASSERT(rxq != NULL);
+ MLX5_ASSERT(event == RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_RXQ_CREATE ||
+ event == RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_RXQ_DESTROY);
+
+ if (LIST_EMPTY(&cb_list_head))
+ return;
+
+ port_id = rxq->priv->dev_data->port_id;
+ fill_rxq_info(rxq, &cb_info.queue, event);
+
+ LIST_FOREACH(r, &cb_list_head, list)
+ r->cb(port_id, &cb_info, r->opaque);
+}
+
+void
+mlx5_driver_event_notify_rxq_create(struct mlx5_rxq_priv *rxq)
+{
+ notify_rxq_event(rxq, RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_RXQ_CREATE);
+}
+
+void
+mlx5_driver_event_notify_rxq_destroy(struct mlx5_rxq_priv *rxq)
+{
+ notify_rxq_event(rxq, RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_RXQ_DESTROY);
+}
+
+static void
+fill_txq_info(struct mlx5_txq_ctrl *txq_ctrl,
+ struct rte_pmd_mlx5_driver_event_cb_queue_info *queue)
+{
+ /* It is assumed that the port is started, so all control structs should be initialized. */
+ MLX5_ASSERT(txq_ctrl != NULL);
+ MLX5_ASSERT(txq_ctrl->obj != NULL);
+
+ queue->dpdk_queue_id = txq_ctrl->txq.idx;
+ if (txq_ctrl->is_hairpin) {
+ MLX5_ASSERT(txq_ctrl->obj->sq != NULL);
+ queue->hw_queue_id = txq_ctrl->obj->sq->id;
+ } else {
+ MLX5_ASSERT(txq_ctrl->obj->sq_obj.sq != NULL);
+ queue->hw_queue_id = txq_ctrl->obj->sq_obj.sq->id;
+ }
+}
+
+static void
+notify_txq_event(struct mlx5_txq_ctrl *txq_ctrl,
+ enum rte_pmd_mlx5_driver_event_cb_type event)
+{
+ struct rte_pmd_mlx5_driver_event_cb_info cb_info = {
+ .event = event,
+ };
+ struct registered_cb *r;
+ uint16_t port_id;
+
+ MLX5_ASSERT(txq_ctrl != NULL);
+ MLX5_ASSERT(event == RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_TXQ_CREATE ||
+ event == RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_TXQ_DESTROY);
+
+ if (LIST_EMPTY(&cb_list_head))
+ return;
+
+ port_id = txq_ctrl->priv->dev_data->port_id;
+ fill_txq_info(txq_ctrl, &cb_info.queue);
+
+ LIST_FOREACH(r, &cb_list_head, list)
+ r->cb(port_id, &cb_info, r->opaque);
+}
+
+void
+mlx5_driver_event_notify_txq_create(struct mlx5_txq_ctrl *txq_ctrl)
+{
+ notify_txq_event(txq_ctrl, RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_TXQ_CREATE);
+}
+
+void
+mlx5_driver_event_notify_txq_destroy(struct mlx5_txq_ctrl *txq_ctrl)
+{
+ notify_txq_event(txq_ctrl, RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_TXQ_DESTROY);
+}
+
+static void
+notify_existing_queues(uint16_t port_id,
+ rte_pmd_mlx5_driver_event_callback_t cb,
+ void *opaque)
+{
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+ struct mlx5_priv *priv = (struct mlx5_priv *)dev->data->dev_private;
+ unsigned int i;
+
+ /* Stopped port does not have any queues. */
+ if (!dev->data->dev_started)
+ return;
+
+ for (i = 0; i < priv->rxqs_n; ++i) {
+ struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
+ struct rte_pmd_mlx5_driver_event_cb_info cb_info = {
+ .event = RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_RXQ_CREATE,
+ };
+
+ /* Port is started and only known queues are iterated on. All should be there. */
+ MLX5_ASSERT(rxq != NULL);
+
+ fill_rxq_info(rxq, &cb_info.queue, cb_info.event);
+ cb(port_id, &cb_info, opaque);
+ }
+
+ for (i = 0; i < priv->txqs_n; ++i) {
+ struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
+ struct rte_pmd_mlx5_driver_event_cb_info cb_info = {
+ .event = RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_TXQ_CREATE,
+ };
+
+ /* Port is started and only known queues are iterated on. All should be there. */
+ MLX5_ASSERT(txq_ctrl != NULL);
+
+ fill_txq_info(txq_ctrl, &cb_info.queue);
+ cb(port_id, &cb_info, opaque);
+
+ /* mlx5_txq_get() increments a ref count on Tx queue. Need to decrement. */
+ mlx5_txq_release(dev, i);
+ }
+}
+
+static void
+notify_existing_devices(rte_pmd_mlx5_driver_event_callback_t cb, void *opaque)
+{
+ uint16_t port_id;
+
+ /*
+ * Whenever there is at least one available port,
+ * it means that EAL was initialized and ports were probed.
+ * Logging library should be available, so it is safe to use DRV_LOG.
+ */
+ MLX5_ETH_FOREACH_DEV(port_id, NULL)
+ notify_existing_queues(port_id, cb, opaque);
+}
+
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_pmd_mlx5_driver_event_cb_register, 25.11)
+int
+rte_pmd_mlx5_driver_event_cb_register(rte_pmd_mlx5_driver_event_callback_t cb, void *opaque)
+{
+ struct registered_cb *r;
+
+ if (cb == NULL)
+ return -EINVAL;
+
+ LIST_FOREACH(r, &cb_list_head, list) {
+ if (r->cb == cb)
+ return -EEXIST;
+ }
+
+ r = calloc(1, sizeof(*r));
+ if (r == NULL)
+ return -ENOMEM;
+
+ r->cb = cb;
+ r->opaque = opaque;
+
+ notify_existing_devices(cb, opaque);
+
+ LIST_INSERT_HEAD(&cb_list_head, r, list);
+
+ return 0;
+}
+
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_pmd_mlx5_driver_event_cb_unregister, 25.11)
+int
+rte_pmd_mlx5_driver_event_cb_unregister(rte_pmd_mlx5_driver_event_callback_t cb)
+{
+ struct registered_cb *r;
+ bool found = false;
+
+ if (cb == NULL)
+ return -EINVAL;
+
+ LIST_FOREACH(r, &cb_list_head, list) {
+ if (r->cb == cb) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ return 0;
+
+ LIST_REMOVE(r, list);
+ free(r);
+
+ return 0;
+}
+
+RTE_FINI(rte_pmd_mlx5_driver_event_cb_cleanup) {
+ struct registered_cb *r;
+
+ while (!LIST_EMPTY(&cb_list_head)) {
+ r = LIST_FIRST(&cb_list_head);
+ LIST_REMOVE(r, list);
+ free(r);
+ }
+}
diff --git a/drivers/net/mlx5/mlx5_driver_event.h b/drivers/net/mlx5/mlx5_driver_event.h
new file mode 100644
index 00000000000..ff4ce6e186f
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_driver_event.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 NVIDIA Corporation & Affiliates
+ */
+
+#ifndef RTE_PMD_MLX5_DRIVER_EVENT_H_
+#define RTE_PMD_MLX5_DRIVER_EVENT_H_
+
+/* Forward declarations. */
+struct mlx5_rxq_priv;
+struct mlx5_txq_ctrl;
+
+void mlx5_driver_event_notify_rxq_create(struct mlx5_rxq_priv *rxq);
+void mlx5_driver_event_notify_rxq_destroy(struct mlx5_rxq_priv *rxq);
+
+void mlx5_driver_event_notify_txq_create(struct mlx5_txq_ctrl *txq_ctrl);
+void mlx5_driver_event_notify_txq_destroy(struct mlx5_txq_ctrl *txq_ctrl);
+
+#endif /* RTE_PMD_MLX5_DRIVER_EVENT_H_ */
diff --git a/drivers/net/mlx5/rte_pmd_mlx5.h b/drivers/net/mlx5/rte_pmd_mlx5.h
index fdd2f658887..da8d4b1c83c 100644
--- a/drivers/net/mlx5/rte_pmd_mlx5.h
+++ b/drivers/net/mlx5/rte_pmd_mlx5.h
@@ -5,7 +5,10 @@
#ifndef RTE_PMD_PRIVATE_MLX5_H_
#define RTE_PMD_PRIVATE_MLX5_H_
+#include <stdint.h>
+
#include <rte_compat.h>
+#include <rte_per_lcore.h>
/**
* @file
@@ -415,6 +418,139 @@ __rte_experimental
int
rte_pmd_mlx5_txq_dump_contexts(uint16_t port_id, uint16_t queue_id, const char *filename);
+/** Type of mlx5 driver event for which custom callback is called. */
+enum rte_pmd_mlx5_driver_event_cb_type {
+ /** Called after HW Rx queue is created. */
+ RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_RXQ_CREATE,
+ /** Called before HW Rx queue is destroyed. */
+ RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_RXQ_DESTROY,
+ /** Called after HW Tx queue is created. */
+ RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_TXQ_CREATE,
+ /** Called before HW Tx queue is destroyed. */
+ RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_TXQ_DESTROY,
+};
+
+/** Information about the queue for which driver event is being called. */
+struct rte_pmd_mlx5_driver_event_cb_queue_info {
+ /** DPDK queue index. */
+ uint16_t dpdk_queue_id;
+ /** HW queue identifier (DevX object ID). */
+ uint32_t hw_queue_id;
+ /**
+ * Low-level HW configuration of the port related to the queue.
+ * This configuration is presented as a string
+ * with "key=value" pairs, separated by commas.
+ * This string is owned by the mlx5 PMD and must not be freed by the user;
+ * if needed after the callback returns, it should be copied to memory owned by the user.
+ *
+ * For RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_RXQ_CREATE this will contain:
+ *
+ * - lro_timeout - Configured timeout of LRO session in microseconds.
+ * Set to 0 if LRO is not configured.
+ * - max_lro_msg_size - Maximum size of a single LRO message.
+ * Provided in granularity of 256 bytes.
+ * Set to 0 if LRO is not configured.
+ * - td - Identifier of transport domain allocated from HW (DevX object ID).
+ * - lpbk - Set to 1 if loopback is enabled on the given queue.
+ *
+ * For all other events, this field will be set to NULL.
+ */
+ const char *queue_info;
+};
+
+/** Information related to a driver event. */
+struct rte_pmd_mlx5_driver_event_cb_info {
+ /** Type of the driver event for which the callback is called. */
+ enum rte_pmd_mlx5_driver_event_cb_type event;
+ union {
+ /**
+ * Information about the queue for which driver event is being called.
+ *
+ * This union variant is valid for the following events:
+ *
+ * - RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_RXQ_CREATE
+ * - RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_RXQ_DESTROY
+ * - RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_TXQ_CREATE
+ * - RTE_PMD_MLX5_DRIVER_EVENT_CB_TYPE_TXQ_DESTROY
+ */
+ struct rte_pmd_mlx5_driver_event_cb_queue_info queue;
+ };
+};
+
+/** Prototype of the callback called on mlx5 driver events. */
+typedef void (*rte_pmd_mlx5_driver_event_callback_t)(uint16_t port_id,
+ const struct rte_pmd_mlx5_driver_event_cb_info *info,
+ const void *opaque);
+
+
+/**
+ * Register mlx5 driver event callback.
+ *
+ * mlx5 PMD configures HW through interfaces exposed by rdma-core and mlx5 kernel driver.
+ * Any HW object created this way may be used by other libraries or applications.
+ * This function allows an application to register a custom callback which will be called
+ * whenever mlx5 PMD performs some operation (driver event) on a managed HW object.
+ * #rte_pmd_mlx5_driver_event_cb_type defines exposed driver events.
+ *
+ * This function can be called multiple times with different callbacks.
+ * mlx5 PMD will register all of them and all of them will be called for triggered driver events.
+ *
+ * This function can be called:
+ *
+ * - before or after #rte_eal_init (potentially in a constructor function as well),
+ * - before or after any mlx5 port is probed.
+ *
+ * If this function is called when mlx5 ports (at least one) exist,
+ * then the provided callback will be immediately called for all applicable driver events,
+ * for all existing mlx5 ports.
+ *
+ * This function is lock-free and it is assumed that it won't be called concurrently
+ * with other functions from ethdev API used to configure any of the mlx5 ports.
+ * It is the responsibility of the application to enforce this.
+ *
+ * Registered callbacks might be called during control path configuration triggered
+ * through DPDK APIs. It is the user's responsibility not to trigger
+ * further configuration through DPDK APIs from within the callback itself.
+ *
+ * mlx5 PMD registers a destructor (through #RTE_FINI)
+ * which will unregister all known callbacks.
+ *
+ * @param[in] cb
+ * Pointer to callback.
+ * @param[in] opaque
+ * Opaque pointer which will be passed as an argument to @p cb on each event.
+ *
+ * @return
+ * - 0 if callback was successfully registered.
+ * - (-EINVAL) if @p cb is NULL.
+ * - (-EEXIST) if @p cb was already registered.
+ * - (-ENOMEM) if failed to allocate memory for callback entry.
+ */
+__rte_experimental
+int
+rte_pmd_mlx5_driver_event_cb_register(rte_pmd_mlx5_driver_event_callback_t cb, void *opaque);
+
+/**
+ * Unregister driver event callback.
+ *
+ * Unregisters a mlx5 driver event callback which was previously registered
+ * through #rte_pmd_mlx5_driver_event_cb_register.
+ *
+ * This function is lock-free and it is assumed that it won't be called concurrently
+ * with other functions from ethdev API used to configure any of the mlx5 ports.
+ * It is the responsibility of the application to enforce this.
+ *
+ * @param[in] cb
+ * Pointer to callback.
+ *
+ * @return
+ * - 0 if callback was successfully unregistered or if no such callback was registered.
+ * - (-EINVAL) if @p cb is NULL.
+ */
+__rte_experimental
+int
+rte_pmd_mlx5_driver_event_cb_unregister(rte_pmd_mlx5_driver_event_callback_t cb);
+
#ifdef __cplusplus
}
#endif
--
2.21.0
* [PATCH 2/4] net/mlx5: move eCPRI release function to flex
2025-08-26 11:45 [PATCH 0/4] [25.11] net/mlx5: add driver event and steering toggle APIs Maayan Kashani
2025-08-26 11:45 ` [PATCH 1/4] net/mlx5: add driver event callbacks Maayan Kashani
@ 2025-08-26 11:45 ` Maayan Kashani
2025-08-26 11:45 ` [PATCH 3/4] net/mlx5: rework Rx queue mark flag functions Maayan Kashani
2025-08-26 11:45 ` [PATCH 4/4] net/mlx5: add steering toggle API Maayan Kashani
3 siblings, 0 replies; 5+ messages in thread
From: Maayan Kashani @ 2025-08-26 11:45 UTC (permalink / raw)
To: dev
Cc: mkashani, dsosnowski, rasland, Viacheslav Ovsiienko, Bing Zhao,
Ori Kam, Suanming Mou, Matan Azrad
From: Dariusz Sosnowski <dsosnowski@nvidia.com>
Move mlx5_flex_parser_ecpri_release() from the main source file
to the file dedicated to flex parser logic.
Also, make the function non-static so it can be used
in the flow steering cleanup introduced in follow-up patches.
Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
---
drivers/net/mlx5/mlx5.c | 18 ------------------
drivers/net/mlx5/mlx5.h | 1 +
drivers/net/mlx5/mlx5_flow_flex.c | 18 ++++++++++++++++++
3 files changed, 19 insertions(+), 18 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 1321be779bb..ece29fb2164 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1044,24 +1044,6 @@ mlx5_flex_parser_ecpri_alloc(struct rte_eth_dev *dev)
return (rte_errno == 0) ? -ENODEV : -rte_errno;
}
-/*
- * Destroy the flex parser node, including the parser itself, input / output
- * arcs and DW samples. Resources could be reused then.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- */
-static void
-mlx5_flex_parser_ecpri_release(struct rte_eth_dev *dev)
-{
- struct mlx5_priv *priv = dev->data->dev_private;
- struct mlx5_ecpri_parser_profile *prf = &priv->sh->ecpri_parser;
-
- if (prf->obj)
- mlx5_devx_cmd_destroy(prf->obj);
- prf->obj = NULL;
-}
-
/*
* Allocation of a flex parser for srh. Once refcnt is zero, the resources held
* by this parser will be freed.
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 53f0a27445a..32f98f969ce 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -2735,6 +2735,7 @@ int mlx5_flex_acquire_index(struct rte_eth_dev *dev,
struct rte_flow_item_flex_handle *handle,
bool acquire);
int mlx5_flex_release_index(struct rte_eth_dev *dev, int index);
+void mlx5_flex_parser_ecpri_release(struct rte_eth_dev *dev);
/* Flex parser list callbacks. */
struct mlx5_list_entry *mlx5_flex_parser_create_cb(void *list_ctx, void *ctx);
diff --git a/drivers/net/mlx5/mlx5_flow_flex.c b/drivers/net/mlx5/mlx5_flow_flex.c
index afed16985ac..b1174fe0eda 100644
--- a/drivers/net/mlx5/mlx5_flow_flex.c
+++ b/drivers/net/mlx5/mlx5_flow_flex.c
@@ -1557,3 +1557,21 @@ mlx5_flex_parser_clone_free_cb(void *list_ctx, struct mlx5_list_entry *entry)
RTE_SET_USED(list_ctx);
mlx5_free(fp);
}
+
+/*
+ * Destroy the flex parser node, including the parser itself, input / output
+ * arcs and DW samples. Resources could be reused then.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ */
+void
+mlx5_flex_parser_ecpri_release(struct rte_eth_dev *dev)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_ecpri_parser_profile *prf = &priv->sh->ecpri_parser;
+
+ if (prf->obj)
+ mlx5_devx_cmd_destroy(prf->obj);
+ prf->obj = NULL;
+}
--
2.21.0
* [PATCH 3/4] net/mlx5: rework Rx queue mark flag functions
2025-08-26 11:45 [PATCH 0/4] [25.11] net/mlx5: add driver event and steering toggle APIs Maayan Kashani
2025-08-26 11:45 ` [PATCH 1/4] net/mlx5: add driver event callbacks Maayan Kashani
2025-08-26 11:45 ` [PATCH 2/4] net/mlx5: move eCPRI release function to flex Maayan Kashani
@ 2025-08-26 11:45 ` Maayan Kashani
2025-08-26 11:45 ` [PATCH 4/4] net/mlx5: add steering toggle API Maayan Kashani
3 siblings, 0 replies; 5+ messages in thread
From: Maayan Kashani @ 2025-08-26 11:45 UTC (permalink / raw)
To: dev
Cc: mkashani, dsosnowski, rasland, Viacheslav Ovsiienko, Bing Zhao,
Ori Kam, Suanming Mou, Matan Azrad
From: Dariusz Sosnowski <dsosnowski@nvidia.com>
This patch:
- Renames flow_rxq_mark_flag_set() to mlx5_flow_rxq_mark_flag_set()
so that it has a proper prefix, and makes it non-static.
- Renames flow_rxq_flags_clear() to mlx5_flow_rxq_flags_clear()
so that it has a proper prefix, and makes it non-static.
Both of these functions will be used in a follow-up patch
to implement flow steering cleanup when disabling steering.
Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
---
drivers/net/mlx5/mlx5_flow.c | 12 ++++++------
drivers/net/mlx5/mlx5_flow.h | 4 +++-
2 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index cc9ec73dfee..e6a057160cb 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1630,8 +1630,8 @@ flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
}
}
-static void
-flow_rxq_mark_flag_set(struct rte_eth_dev *dev)
+void
+mlx5_flow_rxq_mark_flag_set(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_rxq_ctrl *rxq_ctrl;
@@ -1687,7 +1687,7 @@ flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
MLX5_ASSERT(wks);
if (wks->mark)
- flow_rxq_mark_flag_set(dev);
+ mlx5_flow_rxq_mark_flag_set(dev);
SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
handle_idx, dev_handle, next)
flow_drv_rxq_flags_set(dev, dev_handle);
@@ -1784,8 +1784,8 @@ flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
* @param dev
* Pointer to Ethernet device.
*/
-static void
-flow_rxq_flags_clear(struct rte_eth_dev *dev)
+void
+mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
unsigned int i;
@@ -8177,7 +8177,7 @@ mlx5_flow_stop_default(struct rte_eth_dev *dev)
}
#endif
flow_mreg_del_default_copy_action(dev);
- flow_rxq_flags_clear(dev);
+ mlx5_flow_rxq_flags_clear(dev);
}
/**
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index e890e732c34..36be7660012 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -3592,6 +3592,8 @@ int mlx5_flow_item_field_width(struct rte_eth_dev *dev,
enum rte_flow_field_id field, int inherit,
const struct rte_flow_attr *attr,
struct rte_flow_error *error);
+void mlx5_flow_rxq_mark_flag_set(struct rte_eth_dev *dev);
+void mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev);
uintptr_t flow_legacy_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
const struct rte_flow_attr *attr,
const struct rte_flow_item items[],
@@ -3768,5 +3770,5 @@ mlx5_flow_hw_action_flags_get(const struct rte_flow_action actions[],
#include "mlx5_nta_sample.h"
-#endif
+#endif /* HAVE_MLX5_HWS_SUPPORT */
#endif /* RTE_PMD_MLX5_FLOW_H_ */
--
2.21.0
* [PATCH 4/4] net/mlx5: add steering toggle API
2025-08-26 11:45 [PATCH 0/4] [25.11] net/mlx5: add driver event and steering toggle APIs Maayan Kashani
` (2 preceding siblings ...)
2025-08-26 11:45 ` [PATCH 3/4] net/mlx5: rework Rx queue mark flag functions Maayan Kashani
@ 2025-08-26 11:45 ` Maayan Kashani
3 siblings, 0 replies; 5+ messages in thread
From: Maayan Kashani @ 2025-08-26 11:45 UTC (permalink / raw)
To: dev
Cc: mkashani, dsosnowski, rasland, Viacheslav Ovsiienko, Bing Zhao,
Ori Kam, Suanming Mou, Matan Azrad
From: Dariusz Sosnowski <dsosnowski@nvidia.com>
This patch adds:
- rte_pmd_mlx5_disable_steering()
- rte_pmd_mlx5_enable_steering()
private mlx5 PMD APIs, which allow applications to enable/disable
flow rule handling in mlx5 PMD (both internally and externally managed rules).
Together with the driver event callback API, this allows applications
to use external libraries to configure flow rules which forward
traffic to Rx and Tx queues managed by DPDK (see the usage sketch below).
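For illustration, a minimal usage sketch is shown below. This is hypothetical
application-side code, assuming the APIs added in this series (registration of
the driver event callback from the first patch is omitted for brevity):

    #include <stdio.h>
    #include <rte_eal.h>
    #include <rte_pmd_mlx5.h>

    int
    main(int argc, char **argv)
    {
        /* Disable steering globally; allowed before or after rte_eal_init(). */
        rte_pmd_mlx5_disable_steering();

        if (rte_eal_init(argc, argv) < 0)
            return -1;

        /*
         * From this point on, mlx5 ports install no flow rules of their own and
         * the rte_flow API is reported as unsupported on those ports.
         * Queue identifiers delivered through the driver event callbacks can be
         * handed to an external steering library instead.
         */

        /*
         * Re-enabling steering is only possible once all mlx5 ports have been
         * removed; otherwise the call fails with -EBUSY.
         */
        if (rte_pmd_mlx5_enable_steering() != 0)
            printf("mlx5 ports still probed; steering stays disabled\n");

        return 0;
    }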
Signed-off-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
---
drivers/net/mlx5/mlx5_flow.c | 187 +++++++++++++++++++++++++++++++-
drivers/net/mlx5/mlx5_flow.h | 3 +
drivers/net/mlx5/mlx5_trigger.c | 30 +++++
drivers/net/mlx5/rte_pmd_mlx5.h | 56 ++++++++++
4 files changed, 272 insertions(+), 4 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index e6a057160cb..1de398982a9 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -8165,9 +8165,12 @@ mlx5_flow_list_flush(struct rte_eth_dev *dev, enum mlx5_flow_type type,
void
mlx5_flow_stop_default(struct rte_eth_dev *dev)
{
-#ifdef HAVE_MLX5_HWS_SUPPORT
struct mlx5_priv *priv = dev->data->dev_private;
+ if (mlx5_flow_is_steering_disabled())
+ return;
+
+#ifdef HAVE_MLX5_HWS_SUPPORT
if (priv->sh->config.dv_flow_en == 2) {
mlx5_flow_nta_del_default_copy_action(dev);
if (!rte_atomic_load_explicit(&priv->hws_mark_refcnt,
@@ -8175,6 +8178,8 @@ mlx5_flow_stop_default(struct rte_eth_dev *dev)
flow_hw_rxq_flag_set(dev, false);
return;
}
+#else
+ RTE_SET_USED(priv);
#endif
flow_mreg_del_default_copy_action(dev);
mlx5_flow_rxq_flags_clear(dev);
@@ -8220,10 +8225,12 @@ int
mlx5_flow_start_default(struct rte_eth_dev *dev)
{
struct rte_flow_error error;
-#ifdef HAVE_MLX5_HWS_SUPPORT
- struct mlx5_priv *priv = dev->data->dev_private;
- if (priv->sh->config.dv_flow_en == 2) {
+ if (mlx5_flow_is_steering_disabled())
+ return 0;
+
+#ifdef HAVE_MLX5_HWS_SUPPORT
+ if (MLX5_SH(dev)->config.dv_flow_en == 2) {
/*
* Ignore this failure, if the proxy port is not started, other
* default jump actions are not created and this rule will not
@@ -8879,6 +8886,13 @@ int
mlx5_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
const struct rte_flow_ops **ops)
{
+ if (mlx5_flow_is_steering_disabled()) {
+ DRV_LOG(WARNING, "port %u flow API is not supported since steering was disabled",
+ dev->data->port_id);
+ *ops = NULL;
+ return 0;
+ }
+
*ops = &mlx5_flow_ops;
return 0;
}
@@ -12347,3 +12361,168 @@ mlx5_ctrl_flow_uc_dmac_vlan_exists(struct rte_eth_dev *dev,
}
return exists;
}
+
+static bool mlx5_steering_disabled;
+
+bool
+mlx5_flow_is_steering_disabled(void)
+{
+ return mlx5_steering_disabled;
+}
+
+static void
+flow_disable_steering_flush(struct rte_eth_dev *dev)
+{
+ /*
+ * This repeats the steps done in mlx5_dev_stop(), with a small difference:
+ * - mlx5_flow_hw_cleanup_ctrl_rx_templates() and mlx5_action_handle_detach()
+ * They are rearranged to make it work with different dev->data->dev_started.
+ * Please see a TODO note in mlx5_dev_stop().
+ */
+
+ mlx5_flow_stop_default(dev);
+ mlx5_traffic_disable(dev);
+ mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
+ mlx5_flow_meter_rxq_flush(dev);
+#ifdef HAVE_MLX5_HWS_SUPPORT
+ mlx5_flow_hw_cleanup_ctrl_rx_templates(dev);
+#endif
+ mlx5_action_handle_detach(dev);
+}
+
+static void
+flow_disable_steering_cleanup(struct rte_eth_dev *dev)
+{
+ /*
+ * See mlx5_dev_close(). Only steps not done on mlx5_dev_stop() are executed here.
+ * Necessary steps are copied as is because steering resource cleanup in mlx5_dev_close()
+ * is interleaved with other steps.
+ * TODO: Rework steering resource cleanup in mlx5_dev_close() to allow code reuse.
+ */
+
+ struct mlx5_priv *priv = dev->data->dev_private;
+
+ mlx5_action_handle_flush(dev);
+ mlx5_flow_meter_flush(dev, NULL);
+ mlx5_flex_parser_ecpri_release(dev);
+ mlx5_flex_item_port_cleanup(dev);
+ mlx5_indirect_list_handles_release(dev);
+#ifdef HAVE_MLX5_HWS_SUPPORT
+ flow_hw_destroy_vport_action(dev);
+ flow_hw_resource_release(dev);
+ flow_hw_clear_port_info(dev);
+ if (priv->tlv_options != NULL) {
+ /* Free the GENEVE TLV parser resource. */
+ claim_zero(mlx5_geneve_tlv_options_destroy(priv->tlv_options, priv->sh->phdev));
+ priv->tlv_options = NULL;
+ }
+ if (priv->ptype_rss_groups) {
+ mlx5_ipool_destroy(priv->ptype_rss_groups);
+ priv->ptype_rss_groups = NULL;
+ }
+ if (priv->dr_ctx) {
+ claim_zero(mlx5dr_context_close(priv->dr_ctx));
+ priv->dr_ctx = NULL;
+ }
+#else
+ RTE_SET_USED(priv);
+#endif
+}
+
+typedef void (*run_on_related_cb_t)(struct rte_eth_dev *dev);
+
+static void
+flow_disable_steering_run_on_related(struct rte_eth_dev *dev,
+ run_on_related_cb_t cb)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ uint16_t other_port_id;
+ uint16_t proxy_port_id;
+ uint16_t port_id;
+ int ret __rte_unused;
+
+ if (priv->sh->config.dv_esw_en) {
+ ret = mlx5_flow_pick_transfer_proxy(dev, &proxy_port_id, NULL);
+ if (ret != 0) {
+ /*
+ * This case should not happen because E-Switch is enabled.
+ * However, in any case, release resources on the given port
+ * and log the misconfigured port.
+ */
+ DRV_LOG(ERR, "port %u unable to find transfer proxy port ret=%d",
+ priv->dev_data->port_id, ret);
+ cb(dev);
+ return;
+ }
+
+ /* Run callback on representors. */
+ MLX5_ETH_FOREACH_DEV(other_port_id, dev->device) {
+ struct rte_eth_dev *other_dev = &rte_eth_devices[other_port_id];
+
+ if (other_port_id != proxy_port_id)
+ cb(other_dev);
+ }
+
+ /* Run callback on proxy port. */
+ cb(&rte_eth_devices[proxy_port_id]);
+ } else if (rte_atomic_load_explicit(&priv->shared_refcnt, rte_memory_order_relaxed) > 0) {
+ /* Run callback on guest ports. */
+ MLX5_ETH_FOREACH_DEV(port_id, NULL) {
+ struct rte_eth_dev *other_dev = &rte_eth_devices[port_id];
+ struct mlx5_priv *other_priv = other_dev->data->dev_private;
+
+ if (other_priv->shared_host == dev)
+ cb(other_dev);
+ }
+
+ /* Run callback on host port. */
+ cb(dev);
+ } else {
+ cb(dev);
+ }
+}
+
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_pmd_mlx5_disable_steering, 25.11)
+void
+rte_pmd_mlx5_disable_steering(void)
+{
+ uint16_t port_id;
+
+ if (mlx5_steering_disabled)
+ return;
+
+ MLX5_ETH_FOREACH_DEV(port_id, NULL) {
+ struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+ if (mlx5_hws_active(dev)) {
+ flow_disable_steering_run_on_related(dev, flow_disable_steering_flush);
+ flow_disable_steering_run_on_related(dev, flow_disable_steering_cleanup);
+ } else {
+ flow_disable_steering_flush(dev);
+ flow_disable_steering_cleanup(dev);
+ }
+
+ mlx5_flow_rxq_mark_flag_set(dev);
+ }
+
+ mlx5_steering_disabled = true;
+}
+
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_pmd_mlx5_enable_steering, 25.11)
+int
+rte_pmd_mlx5_enable_steering(void)
+{
+ uint16_t port_id;
+
+ if (!mlx5_steering_disabled)
+ return 0;
+
+ /* If any mlx5 port is probed, disallow enabling steering. */
+ port_id = mlx5_eth_find_next(0, NULL);
+ if (port_id != RTE_MAX_ETHPORTS)
+ return -EBUSY;
+
+ mlx5_steering_disabled = false;
+
+ return 0;
+}
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 36be7660012..8201b7aa4e3 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -3670,6 +3670,9 @@ flow_hw_get_ipv6_route_ext_mod_id_from_ctx(void *dr_ctx, uint8_t idx)
}
void
mlx5_indirect_list_handles_release(struct rte_eth_dev *dev);
+
+bool mlx5_flow_is_steering_disabled(void);
+
#ifdef HAVE_MLX5_HWS_SUPPORT
#define MLX5_REPR_STC_MEMORY_LOG 11
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 6c6f228afd1..b104ca9f520 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -1253,6 +1253,14 @@ mlx5_dev_start(struct rte_eth_dev *dev)
if (priv->sh->config.dv_flow_en == 2) {
struct rte_flow_error error = { 0, };
+ /*
+ * If steering is disabled, then:
+ * - There are no limitations regarding port start ordering,
+ * since no flow rules need to be created as part of port start.
+ * - Non template API initialization will be skipped.
+ */
+ if (mlx5_flow_is_steering_disabled())
+ goto continue_dev_start;
/*If previous configuration does not exist. */
if (!(priv->dr_ctx)) {
ret = flow_hw_init(dev, &error);
@@ -1420,6 +1428,8 @@ mlx5_dev_start(struct rte_eth_dev *dev)
dev->data->port_id, rte_strerror(rte_errno));
goto error;
}
+ if (mlx5_flow_is_steering_disabled())
+ mlx5_flow_rxq_mark_flag_set(dev);
rte_wmb();
dev->tx_pkt_burst = mlx5_select_tx_function(dev);
dev->rx_pkt_burst = mlx5_select_rx_function(dev);
@@ -1530,6 +1540,13 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
#ifdef HAVE_MLX5_HWS_SUPPORT
if (priv->sh->config.dv_flow_en == 2) {
+ /*
+ * If steering is disabled,
+ * then there are no limitations regarding port stop ordering,
+ * since no flow rules need to be destroyed as part of port stop.
+ */
+ if (mlx5_flow_is_steering_disabled())
+ goto continue_dev_stop;
/* If there is no E-Switch, then there are no start/stop order limitations. */
if (!priv->sh->config.dv_esw_en)
goto continue_dev_stop;
@@ -1552,6 +1569,8 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
mlx5_mp_os_req_stop_rxtx(dev);
rte_delay_us_sleep(1000 * priv->rxqs_n);
DRV_LOG(DEBUG, "port %u stopping device", dev->data->port_id);
+ if (mlx5_flow_is_steering_disabled())
+ mlx5_flow_rxq_flags_clear(dev);
mlx5_flow_stop_default(dev);
/* Control flows for default traffic can be removed firstly. */
mlx5_traffic_disable(dev);
@@ -1692,6 +1711,9 @@ mlx5_traffic_enable(struct rte_eth_dev *dev)
unsigned int j;
int ret;
+ if (mlx5_flow_is_steering_disabled())
+ return 0;
+
#ifdef HAVE_MLX5_HWS_SUPPORT
if (priv->sh->config.dv_flow_en == 2)
return mlx5_traffic_enable_hws(dev);
@@ -1878,6 +1900,9 @@ mlx5_traffic_disable_legacy(struct rte_eth_dev *dev)
void
mlx5_traffic_disable(struct rte_eth_dev *dev)
{
+ if (mlx5_flow_is_steering_disabled())
+ return;
+
#ifdef HAVE_MLX5_HWS_SUPPORT
struct mlx5_priv *priv = dev->data->dev_private;
@@ -1900,6 +1925,9 @@ mlx5_traffic_disable(struct rte_eth_dev *dev)
int
mlx5_traffic_restart(struct rte_eth_dev *dev)
{
+ if (mlx5_flow_is_steering_disabled())
+ return 0;
+
if (dev->data->dev_started) {
mlx5_traffic_disable(dev);
#ifdef HAVE_MLX5_HWS_SUPPORT
@@ -1915,6 +1943,8 @@ mac_flows_update_needed(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
+ if (mlx5_flow_is_steering_disabled())
+ return false;
if (!dev->data->dev_started)
return false;
if (dev->data->promiscuous)
diff --git a/drivers/net/mlx5/rte_pmd_mlx5.h b/drivers/net/mlx5/rte_pmd_mlx5.h
index da8d4b1c83c..4e253a602ae 100644
--- a/drivers/net/mlx5/rte_pmd_mlx5.h
+++ b/drivers/net/mlx5/rte_pmd_mlx5.h
@@ -551,6 +551,62 @@ __rte_experimental
int
rte_pmd_mlx5_driver_event_cb_unregister(rte_pmd_mlx5_driver_event_callback_t cb);
+/**
+ * Disable flow steering for all mlx5 ports.
+ *
+ * In mlx5 PMD, HW flow rules are generally used in 2 ways:
+ *
+ * - "internal" - to connect HW objects created by mlx5 PMD (e.g. Rx queues)
+ * to the datapath, so traffic can be received in user space by the DPDK application,
+ * bypassing the kernel driver. Such rules are created implicitly by mlx5 PMD.
+ * - "external" - flow rules created explicitly by the application through the flow API.
+ *
+ * In mlx5 PMD language, configuring flow rules is known as configuring flow steering.
+ *
+ * If an application wants to use any other library compatible with NVIDIA hardware
+ * to configure flow steering or delegate flow steering to another process,
+ * the application can call this function to disable flow steering globally for all mlx5 ports.
+ *
+ * Information required to configure flow steering in such a way that externally created
+ * flow rules would forward/match traffic to DPDK-managed Rx/Tx queues can be extracted
+ * through #rte_pmd_mlx5_driver_event_cb_register API.
+ *
+ * This function can be called:
+ *
+ * - before or after #rte_eal_init.
+ * - before or after any mlx5 port is probed.
+ *
+ * If this function is called when mlx5 ports (at least one) exist,
+ * then steering will be disabled for all existing mlx5 ports.
+ * This will invalidate *ALL* handles to objects returned from the flow API for these ports
+ * (for example handles to flow rules, indirect actions, template tables).
+ *
+ * This function is lock-free and it is assumed that it won't be called concurrently
+ * with other functions from ethdev API used to configure any of the mlx5 ports.
+ * It is the responsibility of the application to enforce this.
+ */
+__rte_experimental
+void
+rte_pmd_mlx5_disable_steering(void);
+
+/**
+ * Enable flow steering for mlx5 ports.
+ *
+ * This function reverses the effects of #rte_pmd_mlx5_disable_steering.
+ *
+ * It can be called if and only if there are no mlx5 ports known by DPDK.
+ * If #rte_pmd_mlx5_disable_steering was previously called,
+ * the application has to remove the mlx5 devices, call this function and
+ * re-probe the mlx5 devices.
+ *
+ * @return
+ * - 0 - Flow steering was successfully enabled, or flow steering was never disabled.
+ * - (-EBUSY) - There are mlx5 ports probed and re-enabling steering cannot be done safely.
+ */
+__rte_experimental
+int
+rte_pmd_mlx5_enable_steering(void);
+
#ifdef __cplusplus
}
#endif
--
2.21.0