From: Bing Zhao <bingz@nvidia.com>
To: <viacheslavo@nvidia.com>, <matan@nvidia.com>
Cc: <dev@dpdk.org>, <thomas@monjalon.net>, <dsosnowski@nvidia.com>,
<suanmingm@nvidia.com>, <rasland@nvidia.com>
Subject: [PATCH v3 4/5] net/mlx5: pass the information in Tx queue start
Date: Fri, 27 Jun 2025 19:37:28 +0300
Message-ID: <20250627163729.50460-5-bingz@nvidia.com>
In-Reply-To: <20250627163729.50460-1-bingz@nvidia.com>
The actual DevX objects of the SQs and CQs are only created in
mlx5_txq_start() during the device start stage.

By changing the single-level iteration over the Tx queues into a
two-level iteration, the queues with the biggest depth are set up
first. This helps to split the memory area from big chunks into
small ones. In testing, such an assignment order improves the
performance slightly. All the doorbells are grouped and padded at
the end of the umem area.

The umem object and the offset information are passed to the DevX
creation functions for further usage.
Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
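Note: the queue-ordering change can be sketched as a small standalone
program. Everything here is illustrative only: struct txq stands in
for the driver's struct mlx5_txq_data, of which only elts_n (the log2
queue depth) is mimicked, and log_max_wqe plays the role of
log2above(mlx5_dev_get_max_wq_size()).

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for struct mlx5_txq_data. */
struct txq {
	uint32_t elts_n; /* log2 of the queue depth */
};

int
main(void)
{
	struct txq txqs[] = { {4}, {10}, {6}, {10}, {8} };
	uint32_t nb_txqs = sizeof(txqs) / sizeof(txqs[0]);
	uint32_t log_max_wqe = 10; /* assumed device maximum (log2) */
	uint32_t cnt, i;

	/* Two-level iteration: queues with the biggest depth first. */
	for (cnt = log_max_wqe; cnt > 0; cnt -= 1)
		for (i = 0; i < nb_txqs; i++)
			if (txqs[i].elts_n == cnt)
				printf("set up queue %u (depth 2^%u)\n",
				       i, cnt);
	return 0;
}

With the sample depths above, queues 1 and 3 (depth 2^10) are set up
before queues 4, 2 and 0, so the biggest rings are carved out of the
shared memory area first.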
 drivers/common/mlx5/mlx5_devx_cmds.h | 10 ++++
 drivers/net/mlx5/mlx5_devx.c         | 32 ++++++++++-
 drivers/net/mlx5/mlx5_trigger.c      | 81 ++++++++++++++--------------
 3 files changed, 82 insertions(+), 41 deletions(-)
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 6c726a0d46..f5fda02c1e 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -483,6 +483,11 @@ struct mlx5_devx_create_sq_attr {
 	uint32_t packet_pacing_rate_limit_index:16;
 	uint32_t tis_lst_sz:16;
 	uint32_t tis_num:24;
+	uint32_t q_off;  /* Queue offset within the umem area. */
+	void *umem;      /* Consecutive memory buffer address. */
+	void *umem_obj;  /* Registered umem object. */
+	uint32_t q_len;  /* Length of the queue memory. */
+	uint32_t db_off; /* Doorbell offset within the umem area. */
 	struct mlx5_devx_wq_attr wq_attr;
 };
 
@@ -514,6 +519,11 @@ struct mlx5_devx_cq_attr {
 	uint64_t db_umem_offset;
 	uint32_t eqn;
 	uint64_t db_addr;
+	void *umem;      /* Consecutive memory buffer address. */
+	void *umem_obj;  /* Registered umem object. */
+	uint32_t q_off;  /* Queue offset within the umem area. */
+	uint32_t q_len;  /* Length of the queue memory. */
+	uint32_t db_off; /* Doorbell offset within the umem area. */
 };
 
 /* Virtq attributes structure, used by VIRTQ operations. */
diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c
index 3d49e096ef..985ffdfd18 100644
--- a/drivers/net/mlx5/mlx5_devx.c
+++ b/drivers/net/mlx5/mlx5_devx.c
@@ -1493,10 +1493,25 @@ mlx5_txq_create_devx_sq_resources(struct rte_eth_dev *dev, uint16_t idx,
 			mlx5_ts_format_conv(cdev->config.hca_attr.sq_ts_format),
 		.tis_num = mlx5_get_txq_tis_num(dev, idx),
 	};
+	uint32_t db_start = priv->consec_tx_mem.sq_total_size + priv->consec_tx_mem.cq_total_size;
+	uint32_t act_sq_len = 0, alignment;
+	int ret;
 
 	/* Create Send Queue object with DevX. */
-	return mlx5_devx_sq_create(cdev->ctx, &txq_obj->sq_obj,
-				   log_desc_n, &sq_attr, priv->sh->numa_node);
+	if (priv->sh->config.txq_mem_algn) {
+		alignment = RTE_BIT32(priv->sh->config.txq_mem_algn);
+		sq_attr.umem = priv->consec_tx_mem.umem;
+		sq_attr.umem_obj = priv->consec_tx_mem.umem_obj;
+		act_sq_len = RTE_ALIGN(txq_data->sq_mem_len, alignment);
+		sq_attr.q_off = priv->consec_tx_mem.sq_cur_off;
+		sq_attr.db_off = db_start + (2 * idx) * MLX5_DBR_SIZE;
+		sq_attr.q_len = txq_data->sq_mem_len;
+	}
+	ret = mlx5_devx_sq_create(cdev->ctx, &txq_obj->sq_obj,
+				  log_desc_n, &sq_attr, priv->sh->numa_node);
+	if (!ret)
+		priv->consec_tx_mem.sq_cur_off += act_sq_len;
+	return ret;
 }
 
 #endif
@@ -1536,6 +1551,8 @@ mlx5_txq_devx_obj_new(struct rte_eth_dev *dev, uint16_t idx)
 	uint32_t cqe_n, log_desc_n;
 	uint32_t wqe_n, wqe_size;
 	int ret = 0;
+	uint32_t db_start = priv->consec_tx_mem.sq_total_size + priv->consec_tx_mem.cq_total_size;
+	uint32_t act_cq_len = 0, alignment;
 
 	MLX5_ASSERT(txq_data);
 	MLX5_ASSERT(txq_obj);
@@ -1557,6 +1574,15 @@ mlx5_txq_devx_obj_new(struct rte_eth_dev *dev, uint16_t idx)
 		rte_errno = EINVAL;
 		return 0;
 	}
+	if (priv->sh->config.txq_mem_algn) {
+		alignment = RTE_BIT32(priv->sh->config.txq_mem_algn);
+		cq_attr.umem = priv->consec_tx_mem.umem;
+		cq_attr.umem_obj = priv->consec_tx_mem.umem_obj;
+		act_cq_len = RTE_ALIGN(txq_data->cq_mem_len, alignment);
+		cq_attr.q_off = priv->consec_tx_mem.cq_cur_off;
+		cq_attr.db_off = db_start + (2 * idx + 1) * MLX5_DBR_SIZE;
+		cq_attr.q_len = txq_data->cq_mem_len;
+	}
 	/* Create completion queue object with DevX. */
 	ret = mlx5_devx_cq_create(sh->cdev->ctx, &txq_obj->cq_obj, log_desc_n,
 				  &cq_attr, priv->sh->numa_node);
@@ -1641,6 +1667,8 @@ mlx5_txq_devx_obj_new(struct rte_eth_dev *dev, uint16_t idx)
 #endif
 	txq_ctrl->uar_mmap_offset =
 			mlx5_os_get_devx_uar_mmap_offset(sh->tx_uar.obj);
+	if (priv->sh->config.txq_mem_algn)
+		priv->consec_tx_mem.cq_cur_off += act_cq_len;
 	ppriv->uar_table[txq_data->idx] = sh->tx_uar.bf_db;
 	dev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;
 	return 0;
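The offset arithmetic in the two hunks above follows the layout
[SQ rings][CQ rings][doorbell records], assuming the CQ area starts
right after the SQ area, as the db_start computation implies. Below is
a minimal sketch with assumed ring lengths (already aligned) and the
doorbell record size taken as 8 bytes to stand in for MLX5_DBR_SIZE:

#include <stdint.h>
#include <stdio.h>

#define DBR_SIZE 8u /* assumed stand-in for MLX5_DBR_SIZE */

int
main(void)
{
	/* Assumed per-queue ring lengths, already aligned to
	 * RTE_BIT32(txq_mem_algn) as the driver does.
	 */
	uint32_t sq_len[2] = { 8192, 4096 };
	uint32_t cq_len[2] = { 2048, 1024 };
	uint32_t sq_total = 8192 + 4096;
	uint32_t cq_total = 2048 + 1024;
	uint32_t db_start = sq_total + cq_total; /* doorbells come last */
	uint32_t sq_off = 0, cq_off = sq_total;
	uint32_t idx;

	for (idx = 0; idx < 2; idx++) {
		printf("txq %u: SQ@%u CQ@%u SQ-db@%u CQ-db@%u\n", idx,
		       sq_off, cq_off,
		       db_start + (2 * idx) * DBR_SIZE,
		       db_start + (2 * idx + 1) * DBR_SIZE);
		sq_off += sq_len[idx];
		cq_off += cq_len[idx];
	}
	return 0;
}

Each queue thus owns one SQ doorbell and one CQ doorbell slot in the
grouped area at the end, which is why the offsets advance in pairs of
MLX5_DBR_SIZE.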
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 0fdf66d696..80ffe88120 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -51,52 +51,55 @@ static int
 mlx5_txq_start(struct rte_eth_dev *dev)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	unsigned int i;
+	uint32_t log_max_wqe = log2above(mlx5_dev_get_max_wq_size(priv->sh));
+	uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
+	unsigned int i, cnt;
 	int ret;
 
-	for (i = 0; i != priv->txqs_n; ++i) {
-		struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
-		struct mlx5_txq_data *txq_data = &txq_ctrl->txq;
-		uint32_t flags = MLX5_MEM_RTE | MLX5_MEM_ZERO;
+	for (cnt = log_max_wqe; cnt > 0; cnt -= 1) {
+		for (i = 0; i != priv->txqs_n; ++i) {
+			struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
+			struct mlx5_txq_data *txq_data = &txq_ctrl->txq;
 
-		if (!txq_ctrl)
-			continue;
-		if (!txq_ctrl->is_hairpin)
-			txq_alloc_elts(txq_ctrl);
-		MLX5_ASSERT(!txq_ctrl->obj);
-		txq_ctrl->obj = mlx5_malloc_numa_tolerant(flags, sizeof(struct mlx5_txq_obj),
-							  0, txq_ctrl->socket);
-		if (!txq_ctrl->obj) {
-			DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
-				"memory resources.", dev->data->port_id,
-				txq_data->idx);
-			rte_errno = ENOMEM;
-			goto error;
-		}
-		ret = priv->obj_ops.txq_obj_new(dev, i);
-		if (ret < 0) {
-			mlx5_free(txq_ctrl->obj);
-			txq_ctrl->obj = NULL;
-			goto error;
-		}
-		if (!txq_ctrl->is_hairpin) {
-			size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
-
-			txq_data->fcqs = mlx5_malloc_numa_tolerant(flags, size,
-								   RTE_CACHE_LINE_SIZE,
-								   txq_ctrl->socket);
-			if (!txq_data->fcqs) {
-				DRV_LOG(ERR, "Port %u Tx queue %u cannot "
-					"allocate memory (FCQ).",
-					dev->data->port_id, i);
+			if (!txq_ctrl || txq_data->elts_n != cnt)
+				continue;
+			if (!txq_ctrl->is_hairpin)
+				txq_alloc_elts(txq_ctrl);
+			MLX5_ASSERT(!txq_ctrl->obj);
+			txq_ctrl->obj = mlx5_malloc_numa_tolerant(flags, sizeof(struct mlx5_txq_obj),
+								  0, txq_ctrl->socket);
+			if (!txq_ctrl->obj) {
+				DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
+					"memory resources.", dev->data->port_id,
+					txq_data->idx);
 				rte_errno = ENOMEM;
 				goto error;
 			}
-		}
-		DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
-			dev->data->port_id, i, (void *)&txq_ctrl->obj);
-		LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
+			ret = priv->obj_ops.txq_obj_new(dev, i);
+			if (ret < 0) {
+				mlx5_free(txq_ctrl->obj);
+				txq_ctrl->obj = NULL;
+				goto error;
+			}
+			if (!txq_ctrl->is_hairpin) {
+				size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
+
+				txq_data->fcqs = mlx5_malloc_numa_tolerant(flags, size,
+									   RTE_CACHE_LINE_SIZE,
+									   txq_ctrl->socket);
+				if (!txq_data->fcqs) {
+					DRV_LOG(ERR, "Port %u Tx queue %u cannot "
+						"allocate memory (FCQ).",
+						dev->data->port_id, i);
+					rte_errno = ENOMEM;
+					goto error;
+				}
+			}
+			DRV_LOG(DEBUG, "Port %u txq %u updated with %p.",
+				dev->data->port_id, i, (void *)&txq_ctrl->obj);
+			LIST_INSERT_HEAD(&priv->txqsobj, txq_ctrl->obj, next);
+		}
 	}
 	return 0;
 error:
 	ret = rte_errno; /* Save rte_errno before cleanup. */
--
2.34.1