From: Bing Zhao <bingz@nvidia.com>
To: <viacheslavo@nvidia.com>, <matan@nvidia.com>
Cc: <dev@dpdk.org>, <thomas@monjalon.net>, <dsosnowski@nvidia.com>,
<suanmingm@nvidia.com>, <rasland@nvidia.com>
Subject: [PATCH v3 5/5] net/mlx5: use consecutive memory for Tx queue creation
Date: Fri, 27 Jun 2025 19:37:29 +0300
Message-ID: <20250627163729.50460-6-bingz@nvidia.com>
In-Reply-To: <20250627163729.50460-1-bingz@nvidia.com>
The starting address offsets of the queues within the umem and the doorbell
offsets are already passed to the DevX object creation functions.
When the queue length in the attributes is not zero, it means that the
memory was pre-allocated and the object must be created on that
consecutive memory instead of allocating a new buffer.
When destroying the SQ / CQ objects in consecutive mode, the umem and MR
must not be released per queue; the global resources are only released
when the device is stopped.
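As an illustration only (not part of the patch), a caller selects the
consecutive-memory path roughly as below; the buffer, umem and offset
variables are placeholders for the per-port resources prepared in the
previous patches of this series:

	struct mlx5_devx_cq_attr cq_attr = { .uar_page_id = uar_id };

	/* Non-zero q_len enables the consecutive-memory mode. */
	cq_attr.q_len = sizeof(struct mlx5_cqe) * RTE_BIT32(log_desc_n);
	cq_attr.umem = consec_buf;       /* base VA of the shared Tx buffer */
	cq_attr.umem_obj = consec_umem;  /* umem registered once for that buffer */
	cq_attr.q_off = cq_off;          /* this queue's CQE ring offset */
	cq_attr.db_off = cq_db_off;      /* this queue's doorbell record offset */
	ret = mlx5_devx_cq_create(cdev->ctx, &txq_obj->cq_obj, log_desc_n,
				  &cq_attr, socket);

With cq_obj.consec set, mlx5_devx_cq_destroy() only destroys the DevX
object; the shared umem and buffer are released at device stop. The SQ
path follows the same convention via mlx5_devx_sq_create().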
Signed-off-by: Bing Zhao <bingz@nvidia.com>
---
drivers/common/mlx5/mlx5_common_devx.c | 160 +++++++++++++++++--------
drivers/common/mlx5/mlx5_common_devx.h | 2 +
2 files changed, 110 insertions(+), 52 deletions(-)
diff --git a/drivers/common/mlx5/mlx5_common_devx.c b/drivers/common/mlx5/mlx5_common_devx.c
index aace5283e7..e237558ec2 100644
--- a/drivers/common/mlx5/mlx5_common_devx.c
+++ b/drivers/common/mlx5/mlx5_common_devx.c
@@ -30,6 +30,8 @@ mlx5_devx_cq_destroy(struct mlx5_devx_cq *cq)
{
if (cq->cq)
claim_zero(mlx5_devx_cmd_destroy(cq->cq));
+ if (cq->consec)
+ return;
if (cq->umem_obj)
claim_zero(mlx5_os_umem_dereg(cq->umem_obj));
if (cq->umem_buf)
@@ -93,6 +95,7 @@ mlx5_devx_cq_create(void *ctx, struct mlx5_devx_cq *cq_obj, uint16_t log_desc_n,
uint32_t eqn;
uint32_t num_of_cqes = RTE_BIT32(log_desc_n);
int ret;
+ uint32_t umem_offset, umem_id;
if (page_size == (size_t)-1 || alignment == (size_t)-1) {
DRV_LOG(ERR, "Failed to get page_size.");
@@ -108,29 +111,44 @@ mlx5_devx_cq_create(void *ctx, struct mlx5_devx_cq *cq_obj, uint16_t log_desc_n,
}
/* Allocate memory buffer for CQEs and doorbell record. */
umem_size = sizeof(struct mlx5_cqe) * num_of_cqes;
- umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
- umem_size += MLX5_DBR_SIZE;
- umem_buf = mlx5_malloc_numa_tolerant(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
- alignment, socket);
- if (!umem_buf) {
- DRV_LOG(ERR, "Failed to allocate memory for CQ.");
- rte_errno = ENOMEM;
- return -rte_errno;
- }
- /* Register allocated buffer in user space with DevX. */
- umem_obj = mlx5_os_umem_reg(ctx, (void *)(uintptr_t)umem_buf, umem_size,
- IBV_ACCESS_LOCAL_WRITE);
- if (!umem_obj) {
- DRV_LOG(ERR, "Failed to register umem for CQ.");
- rte_errno = errno;
- goto error;
+ if (!attr->q_len) {
+ umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
+ umem_size += MLX5_DBR_SIZE;
+ umem_buf = mlx5_malloc_numa_tolerant(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
+ alignment, socket);
+ if (!umem_buf) {
+ DRV_LOG(ERR, "Failed to allocate memory for CQ.");
+ rte_errno = ENOMEM;
+ return -rte_errno;
+ }
+ /* Register allocated buffer in user space with DevX. */
+ umem_obj = mlx5_os_umem_reg(ctx, (void *)(uintptr_t)umem_buf, umem_size,
+ IBV_ACCESS_LOCAL_WRITE);
+ if (!umem_obj) {
+ DRV_LOG(ERR, "Failed to register umem for CQ.");
+ rte_errno = errno;
+ goto error;
+ }
+ umem_offset = 0;
+ umem_id = mlx5_os_get_umem_id(umem_obj);
+ } else {
+ if (umem_size != attr->q_len) {
+ DRV_LOG(ERR, "Mismatch between saved length and calc length of CQ %u-%u",
+ umem_size, attr->q_len);
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ umem_buf = attr->umem;
+ umem_offset = attr->q_off;
+ umem_dbrec = attr->db_off;
+ umem_id = mlx5_os_get_umem_id(attr->umem_obj);
}
/* Fill attributes for CQ object creation. */
attr->q_umem_valid = 1;
- attr->q_umem_id = mlx5_os_get_umem_id(umem_obj);
- attr->q_umem_offset = 0;
+ attr->q_umem_id = umem_id;
+ attr->q_umem_offset = umem_offset;
attr->db_umem_valid = 1;
- attr->db_umem_id = attr->q_umem_id;
+ attr->db_umem_id = umem_id;
attr->db_umem_offset = umem_dbrec;
attr->eqn = eqn;
attr->log_cq_size = log_desc_n;
@@ -142,19 +160,29 @@ mlx5_devx_cq_create(void *ctx, struct mlx5_devx_cq *cq_obj, uint16_t log_desc_n,
rte_errno = ENOMEM;
goto error;
}
- cq_obj->umem_buf = umem_buf;
- cq_obj->umem_obj = umem_obj;
+ if (!attr->q_len) {
+ cq_obj->umem_buf = umem_buf;
+ cq_obj->umem_obj = umem_obj;
+ cq_obj->db_rec = RTE_PTR_ADD(cq_obj->umem_buf, umem_dbrec);
+ cq_obj->consec = false;
+ } else {
+ cq_obj->umem_buf = RTE_PTR_ADD(umem_buf, umem_offset);
+ cq_obj->umem_obj = attr->umem_obj;
+ cq_obj->db_rec = RTE_PTR_ADD(umem_buf, umem_dbrec);
+ cq_obj->consec = true;
+ }
cq_obj->cq = cq;
- cq_obj->db_rec = RTE_PTR_ADD(cq_obj->umem_buf, umem_dbrec);
/* Mark all CQEs initially as invalid. */
mlx5_cq_init(cq_obj, num_of_cqes);
return 0;
error:
ret = rte_errno;
- if (umem_obj)
- claim_zero(mlx5_os_umem_dereg(umem_obj));
- if (umem_buf)
- mlx5_free((void *)(uintptr_t)umem_buf);
+ if (!attr->q_len) {
+ if (umem_obj)
+ claim_zero(mlx5_os_umem_dereg(umem_obj));
+ if (umem_buf)
+ mlx5_free((void *)(uintptr_t)umem_buf);
+ }
rte_errno = ret;
return -rte_errno;
}
@@ -171,6 +199,8 @@ mlx5_devx_sq_destroy(struct mlx5_devx_sq *sq)
{
if (sq->sq)
claim_zero(mlx5_devx_cmd_destroy(sq->sq));
+ if (sq->consec)
+ return;
if (sq->umem_obj)
claim_zero(mlx5_os_umem_dereg(sq->umem_obj));
if (sq->umem_buf)
@@ -220,6 +250,7 @@ mlx5_devx_sq_create(void *ctx, struct mlx5_devx_sq *sq_obj, uint16_t log_wqbb_n,
uint32_t umem_size, umem_dbrec;
uint32_t num_of_wqbbs = RTE_BIT32(log_wqbb_n);
int ret;
+ uint32_t umem_offset, umem_id;
if (alignment == (size_t)-1) {
DRV_LOG(ERR, "Failed to get WQE buf alignment.");
@@ -228,30 +259,45 @@ mlx5_devx_sq_create(void *ctx, struct mlx5_devx_sq *sq_obj, uint16_t log_wqbb_n,
}
/* Allocate memory buffer for WQEs and doorbell record. */
umem_size = MLX5_WQE_SIZE * num_of_wqbbs;
- umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
- umem_size += MLX5_DBR_SIZE;
- umem_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
- alignment, socket);
- if (!umem_buf) {
- DRV_LOG(ERR, "Failed to allocate memory for SQ.");
- rte_errno = ENOMEM;
- return -rte_errno;
- }
- /* Register allocated buffer in user space with DevX. */
- umem_obj = mlx5_os_umem_reg(ctx, (void *)(uintptr_t)umem_buf, umem_size,
- IBV_ACCESS_LOCAL_WRITE);
- if (!umem_obj) {
- DRV_LOG(ERR, "Failed to register umem for SQ.");
- rte_errno = errno;
- goto error;
+ if (!attr->q_len) {
+ umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
+ umem_size += MLX5_DBR_SIZE;
+ umem_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
+ alignment, socket);
+ if (!umem_buf) {
+ DRV_LOG(ERR, "Failed to allocate memory for SQ.");
+ rte_errno = ENOMEM;
+ return -rte_errno;
+ }
+ /* Register allocated buffer in user space with DevX. */
+ umem_obj = mlx5_os_umem_reg(ctx, (void *)(uintptr_t)umem_buf, umem_size,
+ IBV_ACCESS_LOCAL_WRITE);
+ if (!umem_obj) {
+ DRV_LOG(ERR, "Failed to register umem for SQ.");
+ rte_errno = errno;
+ goto error;
+ }
+ umem_offset = 0;
+ umem_id = mlx5_os_get_umem_id(umem_obj);
+ } else {
+ if (umem_size != attr->q_len) {
+ DRV_LOG(ERR, "Mismatch between saved length and calc length of WQ %u-%u",
+ umem_size, attr->q_len);
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ umem_buf = attr->umem;
+ umem_offset = attr->q_off;
+ umem_dbrec = attr->db_off;
+ umem_id = mlx5_os_get_umem_id(attr->umem_obj);
}
/* Fill attributes for SQ object creation. */
attr->wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC;
attr->wq_attr.wq_umem_valid = 1;
- attr->wq_attr.wq_umem_id = mlx5_os_get_umem_id(umem_obj);
- attr->wq_attr.wq_umem_offset = 0;
+ attr->wq_attr.wq_umem_id = umem_id;
+ attr->wq_attr.wq_umem_offset = umem_offset;
attr->wq_attr.dbr_umem_valid = 1;
- attr->wq_attr.dbr_umem_id = attr->wq_attr.wq_umem_id;
+ attr->wq_attr.dbr_umem_id = umem_id;
attr->wq_attr.dbr_addr = umem_dbrec;
attr->wq_attr.log_wq_stride = rte_log2_u32(MLX5_WQE_SIZE);
attr->wq_attr.log_wq_sz = log_wqbb_n;
@@ -263,17 +309,27 @@ mlx5_devx_sq_create(void *ctx, struct mlx5_devx_sq *sq_obj, uint16_t log_wqbb_n,
rte_errno = ENOMEM;
goto error;
}
- sq_obj->umem_buf = umem_buf;
- sq_obj->umem_obj = umem_obj;
+ if (!attr->q_len) {
+ sq_obj->umem_buf = umem_buf;
+ sq_obj->umem_obj = umem_obj;
+ sq_obj->db_rec = RTE_PTR_ADD(sq_obj->umem_buf, umem_dbrec);
+ sq_obj->consec = false;
+ } else {
+ sq_obj->umem_buf = RTE_PTR_ADD(umem_buf, attr->q_off);
+ sq_obj->umem_obj = attr->umem_obj;
+ sq_obj->db_rec = RTE_PTR_ADD(umem_buf, attr->db_off);
+ sq_obj->consec = true;
+ }
sq_obj->sq = sq;
- sq_obj->db_rec = RTE_PTR_ADD(sq_obj->umem_buf, umem_dbrec);
return 0;
error:
ret = rte_errno;
- if (umem_obj)
- claim_zero(mlx5_os_umem_dereg(umem_obj));
- if (umem_buf)
- mlx5_free((void *)(uintptr_t)umem_buf);
+ if (!attr->q_len) {
+ if (umem_obj)
+ claim_zero(mlx5_os_umem_dereg(umem_obj));
+ if (umem_buf)
+ mlx5_free((void *)(uintptr_t)umem_buf);
+ }
rte_errno = ret;
return -rte_errno;
}
diff --git a/drivers/common/mlx5/mlx5_common_devx.h b/drivers/common/mlx5/mlx5_common_devx.h
index 743f06042c..4cb9111dbb 100644
--- a/drivers/common/mlx5/mlx5_common_devx.h
+++ b/drivers/common/mlx5/mlx5_common_devx.h
@@ -21,6 +21,7 @@ struct mlx5_devx_cq {
volatile struct mlx5_cqe *cqes; /* The CQ ring buffer. */
};
volatile uint32_t *db_rec; /* The CQ doorbell record. */
+ bool consec; /* Using consecutive memory. */
};
/* DevX Send Queue structure. */
@@ -33,6 +34,7 @@ struct mlx5_devx_sq {
volatile struct mlx5_aso_wqe *aso_wqes;
};
volatile uint32_t *db_rec; /* The SQ doorbell record. */
+ bool consec; /* Using consecutive memory. */
};
/* DevX Queue Pair structure. */
--
2.34.1