* [PATCH] net/mlx5: mlx5 malloc NUMA fallback
@ 2025-06-19 7:01 Maayan Kashani
2025-06-25 15:00 ` [PATCH v2] net/mlx5: support NUMA node fallback Maayan Kashani
0 siblings, 1 reply; 2+ messages in thread
From: Maayan Kashani @ 2025-06-19 7:01 UTC (permalink / raw)
To: dev
Cc: mkashani, dsosnowski, rasland, Viacheslav Ovsiienko, Bing Zhao,
Ori Kam, Suanming Mou, Matan Azrad
Previously, if an mlx5_malloc() call with a specific socket failed,
the function returned an error without trying any other NUMA node.

Cross-NUMA support means that when there is no memory available on
the local NUMA node, memory from other NUMA nodes is used for port
initialization and start.

To support cross NUMA, add a flag that enables fallback to any NUMA
node. The fallback is taken only when this flag is set.

Add a NUMA-tolerant wrapper around the mlx5_malloc() calls in the
ipool and DevX/memory-region initializations to support cross-NUMA
allocation in the device probing and port start stages.
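
As an illustration only (not part of the patch), a minimal sketch of
the fallback pattern the new flag enables, written against the public
rte_malloc API; the helper name example_alloc_numa_tolerant() is
hypothetical:

	#include <rte_malloc.h>

	/* Try the requested NUMA node first, then fall back to any node. */
	static void *
	example_alloc_numa_tolerant(size_t size, unsigned int align, int socket)
	{
		void *buf = rte_zmalloc_socket(NULL, size, align, socket);

		if (buf == NULL && socket != SOCKET_ID_ANY)
			buf = rte_zmalloc_socket(NULL, size, align, SOCKET_ID_ANY);
		return buf;
	}
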
Signed-off-by: Maayan Kashani <mkashani@nvidia.com>
---
drivers/common/mlx5/mlx5_common_devx.c | 8 ++---
drivers/common/mlx5/mlx5_common_mr.c | 6 ++--
drivers/common/mlx5/mlx5_devx_cmds.c | 4 +--
drivers/common/mlx5/mlx5_malloc.c | 20 +++++++++--
drivers/common/mlx5/mlx5_malloc.h | 20 +++++++++++
drivers/net/mlx5/mlx5.c | 4 +--
drivers/net/mlx5/mlx5_devx.c | 6 ++--
drivers/net/mlx5/mlx5_flow_hw.c | 21 +++++++-----
drivers/net/mlx5/mlx5_rxq.c | 2 +-
drivers/net/mlx5/mlx5_trigger.c | 16 ++++-----
drivers/net/mlx5/mlx5_txq.c | 4 +--
drivers/net/mlx5/mlx5_utils.c | 46 ++++++++++++++++++++------
12 files changed, 111 insertions(+), 46 deletions(-)
diff --git a/drivers/common/mlx5/mlx5_common_devx.c b/drivers/common/mlx5/mlx5_common_devx.c
index cd1292b92ba..aace5283e7d 100644
--- a/drivers/common/mlx5/mlx5_common_devx.c
+++ b/drivers/common/mlx5/mlx5_common_devx.c
@@ -110,8 +110,8 @@ mlx5_devx_cq_create(void *ctx, struct mlx5_devx_cq *cq_obj, uint16_t log_desc_n,
umem_size = sizeof(struct mlx5_cqe) * num_of_cqes;
umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
umem_size += MLX5_DBR_SIZE;
- umem_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
- alignment, socket);
+ umem_buf = mlx5_malloc_numa_tolerant(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
+ alignment, socket);
if (!umem_buf) {
DRV_LOG(ERR, "Failed to allocate memory for CQ.");
rte_errno = ENOMEM;
@@ -484,8 +484,8 @@ mlx5_devx_wq_init(void *ctx, uint32_t wqe_size, uint16_t log_wqbb_n, int socket,
umem_size = wqe_size * (1 << log_wqbb_n);
umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
umem_size += MLX5_DBR_SIZE;
- umem_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
- alignment, socket);
+ umem_buf = mlx5_malloc_numa_tolerant(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
+ alignment, socket);
if (!umem_buf) {
DRV_LOG(ERR, "Failed to allocate memory for RQ.");
rte_errno = ENOMEM;
diff --git a/drivers/common/mlx5/mlx5_common_mr.c b/drivers/common/mlx5/mlx5_common_mr.c
index c41ffff2d5a..62cbc9bc001 100644
--- a/drivers/common/mlx5/mlx5_common_mr.c
+++ b/drivers/common/mlx5/mlx5_common_mr.c
@@ -225,9 +225,9 @@ mlx5_mr_btree_init(struct mlx5_mr_btree *bt, int n, int socket)
}
MLX5_ASSERT(!bt->table && !bt->size);
memset(bt, 0, sizeof(*bt));
- bt->table = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
- sizeof(struct mr_cache_entry) * n,
- 0, socket);
+ bt->table = mlx5_malloc_numa_tolerant(MLX5_MEM_RTE | MLX5_MEM_ZERO,
+ sizeof(struct mr_cache_entry) * n,
+ 0, socket);
if (bt->table == NULL) {
rte_errno = ENOMEM;
DRV_LOG(DEBUG,
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 39a4298b58c..15ca63fba9f 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -1537,7 +1537,7 @@ mlx5_devx_cmd_create_rq(void *ctx,
struct mlx5_devx_wq_attr *wq_attr;
struct mlx5_devx_obj *rq = NULL;
- rq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rq), 0, socket);
+ rq = mlx5_malloc_numa_tolerant(MLX5_MEM_ZERO, sizeof(*rq), 0, socket);
if (!rq) {
DRV_LOG(ERR, "Failed to allocate RQ data");
rte_errno = ENOMEM;
@@ -1680,7 +1680,7 @@ mlx5_devx_cmd_create_rmp(void *ctx,
struct mlx5_devx_wq_attr *wq_attr;
struct mlx5_devx_obj *rmp = NULL;
- rmp = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rmp), 0, socket);
+ rmp = mlx5_malloc_numa_tolerant(MLX5_MEM_ZERO, sizeof(*rmp), 0, socket);
if (!rmp) {
DRV_LOG(ERR, "Failed to allocate RMP data");
rte_errno = ENOMEM;
diff --git a/drivers/common/mlx5/mlx5_malloc.c b/drivers/common/mlx5/mlx5_malloc.c
index d56b4fb5a89..159182ee3cd 100644
--- a/drivers/common/mlx5/mlx5_malloc.c
+++ b/drivers/common/mlx5/mlx5_malloc.c
@@ -162,6 +162,13 @@ mlx5_alloc_align(size_t size, unsigned int align, unsigned int zero)
return buf;
}
+static void *
+mlx5_malloc_socket_internal(size_t size, unsigned int align, int socket, bool zero)
+{
+ return zero ? rte_zmalloc_socket(NULL, size, align, socket) :
+ rte_malloc_socket(NULL, size, align, socket);
+}
+
RTE_EXPORT_INTERNAL_SYMBOL(mlx5_malloc)
void *
mlx5_malloc(uint32_t flags, size_t size, unsigned int align, int socket)
@@ -181,9 +188,18 @@ mlx5_malloc(uint32_t flags, size_t size, unsigned int align, int socket)
rte_mem = mlx5_sys_mem.enable ? false : true;
if (rte_mem) {
if (flags & MLX5_MEM_ZERO)
- addr = rte_zmalloc_socket(NULL, size, align, socket);
+ addr = mlx5_malloc_socket_internal(size, align, socket, true);
else
- addr = rte_malloc_socket(NULL, size, align, socket);
+ addr = mlx5_malloc_socket_internal(size, align, socket, false);
+ if (addr == NULL && socket != SOCKET_ID_ANY && (flags & MLX5_NUMA_TOLERANT)) {
+ size_t alloc_size = size;
+ addr = mlx5_malloc_socket_internal(size, align, SOCKET_ID_ANY,
+ !!(flags & MLX5_MEM_ZERO));
+ if (addr) {
+				DRV_LOG(WARNING, "Allocated %p (size %zu socket %d) through NUMA tolerant fallback",
+					(addr), (alloc_size), (socket));
+ }
+ }
mlx5_mem_update_msl(addr);
#ifdef RTE_LIBRTE_MLX5_DEBUG
if (addr)
diff --git a/drivers/common/mlx5/mlx5_malloc.h b/drivers/common/mlx5/mlx5_malloc.h
index 9086a4f3f22..545a1124c24 100644
--- a/drivers/common/mlx5/mlx5_malloc.h
+++ b/drivers/common/mlx5/mlx5_malloc.h
@@ -28,6 +28,8 @@ enum mlx5_mem_flags {
/* Memory should be allocated from rte hugepage. */
MLX5_MEM_ZERO = 1 << 2,
/* Memory should be cleared to zero. */
+ MLX5_NUMA_TOLERANT = 1 << 3,
+ /* Fallback to any NUMA if the memory allocation fails. */
};
/**
@@ -101,6 +103,24 @@ void *mlx5_realloc(void *addr, uint32_t flags, size_t size, unsigned int align,
__rte_internal
void mlx5_free(void *addr);
+#if defined(RTE_TOOLCHAIN_GCC) || defined(RTE_TOOLCHAIN_CLANG)
+#define mlx5_malloc_numa_tolerant(flags, size, align, socket) (__extension__ ({ \
+ void *mem = mlx5_malloc((uint32_t)(flags), (size_t)(size), (align), (socket)); \
+ if (mem == NULL) { \
+ mem = mlx5_malloc((uint32_t)(flags), (size_t)(size), \
+ (align), SOCKET_ID_ANY); \
+ if (mem != NULL) { \
+ DRV_LOG(WARNING, \
+ "Allocated %p (size %zu socket %d) through NUMA tolerant fallback",\
+ (mem), ((size_t)(size)), (socket)); \
+ } \
+ } \
+ mem; \
+ }))
+#else
+#define mlx5_malloc_numa_tolerant(flags, size, align, socket) \
+	mlx5_malloc((flags) | MLX5_NUMA_TOLERANT, (size), (align), (socket))
+#endif
#ifdef __cplusplus
}
#endif
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index b4bd43aae25..29700524458 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -2271,8 +2271,8 @@ mlx5_proc_priv_init(struct rte_eth_dev *dev)
*/
ppriv_size = sizeof(struct mlx5_proc_priv) +
priv->txqs_n * sizeof(struct mlx5_uar_data);
- ppriv = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, ppriv_size,
- RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+ ppriv = mlx5_malloc_numa_tolerant(MLX5_MEM_RTE | MLX5_MEM_ZERO, ppriv_size,
+ RTE_CACHE_LINE_SIZE, dev->device->numa_node);
if (!ppriv) {
rte_errno = ENOMEM;
return -rte_errno;
diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c
index 9711746edba..41e4142c1f8 100644
--- a/drivers/net/mlx5/mlx5_devx.c
+++ b/drivers/net/mlx5/mlx5_devx.c
@@ -1095,14 +1095,14 @@ mlx5_rxq_devx_obj_drop_create(struct rte_eth_dev *dev)
* They are required to hold pointers for cleanup
* and are only accessible via drop queue DevX objects.
*/
- rxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq), 0, socket_id);
+ rxq = mlx5_malloc_numa_tolerant(MLX5_MEM_ZERO, sizeof(*rxq), 0, socket_id);
if (rxq == NULL) {
DRV_LOG(ERR, "Port %u could not allocate drop queue private",
dev->data->port_id);
rte_errno = ENOMEM;
goto error;
}
- rxq_ctrl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq_ctrl),
+ rxq_ctrl = mlx5_malloc_numa_tolerant(MLX5_MEM_ZERO, sizeof(*rxq_ctrl),
0, socket_id);
if (rxq_ctrl == NULL) {
DRV_LOG(ERR, "Port %u could not allocate drop queue control",
@@ -1110,7 +1110,7 @@ mlx5_rxq_devx_obj_drop_create(struct rte_eth_dev *dev)
rte_errno = ENOMEM;
goto error;
}
- rxq_obj = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq_obj), 0, socket_id);
+ rxq_obj = mlx5_malloc_numa_tolerant(MLX5_MEM_ZERO, sizeof(*rxq_obj), 0, socket_id);
if (rxq_obj == NULL) {
DRV_LOG(ERR, "Port %u could not allocate drop queue object",
dev->data->port_id);
diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index e26093522fb..c6e732df77f 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -5069,7 +5069,8 @@ flow_hw_table_create(struct rte_eth_dev *dev,
tbl_mem_size = sizeof(*tbl);
tbl_mem_size += nb_action_templates * priv->nb_queue * sizeof(tbl->rule_acts[0]);
/* Allocate the table memory. */
- tbl = mlx5_malloc(MLX5_MEM_ZERO, tbl_mem_size, RTE_CACHE_LINE_SIZE, rte_socket_id());
+ tbl = mlx5_malloc_numa_tolerant(MLX5_MEM_ZERO, tbl_mem_size,
+ RTE_CACHE_LINE_SIZE, rte_socket_id());
if (!tbl)
goto error;
tbl->cfg = *table_cfg;
@@ -5078,8 +5079,10 @@ flow_hw_table_create(struct rte_eth_dev *dev,
if (!tbl->flow)
goto error;
/* Allocate table of auxiliary flow rule structs. */
- tbl->flow_aux = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct rte_flow_hw_aux) * nb_flows,
- RTE_CACHE_LINE_SIZE, rte_dev_numa_node(dev->device));
+ tbl->flow_aux = mlx5_malloc_numa_tolerant(MLX5_MEM_ZERO,
+ sizeof(struct rte_flow_hw_aux) * nb_flows,
+ RTE_CACHE_LINE_SIZE,
+ rte_dev_numa_node(dev->device));
if (!tbl->flow_aux)
goto error;
/* Register the flow group. */
@@ -8031,7 +8034,7 @@ __flow_hw_actions_template_create(struct rte_eth_dev *dev,
if (orig_act_len <= 0)
return NULL;
len += RTE_ALIGN(orig_act_len, 16);
- at = mlx5_malloc(MLX5_MEM_ZERO, len + sizeof(*at),
+ at = mlx5_malloc_numa_tolerant(MLX5_MEM_ZERO, len + sizeof(*at),
RTE_CACHE_LINE_SIZE, rte_socket_id());
if (!at) {
rte_flow_error_set(error, ENOMEM,
@@ -8200,7 +8203,7 @@ flow_hw_prepend_item(const struct rte_flow_item *items,
/* Allocate new array of items. */
size = sizeof(*copied_items) * (nb_items + 1);
- copied_items = mlx5_malloc(MLX5_MEM_ZERO, size, 0, rte_socket_id());
+ copied_items = mlx5_malloc_numa_tolerant(MLX5_MEM_ZERO, size, 0, rte_socket_id());
if (!copied_items) {
rte_flow_error_set(error, ENOMEM,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
@@ -9017,7 +9020,7 @@ flow_hw_pattern_template_create(struct rte_eth_dev *dev,
tmpl_items = items;
}
setup_pattern_template:
- it = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*it), 0, rte_socket_id());
+ it = mlx5_malloc_numa_tolerant(MLX5_MEM_ZERO, sizeof(*it), 0, rte_socket_id());
if (!it) {
rte_flow_error_set(error, ENOMEM,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
@@ -9037,7 +9040,8 @@ flow_hw_pattern_template_create(struct rte_eth_dev *dev,
goto error;
}
it_items_size = RTE_ALIGN(it_items_size, 16);
- it->items = mlx5_malloc(MLX5_MEM_ZERO, it_items_size, 0, rte_dev_numa_node(dev->device));
+ it->items = mlx5_malloc_numa_tolerant(MLX5_MEM_ZERO, it_items_size, 0,
+ rte_dev_numa_node(dev->device));
if (it->items == NULL) {
rte_flow_error_set(error, ENOMEM,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
@@ -11441,7 +11445,8 @@ flow_hw_create_ctrl_rx_tables(struct rte_eth_dev *dev)
int ret;
MLX5_ASSERT(!priv->hw_ctrl_rx);
- priv->hw_ctrl_rx = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*priv->hw_ctrl_rx),
+ priv->hw_ctrl_rx = mlx5_malloc_numa_tolerant(MLX5_MEM_ZERO,
+ sizeof(*priv->hw_ctrl_rx),
RTE_CACHE_LINE_SIZE, rte_socket_id());
if (!priv->hw_ctrl_rx) {
DRV_LOG(ERR, "Failed to allocate memory for Rx control flow tables");
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index b676e5394b0..628af59fcc7 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1793,7 +1793,7 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
desc >>= mprq_log_actual_stride_num;
alloc_size += desc * sizeof(struct mlx5_mprq_buf *);
}
- tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, alloc_size, 0, socket);
+ tmpl = mlx5_malloc_numa_tolerant(MLX5_MEM_RTE | MLX5_MEM_ZERO, alloc_size, 0, socket);
if (!tmpl) {
rte_errno = ENOMEM;
return NULL;
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 485984f9b06..f33cd86e609 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -64,8 +64,8 @@ mlx5_txq_start(struct rte_eth_dev *dev)
if (!txq_ctrl->is_hairpin)
txq_alloc_elts(txq_ctrl);
MLX5_ASSERT(!txq_ctrl->obj);
- txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
- 0, txq_ctrl->socket);
+ txq_ctrl->obj = mlx5_malloc_numa_tolerant(flags, sizeof(struct mlx5_txq_obj),
+ 0, txq_ctrl->socket);
if (!txq_ctrl->obj) {
DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
"memory resources.", dev->data->port_id,
@@ -82,9 +82,9 @@ mlx5_txq_start(struct rte_eth_dev *dev)
if (!txq_ctrl->is_hairpin) {
size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
- txq_data->fcqs = mlx5_malloc(flags, size,
- RTE_CACHE_LINE_SIZE,
- txq_ctrl->socket);
+ txq_data->fcqs = mlx5_malloc_numa_tolerant(flags, size,
+ RTE_CACHE_LINE_SIZE,
+ txq_ctrl->socket);
if (!txq_data->fcqs) {
DRV_LOG(ERR, "Port %u Tx queue %u cannot "
"allocate memory (FCQ).",
@@ -182,9 +182,9 @@ mlx5_rxq_ctrl_prepare(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
return ret;
}
MLX5_ASSERT(!rxq_ctrl->obj);
- rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
- sizeof(*rxq_ctrl->obj), 0,
- rxq_ctrl->socket);
+ rxq_ctrl->obj = mlx5_malloc_numa_tolerant(MLX5_MEM_RTE | MLX5_MEM_ZERO,
+ sizeof(*rxq_ctrl->obj), 0,
+ rxq_ctrl->socket);
if (!rxq_ctrl->obj) {
DRV_LOG(ERR, "Port %u Rx queue %u can't allocate resources.",
dev->data->port_id, idx);
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 5fee5bc4e87..fd9c477aa9f 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -1056,8 +1056,8 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
struct mlx5_txq_ctrl *tmpl;
uint16_t max_wqe;
- tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl) +
- desc * sizeof(struct rte_mbuf *), 0, socket);
+ tmpl = mlx5_malloc_numa_tolerant(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl) +
+ desc * sizeof(struct rte_mbuf *), 0, socket);
if (!tmpl) {
rte_errno = ENOMEM;
return NULL;
diff --git a/drivers/net/mlx5/mlx5_utils.c b/drivers/net/mlx5/mlx5_utils.c
index f8cd7bc0439..5752efa108f 100644
--- a/drivers/net/mlx5/mlx5_utils.c
+++ b/drivers/net/mlx5/mlx5_utils.c
@@ -10,6 +10,29 @@
/********************* Indexed pool **********************/
+#if defined(RTE_TOOLCHAIN_GCC) || defined(RTE_TOOLCHAIN_CLANG)
+#define pool_malloc(pool, flags, size, align, socket) (__extension__ ({ \
+ struct mlx5_indexed_pool *p = (struct mlx5_indexed_pool *)(pool); \
+ uint32_t f = (uint32_t)(flags); \
+ size_t s = (size_t)(size); \
+ uint32_t a = (uint32_t)(align); \
+ int so = (int)(socket); \
+ void *mem = p->cfg.malloc(f, s, a, so); \
+ if (mem == NULL) { \
+ mem = p->cfg.malloc(f, s, a, SOCKET_ID_ANY); \
+ if (mem) { \
+ DRV_LOG(WARNING, \
+ "Allocated %p (size %zu socket %d) through NUMA tolerant fallback", \
+ mem, s, so); \
+ } \
+ } \
+ mem; \
+}))
+#else
+#define pool_malloc(pool, flags, size, align, socket) \
+	(pool)->cfg.malloc((uint32_t)(flags) | MLX5_NUMA_TOLERANT, (size), (align), (socket))
+#endif
+
int mlx5_logtype_ipool;
/* Initialize driver log type. */
@@ -149,7 +172,7 @@ mlx5_ipool_grow(struct mlx5_indexed_pool *pool)
int n_grow = pool->n_trunk_valid ? pool->n_trunk :
RTE_CACHE_LINE_SIZE / sizeof(void *);
- p = pool->cfg.malloc(0, (pool->n_trunk_valid + n_grow) *
+ p = pool_malloc(pool, MLX5_MEM_ZERO, (pool->n_trunk_valid + n_grow) *
sizeof(struct mlx5_indexed_trunk *),
RTE_CACHE_LINE_SIZE, rte_socket_id());
if (!p)
@@ -179,7 +202,7 @@ mlx5_ipool_grow(struct mlx5_indexed_pool *pool)
/* rte_bitmap requires memory cacheline aligned. */
trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
trunk_size += bmp_size;
- trunk = pool->cfg.malloc(0, trunk_size,
+ trunk = pool_malloc(pool, MLX5_MEM_ZERO, trunk_size,
RTE_CACHE_LINE_SIZE, rte_socket_id());
if (!trunk)
return -ENOMEM;
@@ -253,9 +276,10 @@ mlx5_ipool_grow_bmp(struct mlx5_indexed_pool *pool, uint32_t new_size)
pool->cache_validator.bmp_size = new_size;
bmp_mem_size = rte_bitmap_get_memory_footprint(new_size);
- pool->cache_validator.bmp_mem = pool->cfg.malloc(MLX5_MEM_ZERO, bmp_mem_size,
- RTE_CACHE_LINE_SIZE,
- rte_socket_id());
+ pool->cache_validator.bmp_mem = pool_malloc(pool, MLX5_MEM_ZERO,
+ bmp_mem_size,
+ RTE_CACHE_LINE_SIZE,
+ rte_socket_id());
if (unlikely(!pool->cache_validator.bmp_mem)) {
DRV_LOG_IPOOL(ERR, "Unable to allocate memory for a new bitmap");
return;
@@ -343,7 +367,7 @@ mlx5_ipool_allocate_from_global(struct mlx5_indexed_pool *pool, int cidx)
RTE_CACHE_LINE_SIZE / sizeof(void *);
cur_max_idx = mlx5_trunk_idx_offset_get(pool, trunk_n + n_grow);
/* Resize the trunk array. */
- p = pool->cfg.malloc(0, ((trunk_idx + n_grow) *
+ p = pool_malloc(pool, MLX5_MEM_ZERO, ((trunk_idx + n_grow) *
sizeof(struct mlx5_indexed_trunk *)) +
(cur_max_idx * sizeof(uint32_t)) + sizeof(*p),
RTE_CACHE_LINE_SIZE, rte_socket_id());
@@ -365,7 +389,7 @@ mlx5_ipool_allocate_from_global(struct mlx5_indexed_pool *pool, int cidx)
trunk_size = sizeof(*trunk);
data_size = mlx5_trunk_size_get(pool, trunk_idx);
trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
- trunk = pool->cfg.malloc(0, trunk_size,
+ trunk = pool_malloc(pool, MLX5_MEM_ZERO, trunk_size,
RTE_CACHE_LINE_SIZE, rte_socket_id());
if (unlikely(!trunk)) {
pool->cfg.free(p);
@@ -429,7 +453,7 @@ _mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, int cidx, uint32_t idx)
MLX5_ASSERT(idx);
if (unlikely(!pool->cache[cidx])) {
- pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
+ pool->cache[cidx] = pool_malloc(pool, MLX5_MEM_ZERO,
sizeof(struct mlx5_ipool_per_lcore) +
(pool->cfg.per_core_cache * sizeof(uint32_t)),
RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
@@ -515,7 +539,7 @@ _mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, int cidx,
uint32_t *idx)
{
if (unlikely(!pool->cache[cidx])) {
- pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
+ pool->cache[cidx] = pool_malloc(pool, MLX5_MEM_ZERO,
sizeof(struct mlx5_ipool_per_lcore) +
(pool->cfg.per_core_cache * sizeof(uint32_t)),
RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
@@ -577,7 +601,7 @@ _mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, int cidx, uint32_t idx)
* case check if local cache on core B was allocated before.
*/
if (unlikely(!pool->cache[cidx])) {
- pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
+ pool->cache[cidx] = pool_malloc(pool, MLX5_MEM_ZERO,
sizeof(struct mlx5_ipool_per_lcore) +
(pool->cfg.per_core_cache * sizeof(uint32_t)),
RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
@@ -881,7 +905,7 @@ mlx5_ipool_flush_cache(struct mlx5_indexed_pool *pool)
/* Reset bmp. */
bmp_num = mlx5_trunk_idx_offset_get(pool, gc->n_trunk_valid);
mem_size = rte_bitmap_get_memory_footprint(bmp_num);
- pool->bmp_mem = pool->cfg.malloc(MLX5_MEM_ZERO, mem_size,
+ pool->bmp_mem = pool_malloc(pool, MLX5_MEM_ZERO, mem_size,
RTE_CACHE_LINE_SIZE, rte_socket_id());
if (!pool->bmp_mem) {
DRV_LOG(ERR, "Ipool bitmap mem allocate failed.\n");
--
2.21.0
* [PATCH v2] net/mlx5: support NUMA node fallback
2025-06-19 7:01 [PATCH] net/mlx5: mlx5 malloc NUMA fallback Maayan Kashani
@ 2025-06-25 15:00 ` Maayan Kashani
0 siblings, 0 replies; 2+ messages in thread
From: Maayan Kashani @ 2025-06-25 15:00 UTC (permalink / raw)
To: dev
Cc: mkashani, dsosnowski, rasland, Viacheslav Ovsiienko, Bing Zhao,
Ori Kam, Suanming Mou, Matan Azrad
Cross-NUMA support means that when there is no memory available on
the local NUMA node, memory from other NUMA nodes is used for port
initialization and start.

Previously, if an mlx5_malloc() call with a specific socket failed,
the function returned an error without trying any other NUMA node.

To support cross NUMA, a NUMA-tolerant wrapper is used on supported
toolchains, and it logs the exact place where the fallback allocation
happened. A NUMA-tolerant flag was added to mlx5_malloc() to cover
toolchains that do not support statement expressions.

Add a NUMA-tolerant wrapper around the mlx5_malloc() calls in the
ipool and DevX/memory-region initializations to support cross-NUMA
allocation in the device probing and port start stages.

For Tx/Rx queue initialization, use the socket number given by the
user; when SOCKET_ID_ANY is passed, default to the device NUMA node
with a fallback to SOCKET_ID_ANY.
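
As an illustration only (not part of the patch), a minimal sketch of
the Rx/Tx socket-selection policy described above; the helper name
example_pick_socket() is hypothetical, and the allocation done with
the chosen socket may still fall back to SOCKET_ID_ANY:

	#include <rte_ethdev.h>

	/* Honor an explicit user socket; otherwise prefer the device node. */
	static int
	example_pick_socket(const struct rte_eth_dev *dev, int requested_socket)
	{
		if (requested_socket != SOCKET_ID_ANY)
			return requested_socket;
		return dev->device->numa_node;
	}
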
Signed-off-by: Maayan Kashani <mkashani@nvidia.com>
Acked-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
---
drivers/common/mlx5/mlx5_common_devx.c | 8 ++---
drivers/common/mlx5/mlx5_devx_cmds.c | 4 +--
drivers/common/mlx5/mlx5_malloc.c | 21 +++++++++---
drivers/common/mlx5/mlx5_malloc.h | 20 +++++++++++
drivers/net/mlx5/mlx5.c | 4 +--
drivers/net/mlx5/mlx5_devx.c | 6 ++--
drivers/net/mlx5/mlx5_flow_hw.c | 14 ++++----
drivers/net/mlx5/mlx5_rxq.c | 30 +++++++++++++----
drivers/net/mlx5/mlx5_trigger.c | 16 ++++-----
drivers/net/mlx5/mlx5_txq.c | 31 +++++++++++++----
drivers/net/mlx5/mlx5_utils.c | 46 ++++++++++++++++++++------
11 files changed, 147 insertions(+), 53 deletions(-)
diff --git a/drivers/common/mlx5/mlx5_common_devx.c b/drivers/common/mlx5/mlx5_common_devx.c
index cd1292b92ba..aace5283e7d 100644
--- a/drivers/common/mlx5/mlx5_common_devx.c
+++ b/drivers/common/mlx5/mlx5_common_devx.c
@@ -110,8 +110,8 @@ mlx5_devx_cq_create(void *ctx, struct mlx5_devx_cq *cq_obj, uint16_t log_desc_n,
umem_size = sizeof(struct mlx5_cqe) * num_of_cqes;
umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
umem_size += MLX5_DBR_SIZE;
- umem_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
- alignment, socket);
+ umem_buf = mlx5_malloc_numa_tolerant(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
+ alignment, socket);
if (!umem_buf) {
DRV_LOG(ERR, "Failed to allocate memory for CQ.");
rte_errno = ENOMEM;
@@ -484,8 +484,8 @@ mlx5_devx_wq_init(void *ctx, uint32_t wqe_size, uint16_t log_wqbb_n, int socket,
umem_size = wqe_size * (1 << log_wqbb_n);
umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
umem_size += MLX5_DBR_SIZE;
- umem_buf = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
- alignment, socket);
+ umem_buf = mlx5_malloc_numa_tolerant(MLX5_MEM_RTE | MLX5_MEM_ZERO, umem_size,
+ alignment, socket);
if (!umem_buf) {
DRV_LOG(ERR, "Failed to allocate memory for RQ.");
rte_errno = ENOMEM;
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 1a3f4580d87..cf601254ab2 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -1537,7 +1537,7 @@ mlx5_devx_cmd_create_rq(void *ctx,
struct mlx5_devx_wq_attr *wq_attr;
struct mlx5_devx_obj *rq = NULL;
- rq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rq), 0, socket);
+ rq = mlx5_malloc_numa_tolerant(MLX5_MEM_ZERO, sizeof(*rq), 0, socket);
if (!rq) {
DRV_LOG(ERR, "Failed to allocate RQ data");
rte_errno = ENOMEM;
@@ -1680,7 +1680,7 @@ mlx5_devx_cmd_create_rmp(void *ctx,
struct mlx5_devx_wq_attr *wq_attr;
struct mlx5_devx_obj *rmp = NULL;
- rmp = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rmp), 0, socket);
+ rmp = mlx5_malloc_numa_tolerant(MLX5_MEM_ZERO, sizeof(*rmp), 0, socket);
if (!rmp) {
DRV_LOG(ERR, "Failed to allocate RMP data");
rte_errno = ENOMEM;
diff --git a/drivers/common/mlx5/mlx5_malloc.c b/drivers/common/mlx5/mlx5_malloc.c
index d56b4fb5a89..28fb19b2852 100644
--- a/drivers/common/mlx5/mlx5_malloc.c
+++ b/drivers/common/mlx5/mlx5_malloc.c
@@ -162,6 +162,13 @@ mlx5_alloc_align(size_t size, unsigned int align, unsigned int zero)
return buf;
}
+static void *
+mlx5_malloc_socket_internal(size_t size, unsigned int align, int socket, bool zero)
+{
+ return zero ? rte_zmalloc_socket(NULL, size, align, socket) :
+ rte_malloc_socket(NULL, size, align, socket);
+}
+
RTE_EXPORT_INTERNAL_SYMBOL(mlx5_malloc)
void *
mlx5_malloc(uint32_t flags, size_t size, unsigned int align, int socket)
@@ -180,10 +187,16 @@ mlx5_malloc(uint32_t flags, size_t size, unsigned int align, int socket)
else
rte_mem = mlx5_sys_mem.enable ? false : true;
if (rte_mem) {
- if (flags & MLX5_MEM_ZERO)
- addr = rte_zmalloc_socket(NULL, size, align, socket);
- else
- addr = rte_malloc_socket(NULL, size, align, socket);
+ addr = mlx5_malloc_socket_internal(size, align, socket, !!(flags & MLX5_MEM_ZERO));
+ if (addr == NULL && socket != SOCKET_ID_ANY && (flags & MLX5_NUMA_TOLERANT)) {
+ size_t alloc_size = size;
+ addr = mlx5_malloc_socket_internal(size, align, SOCKET_ID_ANY,
+ !!(flags & MLX5_MEM_ZERO));
+ if (addr) {
+ DRV_LOG(WARNING, "Allocated %p (size %zu socket %d) through NUMA tolerant fallback",
+ (addr), (alloc_size), (socket));
+ }
+ }
mlx5_mem_update_msl(addr);
#ifdef RTE_LIBRTE_MLX5_DEBUG
if (addr)
diff --git a/drivers/common/mlx5/mlx5_malloc.h b/drivers/common/mlx5/mlx5_malloc.h
index 9086a4f3f22..1be3885c9cf 100644
--- a/drivers/common/mlx5/mlx5_malloc.h
+++ b/drivers/common/mlx5/mlx5_malloc.h
@@ -28,6 +28,8 @@ enum mlx5_mem_flags {
/* Memory should be allocated from rte hugepage. */
MLX5_MEM_ZERO = 1 << 2,
/* Memory should be cleared to zero. */
+ MLX5_NUMA_TOLERANT = 1 << 3,
+ /* Fallback to any NUMA if the memory allocation fails. */
};
/**
@@ -101,6 +103,24 @@ void *mlx5_realloc(void *addr, uint32_t flags, size_t size, unsigned int align,
__rte_internal
void mlx5_free(void *addr);
+#if defined(RTE_TOOLCHAIN_GCC) || defined(RTE_TOOLCHAIN_CLANG)
+#define mlx5_malloc_numa_tolerant(flags, size, align, socket) (__extension__ ({ \
+ void *mem = mlx5_malloc((uint32_t)(flags), (size_t)(size), (align), (socket)); \
+ if ((mem == NULL) && ((int)(socket) != SOCKET_ID_ANY)) { \
+ mem = mlx5_malloc((uint32_t)(flags), (size_t)(size), \
+ (align), SOCKET_ID_ANY); \
+ if (mem) { \
+ DRV_LOG(WARNING, \
+ "Allocated %p (size %zu socket %d) through NUMA tolerant fallback",\
+ (mem), ((size_t)(size)), (socket)); \
+ } \
+ } \
+ mem; \
+ }))
+#else
+#define mlx5_malloc_numa_tolerant(flags, size, align, socket) \
+	mlx5_malloc((flags) | MLX5_NUMA_TOLERANT, (size), (align), (socket))
+#endif
#ifdef __cplusplus
}
#endif
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index b4bd43aae25..29700524458 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -2271,8 +2271,8 @@ mlx5_proc_priv_init(struct rte_eth_dev *dev)
*/
ppriv_size = sizeof(struct mlx5_proc_priv) +
priv->txqs_n * sizeof(struct mlx5_uar_data);
- ppriv = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, ppriv_size,
- RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+ ppriv = mlx5_malloc_numa_tolerant(MLX5_MEM_RTE | MLX5_MEM_ZERO, ppriv_size,
+ RTE_CACHE_LINE_SIZE, dev->device->numa_node);
if (!ppriv) {
rte_errno = ENOMEM;
return -rte_errno;
diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c
index ab0de7eb0ca..3d49e096ef3 100644
--- a/drivers/net/mlx5/mlx5_devx.c
+++ b/drivers/net/mlx5/mlx5_devx.c
@@ -1097,14 +1097,14 @@ mlx5_rxq_devx_obj_drop_create(struct rte_eth_dev *dev)
* They are required to hold pointers for cleanup
* and are only accessible via drop queue DevX objects.
*/
- rxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq), 0, socket_id);
+ rxq = mlx5_malloc_numa_tolerant(MLX5_MEM_ZERO, sizeof(*rxq), 0, socket_id);
if (rxq == NULL) {
DRV_LOG(ERR, "Port %u could not allocate drop queue private",
dev->data->port_id);
rte_errno = ENOMEM;
goto error;
}
- rxq_ctrl = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq_ctrl),
+ rxq_ctrl = mlx5_malloc_numa_tolerant(MLX5_MEM_ZERO, sizeof(*rxq_ctrl),
0, socket_id);
if (rxq_ctrl == NULL) {
DRV_LOG(ERR, "Port %u could not allocate drop queue control",
@@ -1112,7 +1112,7 @@ mlx5_rxq_devx_obj_drop_create(struct rte_eth_dev *dev)
rte_errno = ENOMEM;
goto error;
}
- rxq_obj = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq_obj), 0, socket_id);
+ rxq_obj = mlx5_malloc_numa_tolerant(MLX5_MEM_ZERO, sizeof(*rxq_obj), 0, socket_id);
if (rxq_obj == NULL) {
DRV_LOG(ERR, "Port %u could not allocate drop queue object",
dev->data->port_id);
diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index af2e7a84a54..f7f85effbac 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -5070,7 +5070,7 @@ flow_hw_table_create(struct rte_eth_dev *dev,
tbl_mem_size = sizeof(*tbl);
tbl_mem_size += nb_action_templates * priv->nb_queue * sizeof(tbl->rule_acts[0]);
/* Allocate the table memory. */
- tbl = mlx5_malloc(MLX5_MEM_ZERO, tbl_mem_size, RTE_CACHE_LINE_SIZE, rte_socket_id());
+ tbl = mlx5_malloc(MLX5_MEM_ZERO, tbl_mem_size, RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
if (!tbl)
goto error;
tbl->cfg = *table_cfg;
@@ -5080,7 +5080,7 @@ flow_hw_table_create(struct rte_eth_dev *dev,
goto error;
/* Allocate table of auxiliary flow rule structs. */
tbl->flow_aux = mlx5_malloc(MLX5_MEM_ZERO, sizeof(struct rte_flow_hw_aux) * nb_flows,
- RTE_CACHE_LINE_SIZE, rte_dev_numa_node(dev->device));
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
if (!tbl->flow_aux)
goto error;
/* Register the flow group. */
@@ -8033,7 +8033,7 @@ __flow_hw_actions_template_create(struct rte_eth_dev *dev,
return NULL;
len += RTE_ALIGN(orig_act_len, 16);
at = mlx5_malloc(MLX5_MEM_ZERO, len + sizeof(*at),
- RTE_CACHE_LINE_SIZE, rte_socket_id());
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
if (!at) {
rte_flow_error_set(error, ENOMEM,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
@@ -8201,7 +8201,7 @@ flow_hw_prepend_item(const struct rte_flow_item *items,
/* Allocate new array of items. */
size = sizeof(*copied_items) * (nb_items + 1);
- copied_items = mlx5_malloc(MLX5_MEM_ZERO, size, 0, rte_socket_id());
+ copied_items = mlx5_malloc(MLX5_MEM_ZERO, size, 0, SOCKET_ID_ANY);
if (!copied_items) {
rte_flow_error_set(error, ENOMEM,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
@@ -9018,7 +9018,7 @@ flow_hw_pattern_template_create(struct rte_eth_dev *dev,
tmpl_items = items;
}
setup_pattern_template:
- it = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*it), 0, rte_socket_id());
+ it = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*it), 0, SOCKET_ID_ANY);
if (!it) {
rte_flow_error_set(error, ENOMEM,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
@@ -9038,7 +9038,7 @@ flow_hw_pattern_template_create(struct rte_eth_dev *dev,
goto error;
}
it_items_size = RTE_ALIGN(it_items_size, 16);
- it->items = mlx5_malloc(MLX5_MEM_ZERO, it_items_size, 0, rte_dev_numa_node(dev->device));
+ it->items = mlx5_malloc(MLX5_MEM_ZERO, it_items_size, 0, SOCKET_ID_ANY);
if (it->items == NULL) {
rte_flow_error_set(error, ENOMEM,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
@@ -11443,7 +11443,7 @@ flow_hw_create_ctrl_rx_tables(struct rte_eth_dev *dev)
MLX5_ASSERT(!priv->hw_ctrl_rx);
priv->hw_ctrl_rx = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*priv->hw_ctrl_rx),
- RTE_CACHE_LINE_SIZE, rte_socket_id());
+ RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
if (!priv->hw_ctrl_rx) {
DRV_LOG(ERR, "Failed to allocate memory for Rx control flow tables");
rte_errno = ENOMEM;
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index f5df451a32f..2e9bcbea4db 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1756,6 +1756,7 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
const struct rte_eth_rxseg_split *rx_seg, uint16_t n_seg,
bool is_extmem)
{
+ int ret;
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_rxq_ctrl *tmpl;
unsigned int mb_len = rte_pktmbuf_data_room_size(rx_seg[0].mp);
@@ -1793,7 +1794,12 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
desc >>= mprq_log_actual_stride_num;
alloc_size += desc * sizeof(struct mlx5_mprq_buf *);
}
- tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, alloc_size, 0, socket);
+ if (socket != (unsigned int)SOCKET_ID_ANY) {
+ tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, alloc_size, 0, socket);
+ } else {
+ tmpl = mlx5_malloc_numa_tolerant(MLX5_MEM_RTE | MLX5_MEM_ZERO, alloc_size, 0,
+ dev->device->numa_node);
+ }
if (!tmpl) {
rte_errno = ENOMEM;
return NULL;
@@ -1882,12 +1888,24 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
goto error;
}
tmpl->is_hairpin = false;
- if (mlx5_mr_ctrl_init(&tmpl->rxq.mr_ctrl,
- &priv->sh->cdev->mr_scache.dev_gen, socket)) {
- /* rte_errno is already set. */
- goto error;
+ if (socket != (unsigned int)SOCKET_ID_ANY) {
+ if (mlx5_mr_ctrl_init(&tmpl->rxq.mr_ctrl,
+ &priv->sh->cdev->mr_scache.dev_gen, socket))
+ /* rte_errno is already set. */
+ goto error;
+ } else {
+ ret = mlx5_mr_ctrl_init(&tmpl->rxq.mr_ctrl,
+ &priv->sh->cdev->mr_scache.dev_gen, dev->device->numa_node);
+ if (ret == -ENOMEM) {
+ ret = mlx5_mr_ctrl_init(&tmpl->rxq.mr_ctrl,
+ &priv->sh->cdev->mr_scache.dev_gen, SOCKET_ID_ANY);
+ }
+ if (ret)
+ /* rte_errno is already set. */
+ goto error;
}
- tmpl->socket = socket;
+ tmpl->socket = (socket == (unsigned int)SOCKET_ID_ANY ?
+ (unsigned int)dev->device->numa_node : socket);
if (dev->data->dev_conf.intr_conf.rxq)
tmpl->irq = 1;
if (mprq_en) {
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 644927c19c9..3aa7d01ee2e 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -64,8 +64,8 @@ mlx5_txq_start(struct rte_eth_dev *dev)
if (!txq_ctrl->is_hairpin)
txq_alloc_elts(txq_ctrl);
MLX5_ASSERT(!txq_ctrl->obj);
- txq_ctrl->obj = mlx5_malloc(flags, sizeof(struct mlx5_txq_obj),
- 0, txq_ctrl->socket);
+ txq_ctrl->obj = mlx5_malloc_numa_tolerant(flags, sizeof(struct mlx5_txq_obj),
+ 0, txq_ctrl->socket);
if (!txq_ctrl->obj) {
DRV_LOG(ERR, "Port %u Tx queue %u cannot allocate "
"memory resources.", dev->data->port_id,
@@ -82,9 +82,9 @@ mlx5_txq_start(struct rte_eth_dev *dev)
if (!txq_ctrl->is_hairpin) {
size_t size = txq_data->cqe_s * sizeof(*txq_data->fcqs);
- txq_data->fcqs = mlx5_malloc(flags, size,
- RTE_CACHE_LINE_SIZE,
- txq_ctrl->socket);
+ txq_data->fcqs = mlx5_malloc_numa_tolerant(flags, size,
+ RTE_CACHE_LINE_SIZE,
+ txq_ctrl->socket);
if (!txq_data->fcqs) {
DRV_LOG(ERR, "Port %u Tx queue %u cannot "
"allocate memory (FCQ).",
@@ -182,9 +182,9 @@ mlx5_rxq_ctrl_prepare(struct rte_eth_dev *dev, struct mlx5_rxq_ctrl *rxq_ctrl,
return ret;
}
MLX5_ASSERT(!rxq_ctrl->obj);
- rxq_ctrl->obj = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
- sizeof(*rxq_ctrl->obj), 0,
- rxq_ctrl->socket);
+ rxq_ctrl->obj = mlx5_malloc_numa_tolerant(MLX5_MEM_RTE | MLX5_MEM_ZERO,
+ sizeof(*rxq_ctrl->obj), 0,
+ rxq_ctrl->socket);
if (!rxq_ctrl->obj) {
DRV_LOG(ERR, "Port %u Rx queue %u can't allocate resources.",
dev->data->port_id, idx);
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 5fee5bc4e87..5db12f5c645 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -1052,26 +1052,45 @@ struct mlx5_txq_ctrl *
mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
unsigned int socket, const struct rte_eth_txconf *conf)
{
+ int ret;
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_txq_ctrl *tmpl;
uint16_t max_wqe;
- tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl) +
+ if (socket != (unsigned int)SOCKET_ID_ANY) {
+ tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl) +
desc * sizeof(struct rte_mbuf *), 0, socket);
+ } else {
+ tmpl = mlx5_malloc_numa_tolerant(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl) +
+ desc * sizeof(struct rte_mbuf *), 0,
+ dev->device->numa_node);
+ }
if (!tmpl) {
rte_errno = ENOMEM;
return NULL;
}
- if (mlx5_mr_ctrl_init(&tmpl->txq.mr_ctrl,
- &priv->sh->cdev->mr_scache.dev_gen, socket)) {
- /* rte_errno is already set. */
- goto error;
+ if (socket != (unsigned int)SOCKET_ID_ANY) {
+ if (mlx5_mr_ctrl_init(&tmpl->txq.mr_ctrl,
+ &priv->sh->cdev->mr_scache.dev_gen, socket))
+ /* rte_errno is already set. */
+ goto error;
+ } else {
+ ret = mlx5_mr_ctrl_init(&tmpl->txq.mr_ctrl,
+ &priv->sh->cdev->mr_scache.dev_gen, dev->device->numa_node);
+ if (ret == -ENOMEM) {
+ ret = mlx5_mr_ctrl_init(&tmpl->txq.mr_ctrl,
+ &priv->sh->cdev->mr_scache.dev_gen, SOCKET_ID_ANY);
+ }
+ if (ret)
+ /* rte_errno is already set. */
+ goto error;
}
MLX5_ASSERT(desc > MLX5_TX_COMP_THRESH);
tmpl->txq.offloads = conf->offloads |
dev->data->dev_conf.txmode.offloads;
tmpl->priv = priv;
- tmpl->socket = socket;
+ tmpl->socket = (socket == (unsigned int)SOCKET_ID_ANY ?
+ (unsigned int)dev->device->numa_node : socket);
tmpl->txq.elts_n = log2above(desc);
tmpl->txq.elts_s = desc;
tmpl->txq.elts_m = desc - 1;
diff --git a/drivers/net/mlx5/mlx5_utils.c b/drivers/net/mlx5/mlx5_utils.c
index 1503cacf387..3352195fa18 100644
--- a/drivers/net/mlx5/mlx5_utils.c
+++ b/drivers/net/mlx5/mlx5_utils.c
@@ -10,6 +10,29 @@
/********************* Indexed pool **********************/
+#if defined(RTE_TOOLCHAIN_GCC) || defined(RTE_TOOLCHAIN_CLANG)
+#define pool_malloc(pool, flags, size, align, socket) (__extension__ ({ \
+ struct mlx5_indexed_pool *p = (struct mlx5_indexed_pool *)(pool); \
+ uint32_t f = (uint32_t)(flags); \
+ size_t s = (size_t)(size); \
+ uint32_t a = (uint32_t)(align); \
+ int so = (int)(socket); \
+ void *mem = p->cfg.malloc(f, s, a, so); \
+ if (mem == NULL && so != SOCKET_ID_ANY) { \
+ mem = p->cfg.malloc(f, s, a, SOCKET_ID_ANY); \
+ if (mem) { \
+ DRV_LOG(WARNING, \
+ "Allocated %p (size %zu socket %d) through NUMA tolerant fallback", \
+ mem, s, so); \
+ } \
+ } \
+ mem; \
+}))
+#else
+#define pool_malloc(pool, flags, size, align, socket) \
+	(pool)->cfg.malloc((uint32_t)(flags) | MLX5_NUMA_TOLERANT, (size), (align), (socket))
+#endif
+
int mlx5_logtype_ipool;
/* Initialize driver log type. */
@@ -149,7 +172,7 @@ mlx5_ipool_grow(struct mlx5_indexed_pool *pool)
int n_grow = pool->n_trunk_valid ? pool->n_trunk :
RTE_CACHE_LINE_SIZE / sizeof(void *);
- p = pool->cfg.malloc(0, (pool->n_trunk_valid + n_grow) *
+ p = pool_malloc(pool, MLX5_MEM_ZERO, (pool->n_trunk_valid + n_grow) *
sizeof(struct mlx5_indexed_trunk *),
RTE_CACHE_LINE_SIZE, rte_socket_id());
if (!p)
@@ -179,7 +202,7 @@ mlx5_ipool_grow(struct mlx5_indexed_pool *pool)
/* rte_bitmap requires memory cacheline aligned. */
trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
trunk_size += bmp_size;
- trunk = pool->cfg.malloc(0, trunk_size,
+ trunk = pool_malloc(pool, MLX5_MEM_ZERO, trunk_size,
RTE_CACHE_LINE_SIZE, rte_socket_id());
if (!trunk)
return -ENOMEM;
@@ -253,9 +276,10 @@ mlx5_ipool_grow_bmp(struct mlx5_indexed_pool *pool, uint32_t new_size)
pool->cache_validator.bmp_size = new_size;
bmp_mem_size = rte_bitmap_get_memory_footprint(new_size);
- pool->cache_validator.bmp_mem = pool->cfg.malloc(MLX5_MEM_ZERO, bmp_mem_size,
- RTE_CACHE_LINE_SIZE,
- rte_socket_id());
+ pool->cache_validator.bmp_mem = pool_malloc(pool, MLX5_MEM_ZERO,
+ bmp_mem_size,
+ RTE_CACHE_LINE_SIZE,
+ rte_socket_id());
if (unlikely(!pool->cache_validator.bmp_mem)) {
DRV_LOG_IPOOL(ERR, "Unable to allocate memory for a new bitmap");
return;
@@ -343,7 +367,7 @@ mlx5_ipool_allocate_from_global(struct mlx5_indexed_pool *pool, int cidx)
RTE_CACHE_LINE_SIZE / sizeof(void *);
cur_max_idx = mlx5_trunk_idx_offset_get(pool, trunk_n + n_grow);
/* Resize the trunk array. */
- p = pool->cfg.malloc(0, ((trunk_idx + n_grow) *
+ p = pool_malloc(pool, MLX5_MEM_ZERO, ((trunk_idx + n_grow) *
sizeof(struct mlx5_indexed_trunk *)) +
(cur_max_idx * sizeof(uint32_t)) + sizeof(*p),
RTE_CACHE_LINE_SIZE, rte_socket_id());
@@ -365,7 +389,7 @@ mlx5_ipool_allocate_from_global(struct mlx5_indexed_pool *pool, int cidx)
trunk_size = sizeof(*trunk);
data_size = mlx5_trunk_size_get(pool, trunk_idx);
trunk_size += RTE_CACHE_LINE_ROUNDUP(data_size * pool->cfg.size);
- trunk = pool->cfg.malloc(0, trunk_size,
+ trunk = pool_malloc(pool, MLX5_MEM_ZERO, trunk_size,
RTE_CACHE_LINE_SIZE, rte_socket_id());
if (unlikely(!trunk)) {
pool->cfg.free(p);
@@ -429,7 +453,7 @@ _mlx5_ipool_get_cache(struct mlx5_indexed_pool *pool, int cidx, uint32_t idx)
MLX5_ASSERT(idx);
if (unlikely(!pool->cache[cidx])) {
- pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
+ pool->cache[cidx] = pool_malloc(pool, MLX5_MEM_ZERO,
sizeof(struct mlx5_ipool_per_lcore) +
(pool->cfg.per_core_cache * sizeof(uint32_t)),
RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
@@ -515,7 +539,7 @@ _mlx5_ipool_malloc_cache(struct mlx5_indexed_pool *pool, int cidx,
uint32_t *idx)
{
if (unlikely(!pool->cache[cidx])) {
- pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
+ pool->cache[cidx] = pool_malloc(pool, MLX5_MEM_ZERO,
sizeof(struct mlx5_ipool_per_lcore) +
(pool->cfg.per_core_cache * sizeof(uint32_t)),
RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
@@ -577,7 +601,7 @@ _mlx5_ipool_free_cache(struct mlx5_indexed_pool *pool, int cidx, uint32_t idx)
* case check if local cache on core B was allocated before.
*/
if (unlikely(!pool->cache[cidx])) {
- pool->cache[cidx] = pool->cfg.malloc(MLX5_MEM_ZERO,
+ pool->cache[cidx] = pool_malloc(pool, MLX5_MEM_ZERO,
sizeof(struct mlx5_ipool_per_lcore) +
(pool->cfg.per_core_cache * sizeof(uint32_t)),
RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
@@ -881,7 +905,7 @@ mlx5_ipool_flush_cache(struct mlx5_indexed_pool *pool)
/* Reset bmp. */
bmp_num = mlx5_trunk_idx_offset_get(pool, gc->n_trunk_valid);
mem_size = rte_bitmap_get_memory_footprint(bmp_num);
- pool->bmp_mem = pool->cfg.malloc(MLX5_MEM_ZERO, mem_size,
+ pool->bmp_mem = pool_malloc(pool, MLX5_MEM_ZERO, mem_size,
RTE_CACHE_LINE_SIZE, rte_socket_id());
if (!pool->bmp_mem) {
DRV_LOG(ERR, "Ipool bitmap mem allocate failed.\n");
--
2.21.0