DPDK patches and discussions
 help / color / mirror / Atom feed
* [RFC] lib: set/get max memzone segments
@ 2023-04-19  8:36 Ophir Munk
  2023-04-19  8:48 ` Ophir Munk
                   ` (4 more replies)
  0 siblings, 5 replies; 35+ messages in thread
From: Ophir Munk @ 2023-04-19  8:36 UTC (permalink / raw)
  To: dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad
  Cc: Ophir Munk, Matan Azrad, Thomas Monjalon, Lior Margalit

In current DPDK the RTE_MAX_MEMZONE definition is unconditionally hard
coded as 2560.  For applications requiring different values of this
parameter – it is more convenient to set the max value via an rte API -
rather than changing the dpdk source code per application.  In many
organizations, the possibility to compile a private DPDK library for a
particular application does not exist at all.  With this option there is
no need to recompile DPDK and it allows using an in-box packaged DPDK.
An example usage for updating the RTE_MAX_MEMZONE would be of an
application that uses the DPDK mempool library which is based on DPDK
memzone library.  The application may need to create a number of
steering tables, each of which will require its own mempool allocation.
This commit is not about how to optimize the application usage of
mempool nor about how to improve the mempool implementation based on
memzone.  It is about how to make the max memzone definition - run-time
customized.
This commit adds an API which must be called before rte_eal_init():
rte_memzone_max_set(int max).  If not called, the default memzone
(RTE_MAX_MEMZONE) is used.  There is also an API to query the effective
max memzone: rte_memzone_max_get().

Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
---
 app/test/test_func_reentrancy.c     |  2 +-
 app/test/test_malloc_perf.c         |  2 +-
 app/test/test_memzone.c             |  2 +-
 config/rte_config.h                 |  1 -
 drivers/net/qede/base/bcm_osal.c    | 26 +++++++++++++++++++++-----
 drivers/net/qede/base/bcm_osal.h    |  3 +++
 drivers/net/qede/qede_main.c        |  7 +++++++
 lib/eal/common/eal_common_memzone.c | 28 +++++++++++++++++++++++++---
 lib/eal/include/rte_memzone.h       | 20 ++++++++++++++++++++
 lib/eal/version.map                 |  4 ++++
 10 files changed, 83 insertions(+), 12 deletions(-)

diff --git a/app/test/test_func_reentrancy.c b/app/test/test_func_reentrancy.c
index d1ed5d4..ae9de6f 100644
--- a/app/test/test_func_reentrancy.c
+++ b/app/test/test_func_reentrancy.c
@@ -51,7 +51,7 @@ typedef void (*case_clean_t)(unsigned lcore_id);
 #define MEMPOOL_ELT_SIZE                    (sizeof(uint32_t))
 #define MEMPOOL_SIZE                        (4)
 
-#define MAX_LCORES	(RTE_MAX_MEMZONE / (MAX_ITER_MULTI * 4U))
+#define MAX_LCORES	(rte_memzone_max_get() / (MAX_ITER_MULTI * 4U))
 
 static uint32_t obj_count;
 static uint32_t synchro;
diff --git a/app/test/test_malloc_perf.c b/app/test/test_malloc_perf.c
index ccec43a..9bd1662 100644
--- a/app/test/test_malloc_perf.c
+++ b/app/test/test_malloc_perf.c
@@ -165,7 +165,7 @@ test_malloc_perf(void)
 		return -1;
 
 	if (test_alloc_perf("rte_memzone_reserve", memzone_alloc, memzone_free,
-			NULL, memset_us_gb, RTE_MAX_MEMZONE - 1) < 0)
+			NULL, memset_us_gb, rte_memzone_max_get() - 1) < 0)
 		return -1;
 
 	return 0;
diff --git a/app/test/test_memzone.c b/app/test/test_memzone.c
index c9255e5..a315826 100644
--- a/app/test/test_memzone.c
+++ b/app/test/test_memzone.c
@@ -871,7 +871,7 @@ test_memzone_bounded(void)
 static int
 test_memzone_free(void)
 {
-	const struct rte_memzone *mz[RTE_MAX_MEMZONE + 1];
+	const struct rte_memzone *mz[rte_memzone_max_get() + 1];
 	int i;
 	char name[20];
 
diff --git a/config/rte_config.h b/config/rte_config.h
index 7b8c85e..400e44e 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -34,7 +34,6 @@
 #define RTE_MAX_MEM_MB_PER_LIST 32768
 #define RTE_MAX_MEMSEG_PER_TYPE 32768
 #define RTE_MAX_MEM_MB_PER_TYPE 65536
-#define RTE_MAX_MEMZONE 2560
 #define RTE_MAX_TAILQ 32
 #define RTE_LOG_DP_LEVEL RTE_LOG_INFO
 #define RTE_MAX_VFIO_CONTAINERS 64
diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 2c59397..f195f2c 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -47,10 +47,26 @@ void osal_poll_mode_dpc(osal_int_ptr_t hwfn_cookie)
 }
 
 /* Array of memzone pointers */
-static const struct rte_memzone *ecore_mz_mapping[RTE_MAX_MEMZONE];
+static const struct rte_memzone **ecore_mz_mapping;
 /* Counter to track current memzone allocated */
 static uint16_t ecore_mz_count;
 
+int ecore_mz_mapping_alloc(void)
+{
+	ecore_mz_mapping = rte_malloc("ecore_mz_map", 0,
+		rte_memzone_max_get() * sizeof(struct rte_memzone *));
+
+	if (!ecore_mz_mapping)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void ecore_mz_mapping_free(void)
+{
+	rte_free(ecore_mz_mapping);
+}
+
 unsigned long qede_log2_align(unsigned long n)
 {
 	unsigned long ret = n ? 1 : 0;
@@ -132,9 +148,9 @@ void *osal_dma_alloc_coherent(struct ecore_dev *p_dev,
 	uint32_t core_id = rte_lcore_id();
 	unsigned int socket_id;
 
-	if (ecore_mz_count >= RTE_MAX_MEMZONE) {
+	if (ecore_mz_count >= rte_memzone_max_get()) {
 		DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
-		       RTE_MAX_MEMZONE);
+		       rte_memzone_max_get());
 		*phys = 0;
 		return OSAL_NULL;
 	}
@@ -171,9 +187,9 @@ void *osal_dma_alloc_coherent_aligned(struct ecore_dev *p_dev,
 	uint32_t core_id = rte_lcore_id();
 	unsigned int socket_id;
 
-	if (ecore_mz_count >= RTE_MAX_MEMZONE) {
+	if (ecore_mz_count >= rte_memzone_max_get()) {
 		DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
-		       RTE_MAX_MEMZONE);
+		       rte_memzone_max_get());
 		*phys = 0;
 		return OSAL_NULL;
 	}
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 67e7f75..97e261d 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -477,4 +477,7 @@ enum dbg_status	qed_dbg_alloc_user_data(struct ecore_hwfn *p_hwfn,
 	qed_dbg_alloc_user_data(p_hwfn, user_data_ptr)
 #define OSAL_DB_REC_OCCURRED(p_hwfn) nothing
 
+int ecore_mz_mapping_alloc(void);
+void ecore_mz_mapping_free(void);
+
 #endif /* __BCM_OSAL_H */
diff --git a/drivers/net/qede/qede_main.c b/drivers/net/qede/qede_main.c
index 0303903..f116e86 100644
--- a/drivers/net/qede/qede_main.c
+++ b/drivers/net/qede/qede_main.c
@@ -78,6 +78,12 @@ qed_probe(struct ecore_dev *edev, struct rte_pci_device *pci_dev,
 		return rc;
 	}
 
+	rc = ecore_mz_mapping_alloc();
+	if (rc) {
+		DP_ERR(edev, "mem zones array allocation failed\n");
+		return rc;
+	}
+
 	return rc;
 }
 
@@ -721,6 +727,7 @@ static void qed_remove(struct ecore_dev *edev)
 	if (!edev)
 		return;
 
+	ecore_mz_mapping_free();
 	ecore_hw_remove(edev);
 }
 
diff --git a/lib/eal/common/eal_common_memzone.c b/lib/eal/common/eal_common_memzone.c
index a9cd91f..6c43b7f 100644
--- a/lib/eal/common/eal_common_memzone.c
+++ b/lib/eal/common/eal_common_memzone.c
@@ -22,6 +22,10 @@
 #include "eal_private.h"
 #include "eal_memcfg.h"
 
+#define RTE_DEFAULT_MAX_MEMZONE 2560
+
+static uint32_t memzone_max = RTE_DEFAULT_MAX_MEMZONE;
+
 static inline const struct rte_memzone *
 memzone_lookup_thread_unsafe(const char *name)
 {
@@ -81,8 +85,9 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 	/* no more room in config */
 	if (arr->count >= arr->len) {
 		RTE_LOG(ERR, EAL,
-		"%s(): Number of requested memzone segments exceeds RTE_MAX_MEMZONE\n",
-			__func__);
+		"%s(): Number of requested memzone segments exceeds max "
+		"memzone segments (%d >= %d)\n",
+			__func__, arr->count, arr->len);
 		rte_errno = ENOSPC;
 		return NULL;
 	}
@@ -396,7 +401,7 @@ rte_eal_memzone_init(void)
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
 			rte_fbarray_init(&mcfg->memzones, "memzone",
-			RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) {
+			rte_memzone_max_get(), sizeof(struct rte_memzone))) {
 		RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n");
 		ret = -1;
 	} else if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
@@ -430,3 +435,20 @@ void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
 	}
 	rte_rwlock_read_unlock(&mcfg->mlock);
 }
+
+int
+rte_memzone_max_set(uint32_t max)
+{
+	/* Setting max memzone must occur befaore calling rte_eal_init() */
+	if (eal_get_internal_configuration()->init_complete > 0)
+		return -1;
+
+	memzone_max = max;
+	return 0;
+}
+
+uint32_t
+rte_memzone_max_get(void)
+{
+	return memzone_max;
+}
diff --git a/lib/eal/include/rte_memzone.h b/lib/eal/include/rte_memzone.h
index 5302caa..ca60409 100644
--- a/lib/eal/include/rte_memzone.h
+++ b/lib/eal/include/rte_memzone.h
@@ -305,6 +305,26 @@ void rte_memzone_dump(FILE *f);
 void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *arg),
 		      void *arg);
 
+/**
+ * Set max memzone value
+ *
+ * @param max
+ *   Value of max memzone allocations
+ * @return
+ *  0 on success, -1 otherwise
+ */
+__rte_experimental
+int rte_memzone_max_set(uint32_t max);
+
+/**
+ * Get max memzone value
+ *
+ * @return
+ *   Value of max memzone allocations
+ */
+__rte_experimental
+uint32_t rte_memzone_max_get(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/version.map b/lib/eal/version.map
index 6d6978f..717c5b2 100644
--- a/lib/eal/version.map
+++ b/lib/eal/version.map
@@ -430,6 +430,10 @@ EXPERIMENTAL {
 	rte_thread_create_control;
 	rte_thread_set_name;
 	__rte_eal_trace_generic_blob;
+
+	# added in 23.07
+	rte_memzone_max_set;
+	rte_memzone_max_get;
 };
 
 INTERNAL {
-- 
2.8.4


^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [RFC] lib: set/get max memzone segments
  2023-04-19  8:36 [RFC] lib: set/get max memzone segments Ophir Munk
@ 2023-04-19  8:48 ` Ophir Munk
  2023-04-19 13:42 ` [EXT] " Devendra Singh Rawat
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 35+ messages in thread
From: Ophir Munk @ 2023-04-19  8:48 UTC (permalink / raw)
  To: dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad
  Cc: Matan Azrad, NBU-Contact-Thomas Monjalon (EXTERNAL), Lior Margalit

Devendra Singh Rawat, Alok Prasad - can you please give your feedback on the qede driver updates?

> -----Original Message-----
> In current DPDK the RTE_MAX_MEMZONE definition is unconditionally hard
> coded as 2560.  For applications requiring different values of this parameter
> – it is more convenient to set the max value via an rte API - rather than
> changing the dpdk source code per application.  In many organizations, the
> possibility to compile a private DPDK library for a particular application does
> not exist at all.  With this option there is no need to recompile DPDK and it
> allows using an in-box packaged DPDK.
> An example usage for updating the RTE_MAX_MEMZONE would be of an
> application that uses the DPDK mempool library which is based on DPDK
> memzone library.  The application may need to create a number of steering
> tables, each of which will require its own mempool allocation.
> This commit is not about how to optimize the application usage of mempool
> nor about how to improve the mempool implementation based on
> memzone.  It is about how to make the max memzone definition - run-time
> customized.
> This commit adds an API which must be called before rte_eal_init():
> rte_memzone_max_set(int max).  If not called, the default memzone
> (RTE_MAX_MEMZONE) is used.  There is also an API to query the effective
> max memzone: rte_memzone_max_get().
> 
> Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
> ---
>  app/test/test_func_reentrancy.c     |  2 +-
>  app/test/test_malloc_perf.c         |  2 +-
>  app/test/test_memzone.c             |  2 +-
>  config/rte_config.h                 |  1 -
>  drivers/net/qede/base/bcm_osal.c    | 26 +++++++++++++++++++++-----
>  drivers/net/qede/base/bcm_osal.h    |  3 +++
>  drivers/net/qede/qede_main.c        |  7 +++++++
>  lib/eal/common/eal_common_memzone.c | 28

^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [EXT] [RFC] lib: set/get max memzone segments
  2023-04-19  8:36 [RFC] lib: set/get max memzone segments Ophir Munk
  2023-04-19  8:48 ` Ophir Munk
@ 2023-04-19 13:42 ` Devendra Singh Rawat
  2023-04-24 21:07   ` Ophir Munk
  2023-04-19 14:42 ` Stephen Hemminger
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 35+ messages in thread
From: Devendra Singh Rawat @ 2023-04-19 13:42 UTC (permalink / raw)
  To: Ophir Munk, dev, Bruce Richardson, Alok Prasad
  Cc: Ophir Munk, Matan Azrad, Thomas Monjalon, Lior Margalit


>diff --git a/drivers/net/qede/base/bcm_osal.c
>b/drivers/net/qede/base/bcm_osal.c
>index 2c59397..f195f2c 100644
>--- a/drivers/net/qede/base/bcm_osal.c
>+++ b/drivers/net/qede/base/bcm_osal.c
>@@ -47,10 +47,26 @@ void osal_poll_mode_dpc(osal_int_ptr_t
>hwfn_cookie)  }
>
> /* Array of memzone pointers */
>-static const struct rte_memzone
>*ecore_mz_mapping[RTE_MAX_MEMZONE];
>+static const struct rte_memzone **ecore_mz_mapping;
> /* Counter to track current memzone allocated */  static uint16_t
>ecore_mz_count;
>
>+int ecore_mz_mapping_alloc(void)
>+{
>+	ecore_mz_mapping = rte_malloc("ecore_mz_map", 0,
>+		rte_memzone_max_get() * sizeof(struct rte_memzone *));

Second parameter of rte_malloc() should be size and Third parameter should be alignment 0 in this case.

Check 
https://doc.dpdk.org/api/rte__malloc_8h.html#a247c99e8d36300c52729c9ee58c2b489

>diff --git a/drivers/net/qede/base/bcm_osal.h
>b/drivers/net/qede/base/bcm_osal.h
>index 67e7f75..97e261d 100644
>--- a/drivers/net/qede/base/bcm_osal.h
>+++ b/drivers/net/qede/base/bcm_osal.h
>@@ -477,4 +477,7 @@ enum dbg_status
>	qed_dbg_alloc_user_data(struct ecore_hwfn *p_hwfn,
> 	qed_dbg_alloc_user_data(p_hwfn, user_data_ptr)  #define
>OSAL_DB_REC_OCCURRED(p_hwfn) nothing
>
>+int ecore_mz_mapping_alloc(void);
>+void ecore_mz_mapping_free(void);
>+
> #endif /* __BCM_OSAL_H */
>diff --git a/drivers/net/qede/qede_main.c b/drivers/net/qede/qede_main.c
>index 0303903..f116e86 100644
>--- a/drivers/net/qede/qede_main.c
>+++ b/drivers/net/qede/qede_main.c
>@@ -78,6 +78,12 @@ qed_probe(struct ecore_dev *edev, struct
>rte_pci_device *pci_dev,
> 		return rc;
> 	}
>
>+	rc = ecore_mz_mapping_alloc();

ecore_mz_mapping_alloc() should be called prior to calling ecore_hw_prepare().

>+	if (rc) {
>+		DP_ERR(edev, "mem zones array allocation failed\n");
>+		return rc;
>+	}
>+
> 	return rc;
> }
>
>@@ -721,6 +727,7 @@ static void qed_remove(struct ecore_dev *edev)
> 	if (!edev)
> 		return;
>
>+	ecore_mz_mapping_free();
> 	ecore_hw_remove(edev);
> }

ecore_mz_mapping_free() should be called after ecore_hw_remove();


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [RFC] lib: set/get max memzone segments
  2023-04-19  8:36 [RFC] lib: set/get max memzone segments Ophir Munk
  2023-04-19  8:48 ` Ophir Munk
  2023-04-19 13:42 ` [EXT] " Devendra Singh Rawat
@ 2023-04-19 14:42 ` Stephen Hemminger
  2023-04-24 21:43   ` Ophir Munk
  2023-04-19 14:51 ` Tyler Retzlaff
  2023-04-25 16:40 ` [RFC V2] " Ophir Munk
  4 siblings, 1 reply; 35+ messages in thread
From: Stephen Hemminger @ 2023-04-19 14:42 UTC (permalink / raw)
  To: Ophir Munk
  Cc: dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad,
	Ophir Munk, Matan Azrad, Thomas Monjalon, Lior Margalit

On Wed, 19 Apr 2023 11:36:34 +0300
Ophir Munk <ophirmu@nvidia.com> wrote:

> +int ecore_mz_mapping_alloc(void)
> +{
> +	ecore_mz_mapping = rte_malloc("ecore_mz_map", 0,
> +		rte_memzone_max_get() * sizeof(struct rte_memzone *));

Why not use rte_calloc(), and devices should be using NUMA aware
allocation to put the memzone on same NUMA node as the PCI device.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [RFC] lib: set/get max memzone segments
  2023-04-19  8:36 [RFC] lib: set/get max memzone segments Ophir Munk
                   ` (2 preceding siblings ...)
  2023-04-19 14:42 ` Stephen Hemminger
@ 2023-04-19 14:51 ` Tyler Retzlaff
  2023-04-20  7:43   ` Thomas Monjalon
  2023-04-25 13:46   ` Ophir Munk
  2023-04-25 16:40 ` [RFC V2] " Ophir Munk
  4 siblings, 2 replies; 35+ messages in thread
From: Tyler Retzlaff @ 2023-04-19 14:51 UTC (permalink / raw)
  To: Ophir Munk
  Cc: dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad,
	Ophir Munk, Matan Azrad, Thomas Monjalon, Lior Margalit

On Wed, Apr 19, 2023 at 11:36:34AM +0300, Ophir Munk wrote:
> In current DPDK the RTE_MAX_MEMZONE definition is unconditionally hard
> coded as 2560.  For applications requiring different values of this
> parameter – it is more convenient to set the max value via an rte API -
> rather than changing the dpdk source code per application.  In many
> organizations, the possibility to compile a private DPDK library for a
> particular application does not exist at all.  With this option there is
> no need to recompile DPDK and it allows using an in-box packaged DPDK.
> An example usage for updating the RTE_MAX_MEMZONE would be of an
> application that uses the DPDK mempool library which is based on DPDK
> memzone library.  The application may need to create a number of
> steering tables, each of which will require its own mempool allocation.
> This commit is not about how to optimize the application usage of
> mempool nor about how to improve the mempool implementation based on
> memzone.  It is about how to make the max memzone definition - run-time
> customized.
> This commit adds an API which must be called before rte_eal_init():
> rte_memzone_max_set(int max).  If not called, the default memzone
> (RTE_MAX_MEMZONE) is used.  There is also an API to query the effective
> max memzone: rte_memzone_max_get().
> 
> Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
> ---

the use case of each application may want a different non-hard coded
value makes sense.

it's less clear to me that requiring it be called before eal init makes
sense over just providing it as configuration to eal init so that it is
composed.

can you elaborate further on why you need get if you have a one-shot
set? why would the application not know the value if you can only ever
call it once before init?

>  app/test/test_func_reentrancy.c     |  2 +-
>  app/test/test_malloc_perf.c         |  2 +-
>  app/test/test_memzone.c             |  2 +-
>  config/rte_config.h                 |  1 -
>  drivers/net/qede/base/bcm_osal.c    | 26 +++++++++++++++++++++-----
>  drivers/net/qede/base/bcm_osal.h    |  3 +++
>  drivers/net/qede/qede_main.c        |  7 +++++++
>  lib/eal/common/eal_common_memzone.c | 28 +++++++++++++++++++++++++---
>  lib/eal/include/rte_memzone.h       | 20 ++++++++++++++++++++
>  lib/eal/version.map                 |  4 ++++
>  10 files changed, 83 insertions(+), 12 deletions(-)
> 
> diff --git a/app/test/test_func_reentrancy.c b/app/test/test_func_reentrancy.c
> index d1ed5d4..ae9de6f 100644
> --- a/app/test/test_func_reentrancy.c
> +++ b/app/test/test_func_reentrancy.c
> @@ -51,7 +51,7 @@ typedef void (*case_clean_t)(unsigned lcore_id);
>  #define MEMPOOL_ELT_SIZE                    (sizeof(uint32_t))
>  #define MEMPOOL_SIZE                        (4)
>  
> -#define MAX_LCORES	(RTE_MAX_MEMZONE / (MAX_ITER_MULTI * 4U))
> +#define MAX_LCORES	(rte_memzone_max_get() / (MAX_ITER_MULTI * 4U))
>  
>  static uint32_t obj_count;
>  static uint32_t synchro;
> diff --git a/app/test/test_malloc_perf.c b/app/test/test_malloc_perf.c
> index ccec43a..9bd1662 100644
> --- a/app/test/test_malloc_perf.c
> +++ b/app/test/test_malloc_perf.c
> @@ -165,7 +165,7 @@ test_malloc_perf(void)
>  		return -1;
>  
>  	if (test_alloc_perf("rte_memzone_reserve", memzone_alloc, memzone_free,
> -			NULL, memset_us_gb, RTE_MAX_MEMZONE - 1) < 0)
> +			NULL, memset_us_gb, rte_memzone_max_get() - 1) < 0)
>  		return -1;
>  
>  	return 0;
> diff --git a/app/test/test_memzone.c b/app/test/test_memzone.c
> index c9255e5..a315826 100644
> --- a/app/test/test_memzone.c
> +++ b/app/test/test_memzone.c
> @@ -871,7 +871,7 @@ test_memzone_bounded(void)
>  static int
>  test_memzone_free(void)
>  {
> -	const struct rte_memzone *mz[RTE_MAX_MEMZONE + 1];
> +	const struct rte_memzone *mz[rte_memzone_max_get() + 1];

please no more VLAs even if in tests.

>  	int i;
>  	char name[20];
>  
> diff --git a/config/rte_config.h b/config/rte_config.h
> index 7b8c85e..400e44e 100644
> --- a/config/rte_config.h
> +++ b/config/rte_config.h
> @@ -34,7 +34,6 @@
>  #define RTE_MAX_MEM_MB_PER_LIST 32768
>  #define RTE_MAX_MEMSEG_PER_TYPE 32768
>  #define RTE_MAX_MEM_MB_PER_TYPE 65536
> -#define RTE_MAX_MEMZONE 2560
>  #define RTE_MAX_TAILQ 32
>  #define RTE_LOG_DP_LEVEL RTE_LOG_INFO
>  #define RTE_MAX_VFIO_CONTAINERS 64
> diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
> index 2c59397..f195f2c 100644
> --- a/drivers/net/qede/base/bcm_osal.c
> +++ b/drivers/net/qede/base/bcm_osal.c
> @@ -47,10 +47,26 @@ void osal_poll_mode_dpc(osal_int_ptr_t hwfn_cookie)
>  }
>  
>  /* Array of memzone pointers */
> -static const struct rte_memzone *ecore_mz_mapping[RTE_MAX_MEMZONE];
> +static const struct rte_memzone **ecore_mz_mapping;
>  /* Counter to track current memzone allocated */
>  static uint16_t ecore_mz_count;
>  
> +int ecore_mz_mapping_alloc(void)
> +{
> +	ecore_mz_mapping = rte_malloc("ecore_mz_map", 0,
> +		rte_memzone_max_get() * sizeof(struct rte_memzone *));
> +
> +	if (!ecore_mz_mapping)
> +		return -ENOMEM;
> +
> +	return 0;
> +}
> +
> +void ecore_mz_mapping_free(void)
> +{
> +	rte_free(ecore_mz_mapping);
> +}
> +
>  unsigned long qede_log2_align(unsigned long n)
>  {
>  	unsigned long ret = n ? 1 : 0;
> @@ -132,9 +148,9 @@ void *osal_dma_alloc_coherent(struct ecore_dev *p_dev,
>  	uint32_t core_id = rte_lcore_id();
>  	unsigned int socket_id;
>  
> -	if (ecore_mz_count >= RTE_MAX_MEMZONE) {
> +	if (ecore_mz_count >= rte_memzone_max_get()) {
>  		DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
> -		       RTE_MAX_MEMZONE);
> +		       rte_memzone_max_get());
>  		*phys = 0;
>  		return OSAL_NULL;
>  	}
> @@ -171,9 +187,9 @@ void *osal_dma_alloc_coherent_aligned(struct ecore_dev *p_dev,
>  	uint32_t core_id = rte_lcore_id();
>  	unsigned int socket_id;
>  
> -	if (ecore_mz_count >= RTE_MAX_MEMZONE) {
> +	if (ecore_mz_count >= rte_memzone_max_get()) {
>  		DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
> -		       RTE_MAX_MEMZONE);
> +		       rte_memzone_max_get());
>  		*phys = 0;
>  		return OSAL_NULL;
>  	}
> diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
> index 67e7f75..97e261d 100644
> --- a/drivers/net/qede/base/bcm_osal.h
> +++ b/drivers/net/qede/base/bcm_osal.h
> @@ -477,4 +477,7 @@ enum dbg_status	qed_dbg_alloc_user_data(struct ecore_hwfn *p_hwfn,
>  	qed_dbg_alloc_user_data(p_hwfn, user_data_ptr)
>  #define OSAL_DB_REC_OCCURRED(p_hwfn) nothing
>  
> +int ecore_mz_mapping_alloc(void);
> +void ecore_mz_mapping_free(void);
> +
>  #endif /* __BCM_OSAL_H */
> diff --git a/drivers/net/qede/qede_main.c b/drivers/net/qede/qede_main.c
> index 0303903..f116e86 100644
> --- a/drivers/net/qede/qede_main.c
> +++ b/drivers/net/qede/qede_main.c
> @@ -78,6 +78,12 @@ qed_probe(struct ecore_dev *edev, struct rte_pci_device *pci_dev,
>  		return rc;
>  	}
>  
> +	rc = ecore_mz_mapping_alloc();
> +	if (rc) {
> +		DP_ERR(edev, "mem zones array allocation failed\n");
> +		return rc;
> +	}
> +
>  	return rc;
>  }
>  
> @@ -721,6 +727,7 @@ static void qed_remove(struct ecore_dev *edev)
>  	if (!edev)
>  		return;
>  
> +	ecore_mz_mapping_free();
>  	ecore_hw_remove(edev);
>  }
>  
> diff --git a/lib/eal/common/eal_common_memzone.c b/lib/eal/common/eal_common_memzone.c
> index a9cd91f..6c43b7f 100644
> --- a/lib/eal/common/eal_common_memzone.c
> +++ b/lib/eal/common/eal_common_memzone.c
> @@ -22,6 +22,10 @@
>  #include "eal_private.h"
>  #include "eal_memcfg.h"
>  
> +#define RTE_DEFAULT_MAX_MEMZONE 2560
> +
> +static uint32_t memzone_max = RTE_DEFAULT_MAX_MEMZONE;

should be size_t

> +
>  static inline const struct rte_memzone *
>  memzone_lookup_thread_unsafe(const char *name)
>  {
> @@ -81,8 +85,9 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
>  	/* no more room in config */
>  	if (arr->count >= arr->len) {
>  		RTE_LOG(ERR, EAL,
> -		"%s(): Number of requested memzone segments exceeds RTE_MAX_MEMZONE\n",
> -			__func__);
> +		"%s(): Number of requested memzone segments exceeds max "
> +		"memzone segments (%d >= %d)\n",
> +			__func__, arr->count, arr->len);
>  		rte_errno = ENOSPC;
>  		return NULL;
>  	}
> @@ -396,7 +401,7 @@ rte_eal_memzone_init(void)
>  
>  	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
>  			rte_fbarray_init(&mcfg->memzones, "memzone",
> -			RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) {
> +			rte_memzone_max_get(), sizeof(struct rte_memzone))) {
>  		RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n");
>  		ret = -1;
>  	} else if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
> @@ -430,3 +435,20 @@ void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
>  	}
>  	rte_rwlock_read_unlock(&mcfg->mlock);
>  }
> +
> +int
> +rte_memzone_max_set(uint32_t max)

max should be size_t

> +{
> +	/* Setting max memzone must occur befaore calling rte_eal_init() */
> +	if (eal_get_internal_configuration()->init_complete > 0)
> +		return -1;
> +
> +	memzone_max = max;
> +	return 0;
> +}
> +
> +uint32_t
> +rte_memzone_max_get(void)

should return size_t

> +{
> +	return memzone_max;
> +}


> diff --git a/lib/eal/include/rte_memzone.h b/lib/eal/include/rte_memzone.h
> index 5302caa..ca60409 100644
> --- a/lib/eal/include/rte_memzone.h
> +++ b/lib/eal/include/rte_memzone.h
> @@ -305,6 +305,26 @@ void rte_memzone_dump(FILE *f);
>  void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *arg),
>  		      void *arg);
>  
> +/**
> + * Set max memzone value
> + *
> + * @param max
> + *   Value of max memzone allocations
> + * @return
> + *  0 on success, -1 otherwise
> + */
> +__rte_experimental
> +int rte_memzone_max_set(uint32_t max);
> +
> +/**
> + * Get max memzone value
> + *
> + * @return
> + *   Value of max memzone allocations
> + */
> +__rte_experimental
> +uint32_t rte_memzone_max_get(void);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/eal/version.map b/lib/eal/version.map
> index 6d6978f..717c5b2 100644
> --- a/lib/eal/version.map
> +++ b/lib/eal/version.map
> @@ -430,6 +430,10 @@ EXPERIMENTAL {
>  	rte_thread_create_control;
>  	rte_thread_set_name;
>  	__rte_eal_trace_generic_blob;
> +
> +	# added in 23.07
> +	rte_memzone_max_set;
> +	rte_memzone_max_get;
>  };
>  
>  INTERNAL {
> -- 
> 2.8.4

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [RFC] lib: set/get max memzone segments
  2023-04-19 14:51 ` Tyler Retzlaff
@ 2023-04-20  7:43   ` Thomas Monjalon
  2023-04-20 18:20     ` Tyler Retzlaff
  2023-04-25 13:46   ` Ophir Munk
  1 sibling, 1 reply; 35+ messages in thread
From: Thomas Monjalon @ 2023-04-20  7:43 UTC (permalink / raw)
  To: Tyler Retzlaff
  Cc: Ophir Munk, dev, Bruce Richardson, Devendra Singh Rawat,
	Alok Prasad, Matan Azrad, Lior Margalit

19/04/2023 16:51, Tyler Retzlaff:
> On Wed, Apr 19, 2023 at 11:36:34AM +0300, Ophir Munk wrote:
> > In current DPDK the RTE_MAX_MEMZONE definition is unconditionally hard
> > coded as 2560.  For applications requiring different values of this
> > parameter – it is more convenient to set the max value via an rte API -
> > rather than changing the dpdk source code per application.  In many
> > organizations, the possibility to compile a private DPDK library for a
> > particular application does not exist at all.  With this option there is
> > no need to recompile DPDK and it allows using an in-box packaged DPDK.
> > An example usage for updating the RTE_MAX_MEMZONE would be of an
> > application that uses the DPDK mempool library which is based on DPDK
> > memzone library.  The application may need to create a number of
> > steering tables, each of which will require its own mempool allocation.
> > This commit is not about how to optimize the application usage of
> > mempool nor about how to improve the mempool implementation based on
> > memzone.  It is about how to make the max memzone definition - run-time
> > customized.
> > This commit adds an API which must be called before rte_eal_init():
> > rte_memzone_max_set(int max).  If not called, the default memzone
> > (RTE_MAX_MEMZONE) is used.  There is also an API to query the effective
> > max memzone: rte_memzone_max_get().
> > 
> > Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
> > ---
> 
> the use case of each application may want a different non-hard coded
> value makes sense.
> 
> it's less clear to me that requiring it be called before eal init makes
> sense over just providing it as configuration to eal init so that it is
> composed.

Why do you think it would be better as EAL init option?
From an API perspective, I think it is simpler to call a dedicated function.
And I don't think a user wants to deal with it when starting the application.

> can you elaborate further on why you need get if you have a one-shot
> set? why would the application not know the value if you can only ever
> call it once before init?

The "get" function is used in this patch by test and qede driver.
The application could use it as well, especially to query the default value.




^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [RFC] lib: set/get max memzone segments
  2023-04-20  7:43   ` Thomas Monjalon
@ 2023-04-20 18:20     ` Tyler Retzlaff
  2023-04-21  8:34       ` Thomas Monjalon
  0 siblings, 1 reply; 35+ messages in thread
From: Tyler Retzlaff @ 2023-04-20 18:20 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: Ophir Munk, dev, Bruce Richardson, Devendra Singh Rawat,
	Alok Prasad, Matan Azrad, Lior Margalit

On Thu, Apr 20, 2023 at 09:43:28AM +0200, Thomas Monjalon wrote:
> 19/04/2023 16:51, Tyler Retzlaff:
> > On Wed, Apr 19, 2023 at 11:36:34AM +0300, Ophir Munk wrote:
> > > In current DPDK the RTE_MAX_MEMZONE definition is unconditionally hard
> > > coded as 2560.  For applications requiring different values of this
> > > parameter – it is more convenient to set the max value via an rte API -
> > > rather than changing the dpdk source code per application.  In many
> > > organizations, the possibility to compile a private DPDK library for a
> > > particular application does not exist at all.  With this option there is
> > > no need to recompile DPDK and it allows using an in-box packaged DPDK.
> > > An example usage for updating the RTE_MAX_MEMZONE would be of an
> > > application that uses the DPDK mempool library which is based on DPDK
> > > memzone library.  The application may need to create a number of
> > > steering tables, each of which will require its own mempool allocation.
> > > This commit is not about how to optimize the application usage of
> > > mempool nor about how to improve the mempool implementation based on
> > > memzone.  It is about how to make the max memzone definition - run-time
> > > customized.
> > > This commit adds an API which must be called before rte_eal_init():
> > > rte_memzone_max_set(int max).  If not called, the default memzone
> > > (RTE_MAX_MEMZONE) is used.  There is also an API to query the effective
> > > max memzone: rte_memzone_max_get().
> > > 
> > > Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
> > > ---
> > 
> > the use case of each application may want a different non-hard coded
> > value makes sense.
> > 
> > it's less clear to me that requiring it be called before eal init makes
> > sense over just providing it as configuration to eal init so that it is
> > composed.
> 
> Why do you think it would be better as EAL init option?
> From an API perspective, I think it is simpler to call a dedicated function.
> And I don't think a user wants to deal with it when starting the application.

because a dedicated function that can be called detached from the eal
state enables an opportunity for accidental and confusing use outside
the correct context.

i know the above prescribes not to do this but.

now you can call set after eal init, but we protect about calling it
after init by failing. what do we do sensibly with the failure?

> 
> > can you elaborate further on why you need get if you have a one-shot
> > set? why would the application not know the value if you can only ever
> > call it once before init?
> 
> The "get" function is used in this patch by test and qede driver.
> The application could use it as well, especially to query the default value.

this seems incoherent to me, why does the application not know if it has
called set or not? if it called set it knows what the value is, if it didn't
call set it knows what the default is.

anyway, the use case is valid and i would like to see the ability to
change it dynamically i'd prefer not to see an api like this be introduced
as prescribed but that's for you folks to decide.

anyway, i own a lot of apis that operate just like the proposed and
they're great source of support overhead. i prefer not to rely on
documenting a contract when i can enforce the contract and implicit state
machine mechanically with the api instead.

fwiw a nicer pattern for doing this one of framework influencing config
might look something like this.

struct eal_config config;

eal_config_init(&config); // defaults are set entire state made valid
eal_config_set_max_memzone(&config, 1024); // default is overridden

rte_eal_init(&config);

ty

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [RFC] lib: set/get max memzone segments
  2023-04-20 18:20     ` Tyler Retzlaff
@ 2023-04-21  8:34       ` Thomas Monjalon
  2023-04-21 11:08         ` Morten Brørup
  0 siblings, 1 reply; 35+ messages in thread
From: Thomas Monjalon @ 2023-04-21  8:34 UTC (permalink / raw)
  To: Tyler Retzlaff
  Cc: Ophir Munk, dev, Bruce Richardson, Devendra Singh Rawat,
	Alok Prasad, Matan Azrad, Lior Margalit

20/04/2023 20:20, Tyler Retzlaff:
> On Thu, Apr 20, 2023 at 09:43:28AM +0200, Thomas Monjalon wrote:
> > 19/04/2023 16:51, Tyler Retzlaff:
> > > On Wed, Apr 19, 2023 at 11:36:34AM +0300, Ophir Munk wrote:
> > > > In current DPDK the RTE_MAX_MEMZONE definition is unconditionally hard
> > > > coded as 2560.  For applications requiring different values of this
> > > > parameter – it is more convenient to set the max value via an rte API -
> > > > rather than changing the dpdk source code per application.  In many
> > > > organizations, the possibility to compile a private DPDK library for a
> > > > particular application does not exist at all.  With this option there is
> > > > no need to recompile DPDK and it allows using an in-box packaged DPDK.
> > > > An example usage for updating the RTE_MAX_MEMZONE would be of an
> > > > application that uses the DPDK mempool library which is based on DPDK
> > > > memzone library.  The application may need to create a number of
> > > > steering tables, each of which will require its own mempool allocation.
> > > > This commit is not about how to optimize the application usage of
> > > > mempool nor about how to improve the mempool implementation based on
> > > > memzone.  It is about how to make the max memzone definition - run-time
> > > > customized.
> > > > This commit adds an API which must be called before rte_eal_init():
> > > > rte_memzone_max_set(int max).  If not called, the default memzone
> > > > (RTE_MAX_MEMZONE) is used.  There is also an API to query the effective
> > > > max memzone: rte_memzone_max_get().
> > > > 
> > > > Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
> > > > ---
> > > 
> > > the use case of each application may want a different non-hard coded
> > > value makes sense.
> > > 
> > > it's less clear to me that requiring it be called before eal init makes
> > > sense over just providing it as configuration to eal init so that it is
> > > composed.
> > 
> > Why do you think it would be better as EAL init option?
> > From an API perspective, I think it is simpler to call a dedicated function.
> > And I don't think a user wants to deal with it when starting the application.
> 
> because a dedicated function that can be called detached from the eal
> state enables an opportunity for accidental and confusing use outside
> the correct context.
> 
> i know the above prescribes not to do this but.
> 
> now you can call set after eal init, but we protect about calling it
> after init by failing. what do we do sensibly with the failure?

It would be a developer mistake which could be fix during development stage
very easily. I don't see a problem here.

> > > can you elaborate further on why you need get if you have a one-shot
> > > set? why would the application not know the value if you can only ever
> > > call it once before init?
> > 
> > The "get" function is used in this patch by test and qede driver.
> > The application could use it as well, especially to query the default value.
> 
> this seems incoherent to me, why does the application not know if it has
> called set or not? if it called set it knows what the value is, if it didn't
> call set it knows what the default is.

No the application doesn't know the default, it is an internal value.

> anyway, the use case is valid and i would like to see the ability to
> change it dynamically i'd prefer not to see an api like this be introduced
> as prescribed but that's for you folks to decide.
> 
> anyway, i own a lot of apis that operate just like the proposed and
> they're great source of support overhead. i prefer not to rely on
> documenting a contract when i can enforce the contract and implicit state
> machine mechanically with the api instead.
> 
> fwiw a nicer pattern for doing this one of framework influencing config
> might look something like this.
> 
> struct eal_config config;
> 
> eal_config_init(&config); // defaults are set entire state made valid
> eal_config_set_max_memzone(&config, 1024); // default is overridden
> 
> rte_eal_init(&config);

In general, we discovered that functions doing too much are bad
for usability and for ABI stability.
In the function eal_config_init() that you propose,
any change in the struct eal_config will be an ABI breakage.



^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [RFC] lib: set/get max memzone segments
  2023-04-21  8:34       ` Thomas Monjalon
@ 2023-04-21 11:08         ` Morten Brørup
  2023-04-21 14:57           ` Thomas Monjalon
  0 siblings, 1 reply; 35+ messages in thread
From: Morten Brørup @ 2023-04-21 11:08 UTC (permalink / raw)
  To: Ophir Munk, Thomas Monjalon
  Cc: dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad,
	Matan Azrad, Lior Margalit, Tyler Retzlaff

> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Friday, 21 April 2023 10.35
> 
> 20/04/2023 20:20, Tyler Retzlaff:
> > On Thu, Apr 20, 2023 at 09:43:28AM +0200, Thomas Monjalon wrote:
> > > 19/04/2023 16:51, Tyler Retzlaff:
> > > > On Wed, Apr 19, 2023 at 11:36:34AM +0300, Ophir Munk wrote:
> > > > > In current DPDK the RTE_MAX_MEMZONE definition is unconditionally hard
> > > > > coded as 2560.  For applications requiring different values of this
> > > > > parameter – it is more convenient to set the max value via an rte API
> -
> > > > > rather than changing the dpdk source code per application.  In many
> > > > > organizations, the possibility to compile a private DPDK library for a
> > > > > particular application does not exist at all.  With this option there
> is
> > > > > no need to recompile DPDK and it allows using an in-box packaged DPDK.
> > > > > An example usage for updating the RTE_MAX_MEMZONE would be of an
> > > > > application that uses the DPDK mempool library which is based on DPDK
> > > > > memzone library.  The application may need to create a number of
> > > > > steering tables, each of which will require its own mempool
> allocation.
> > > > > This commit is not about how to optimize the application usage of
> > > > > mempool nor about how to improve the mempool implementation based on
> > > > > memzone.  It is about how to make the max memzone definition - run-
> time
> > > > > customized.
> > > > > This commit adds an API which must be called before rte_eal_init():
> > > > > rte_memzone_max_set(int max).  If not called, the default memzone
> > > > > (RTE_MAX_MEMZONE) is used.  There is also an API to query the
> effective
> > > > > max memzone: rte_memzone_max_get().
> > > > >
> > > > > Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
> > > > > ---
> > > >
> > > > the use case of each application may want a different non-hard coded
> > > > value makes sense.
> > > >
> > > > it's less clear to me that requiring it be called before eal init makes
> > > > sense over just providing it as configuration to eal init so that it is
> > > > composed.
> > >
> > > Why do you think it would be better as EAL init option?
> > > From an API perspective, I think it is simpler to call a dedicated
> function.
> > > And I don't think a user wants to deal with it when starting the
> application.
> >
> > because a dedicated function that can be called detached from the eal
> > state enables an opportunity for accidental and confusing use outside
> > the correct context.
> >
> > i know the above prescribes not to do this but.
> >
> > now you can call set after eal init, but we protect about calling it
> > after init by failing. what do we do sensibly with the failure?
> 
> It would be a developer mistake which could be fix during development stage
> very easily. I don't see a problem here.

Why is this not just a command line parameter, like other EAL configuration options?

Do any other pre-init APIs exist, or are you introducing a new design pattern for configuring EAL?

Any application can simply modify the command line parameters before calling EAL init. It doesn't need to pass the command line parameters as-is to EAL init.

In other words: There is an existing design pattern for configuring EAL, why introduce a new design pattern?

If we want to expose APIs for configuring EAL instead of passing command line parameters, such APIs should be added for all EAL configuration parameters. That would be nice, but I dislike that some EAL configuration parameters must be passed using one method and some other passed using another method.


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [RFC] lib: set/get max memzone segments
  2023-04-21 11:08         ` Morten Brørup
@ 2023-04-21 14:57           ` Thomas Monjalon
  2023-04-21 15:19             ` Morten Brørup
  0 siblings, 1 reply; 35+ messages in thread
From: Thomas Monjalon @ 2023-04-21 14:57 UTC (permalink / raw)
  To: Morten Brørup
  Cc: Ophir Munk, dev, Bruce Richardson, Devendra Singh Rawat,
	Alok Prasad, Matan Azrad, Lior Margalit, Tyler Retzlaff

21/04/2023 13:08, Morten Brørup:
> > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > Sent: Friday, 21 April 2023 10.35
> > 20/04/2023 20:20, Tyler Retzlaff:
> > > On Thu, Apr 20, 2023 at 09:43:28AM +0200, Thomas Monjalon wrote:
> > > > 19/04/2023 16:51, Tyler Retzlaff:
> > > > > On Wed, Apr 19, 2023 at 11:36:34AM +0300, Ophir Munk wrote:
> > > > > > In current DPDK the RTE_MAX_MEMZONE definition is unconditionally hard
> > > > > > coded as 2560.  For applications requiring different values of this
> > > > > > parameter – it is more convenient to set the max value via an rte API
> > -
> > > > > > rather than changing the dpdk source code per application.  In many
> > > > > > organizations, the possibility to compile a private DPDK library for a
> > > > > > particular application does not exist at all.  With this option there
> > is
> > > > > > no need to recompile DPDK and it allows using an in-box packaged DPDK.
> > > > > > An example usage for updating the RTE_MAX_MEMZONE would be of an
> > > > > > application that uses the DPDK mempool library which is based on DPDK
> > > > > > memzone library.  The application may need to create a number of
> > > > > > steering tables, each of which will require its own mempool
> > allocation.
> > > > > > This commit is not about how to optimize the application usage of
> > > > > > mempool nor about how to improve the mempool implementation based on
> > > > > > memzone.  It is about how to make the max memzone definition - run-
> > time
> > > > > > customized.
> > > > > > This commit adds an API which must be called before rte_eal_init():
> > > > > > rte_memzone_max_set(int max).  If not called, the default memzone
> > > > > > (RTE_MAX_MEMZONE) is used.  There is also an API to query the
> > effective
> > > > > > max memzone: rte_memzone_max_get().
> > > > > >
> > > > > > Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
> > > > > > ---
> > > > >
> > > > > the use case of each application may want a different non-hard coded
> > > > > value makes sense.
> > > > >
> > > > > it's less clear to me that requiring it be called before eal init makes
> > > > > sense over just providing it as configuration to eal init so that it is
> > > > > composed.
> > > >
> > > > Why do you think it would be better as EAL init option?
> > > > From an API perspective, I think it is simpler to call a dedicated
> > function.
> > > > And I don't think a user wants to deal with it when starting the
> > application.
> > >
> > > because a dedicated function that can be called detached from the eal
> > > state enables an opportunity for accidental and confusing use outside
> > > the correct context.
> > >
> > > i know the above prescribes not to do this but.
> > >
> > > now you can call set after eal init, but we protect about calling it
> > > after init by failing. what do we do sensibly with the failure?
> > 
> > It would be a developer mistake which could be fix during development stage
> > very easily. I don't see a problem here.
> 
> Why is this not just a command line parameter, like other EAL configuration options?
> 
> Do any other pre-init APIs exist, or are you introducing a new design pattern for configuring EAL?

Let's say it is a "new" design pattern, as discussed multiple times in previous years.
But this one is only for the application,
it is not a user configuration as in rte_eal_init(int argc, char **argv).

> Any application can simply modify the command line parameters before calling EAL init. It doesn't need to pass the command line parameters as-is to EAL init.

It is not very easy to use.

> In other words: There is an existing design pattern for configuring EAL, why introduce a new design pattern?

Because argc/argv is a bad pattern.
We had multiple requests to avoid it.
So when introducing a new option, it is better to avoid it.

> If we want to expose APIs for configuring EAL instead of passing command line parameters, such APIs should be added for all EAL configuration parameters.

The memzone parameter is not supposed to be configured by the user,
so it does not make sense to expose it via argc/argv.

> That would be nice, but I dislike that some EAL configuration parameters must be passed using one method and some other passed using another method.

We asked multiple times for such rework.
And the patches from Bruce to split some EAL parts are in this direction.
If you want to propose some new functions to configure EAL, you are welcome.




^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [RFC] lib: set/get max memzone segments
  2023-04-21 14:57           ` Thomas Monjalon
@ 2023-04-21 15:19             ` Morten Brørup
  2023-04-25 16:38               ` Ophir Munk
  0 siblings, 1 reply; 35+ messages in thread
From: Morten Brørup @ 2023-04-21 15:19 UTC (permalink / raw)
  To: Thomas Monjalon
  Cc: Ophir Munk, dev, Bruce Richardson, Devendra Singh Rawat,
	Alok Prasad, Matan Azrad, Lior Margalit, Tyler Retzlaff

> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Friday, 21 April 2023 16.57
> 
> 21/04/2023 13:08, Morten Brørup:
> > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > Sent: Friday, 21 April 2023 10.35
> > > 20/04/2023 20:20, Tyler Retzlaff:
> > > > On Thu, Apr 20, 2023 at 09:43:28AM +0200, Thomas Monjalon wrote:
> > > > > 19/04/2023 16:51, Tyler Retzlaff:
> > > > > > On Wed, Apr 19, 2023 at 11:36:34AM +0300, Ophir Munk wrote:
> > > > > > > In current DPDK the RTE_MAX_MEMZONE definition is unconditionally
> hard
> > > > > > > coded as 2560.  For applications requiring different values of
> this
> > > > > > > parameter – it is more convenient to set the max value via an rte
> API
> > > -
> > > > > > > rather than changing the dpdk source code per application.  In
> many
> > > > > > > organizations, the possibility to compile a private DPDK library
> for a
> > > > > > > particular application does not exist at all.  With this option
> there
> > > is
> > > > > > > no need to recompile DPDK and it allows using an in-box packaged
> DPDK.
> > > > > > > An example usage for updating the RTE_MAX_MEMZONE would be of an
> > > > > > > application that uses the DPDK mempool library which is based on
> DPDK
> > > > > > > memzone library.  The application may need to create a number of
> > > > > > > steering tables, each of which will require its own mempool
> > > allocation.
> > > > > > > This commit is not about how to optimize the application usage of
> > > > > > > mempool nor about how to improve the mempool implementation based
> on
> > > > > > > memzone.  It is about how to make the max memzone definition -
> run-
> > > time
> > > > > > > customized.
> > > > > > > This commit adds an API which must be called before
> rte_eal_init():
> > > > > > > rte_memzone_max_set(int max).  If not called, the default memzone
> > > > > > > (RTE_MAX_MEMZONE) is used.  There is also an API to query the
> > > effective
> > > > > > > max memzone: rte_memzone_max_get().
> > > > > > >
> > > > > > > Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
> > > > > > > ---
> > > > > >
> > > > > > the use case of each application may want a different non-hard coded
> > > > > > value makes sense.
> > > > > >
> > > > > > it's less clear to me that requiring it be called before eal init
> makes
> > > > > > sense over just providing it as configuration to eal init so that it
> is
> > > > > > composed.
> > > > >
> > > > > Why do you think it would be better as EAL init option?
> > > > > From an API perspective, I think it is simpler to call a dedicated
> > > function.
> > > > > And I don't think a user wants to deal with it when starting the
> > > application.
> > > >
> > > > because a dedicated function that can be called detached from the eal
> > > > state enables an opportunity for accidental and confusing use outside
> > > > the correct context.
> > > >
> > > > i know the above prescribes not to do this but.
> > > >
> > > > now you can call set after eal init, but we protect about calling it
> > > > after init by failing. what do we do sensibly with the failure?
> > >
> > > It would be a developer mistake which could be fix during development
> stage
> > > very easily. I don't see a problem here.
> >
> > Why is this not just a command line parameter, like other EAL configuration
> options?
> >
> > Do any other pre-init APIs exist, or are you introducing a new design
> pattern for configuring EAL?
> 
> Let's say it is a "new" design pattern, as discussed multiple times in
> previous years.
> But this one is only for the application,
> it is not a user configuration as in rte_eal_init(int argc, char **argv).
> 
> > Any application can simply modify the command line parameters before calling
> EAL init. It doesn't need to pass the command line parameters as-is to EAL
> init.
> 
> It is not very easy to use.
> 
> > In other words: There is an existing design pattern for configuring EAL, why
> introduce a new design pattern?
> 
> Because argc/argv is a bad pattern.
> We had multiple requests to avoid it.
> So when introducing a new option, it is better to avoid it.
> 
> > If we want to expose APIs for configuring EAL instead of passing command
> line parameters, such APIs should be added for all EAL configuration
> parameters.
> 
> The memzone parameter is not supposed to be configured by the user,
> so it does not make sense to expose it via argc/argv.

Good point! I didn't think about that; in hardware appliances, the user has no access to provide EAL command line parameters.

> 
> > That would be nice, but I dislike that some EAL configuration parameters
> must be passed using one method and some other passed using another method.
> 
> We asked multiple times for such rework.

High level directions/goals for DPDK, such as replacing EAL command line parameters with APIs, should be noted on the Roadmap web page.

> And the patches from Bruce to split some EAL parts are in this direction.
> If you want to propose some new functions to configure EAL, you are welcome.

OK. I retract my objection. :-)


^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [EXT] [RFC] lib: set/get max memzone segments
  2023-04-19 13:42 ` [EXT] " Devendra Singh Rawat
@ 2023-04-24 21:07   ` Ophir Munk
  0 siblings, 0 replies; 35+ messages in thread
From: Ophir Munk @ 2023-04-24 21:07 UTC (permalink / raw)
  To: Devendra Singh Rawat, dev, Bruce Richardson, Alok Prasad
  Cc: Matan Azrad, NBU-Contact-Thomas Monjalon (EXTERNAL),
	Lior Margalit, Asaf Penso

Thank you Devendra Singh Rawat for your valuable comments.

> >+int ecore_mz_mapping_alloc(void)
> >+{
> >+	ecore_mz_mapping = rte_malloc("ecore_mz_map", 0,
> >+		rte_memzone_max_get() * sizeof(struct rte_memzone *));
> 
> Second parameter of rte_malloc() should be size and Third parameter should
> be alignment 0 in this case.
> 
> Check
> https://doc.dpdk.org/api/rte__malloc_8h.html#a247c99e8d36300c52729c9e
> e58c2b489

Ack

> >--- a/drivers/net/qede/qede_main.c
> >+++ b/drivers/net/qede/qede_main.c
> >@@ -78,6 +78,12 @@ qed_probe(struct ecore_dev *edev, struct
> >rte_pci_device *pci_dev,
> > 		return rc;
> > 	}
> >
> >+	rc = ecore_mz_mapping_alloc();
> 
> ecore_mz_mapping_alloc() should be called prior to calling
> ecore_hw_prepare().
> 

Ack

> >
> >@@ -721,6 +727,7 @@ static void qed_remove(struct ecore_dev *edev)
> > 	if (!edev)
> > 		return;
> >
> >+	ecore_mz_mapping_free();
> > 	ecore_hw_remove(edev);
> > }
> 
> ecore_mz_mapping_free() should be called after ecore_hw_remove();

Ack

^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [RFC] lib: set/get max memzone segments
  2023-04-19 14:42 ` Stephen Hemminger
@ 2023-04-24 21:43   ` Ophir Munk
  0 siblings, 0 replies; 35+ messages in thread
From: Ophir Munk @ 2023-04-24 21:43 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad,
	Matan Azrad, NBU-Contact-Thomas Monjalon (EXTERNAL),
	Lior Margalit, Asaf Penso

Thank you Stephen Memminger for you comment.

> Subject: Re: [RFC] lib: set/get max memzone segments
> 
> On Wed, 19 Apr 2023 11:36:34 +0300
> Ophir Munk <ophirmu@nvidia.com> wrote:
> 
> > +int ecore_mz_mapping_alloc(void)
> > +{
> > +	ecore_mz_mapping = rte_malloc("ecore_mz_map", 0,
> > +		rte_memzone_max_get() * sizeof(struct rte_memzone *));
> 
> Why not use rte_calloc(), 

rte_malloc() replaced with rte_zmalloc().

> and devices should be using NUMA aware
> allocation to put the memzone on same NUMA node as the PCI device.

I leave this optimization to driver developers. I don't think it should be part of this RFC.


^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [RFC] lib: set/get max memzone segments
  2023-04-19 14:51 ` Tyler Retzlaff
  2023-04-20  7:43   ` Thomas Monjalon
@ 2023-04-25 13:46   ` Ophir Munk
  1 sibling, 0 replies; 35+ messages in thread
From: Ophir Munk @ 2023-04-25 13:46 UTC (permalink / raw)
  To: Tyler Retzlaff
  Cc: dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad,
	Matan Azrad, NBU-Contact-Thomas Monjalon (EXTERNAL),
	Lior Margalit

Thank you, Tyler Retzlaff, for your comments.

> > --- a/app/test/test_memzone.c
> > +++ b/app/test/test_memzone.c
> > @@ -871,7 +871,7 @@ test_memzone_bounded(void)  static int
> >  test_memzone_free(void)
> >  {
> > -	const struct rte_memzone *mz[RTE_MAX_MEMZONE + 1];
> > +	const struct rte_memzone *mz[rte_memzone_max_get() + 1];
> 
> please no more VLAs even if in tests.
> 

VLA replaced with dynamic allocation. 

> > --- a/lib/eal/common/eal_common_memzone.c
> > +++ b/lib/eal/common/eal_common_memzone.c
> > @@ -22,6 +22,10 @@
> >  #include "eal_private.h"
> >  #include "eal_memcfg.h"
> >
> > +#define RTE_DEFAULT_MAX_MEMZONE 2560
> > +
> > +static uint32_t memzone_max = RTE_DEFAULT_MAX_MEMZONE;
> 
> should be size_t
> 

Ack

> > +int
> > +rte_memzone_max_set(uint32_t max)
> 
> max should be size_t
> 
> > +{

Ack

> > +uint32_t
> > +rte_memzone_max_get(void)
> 
> should return size_t
> 

Ack


^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [RFC] lib: set/get max memzone segments
  2023-04-21 15:19             ` Morten Brørup
@ 2023-04-25 16:38               ` Ophir Munk
  0 siblings, 0 replies; 35+ messages in thread
From: Ophir Munk @ 2023-04-25 16:38 UTC (permalink / raw)
  To: Morten Brørup, NBU-Contact-Thomas Monjalon (EXTERNAL)
  Cc: dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad,
	Matan Azrad, Lior Margalit, Tyler Retzlaff, Asaf Penso

Thank you Morten Brorup and Thomas Monjalon for the fruitful discussion.
I am sending a V2 version that meets the understandings in the RFC so far.
If confirmed I will send PATCH V1.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [RFC V2] lib: set/get max memzone segments
  2023-04-19  8:36 [RFC] lib: set/get max memzone segments Ophir Munk
                   ` (3 preceding siblings ...)
  2023-04-19 14:51 ` Tyler Retzlaff
@ 2023-04-25 16:40 ` Ophir Munk
  2023-05-03  7:26   ` [PATCH V3] " Ophir Munk
  4 siblings, 1 reply; 35+ messages in thread
From: Ophir Munk @ 2023-04-25 16:40 UTC (permalink / raw)
  To: dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad
  Cc: Ophir Munk, Matan Azrad, Thomas Monjalon, Lior Margalit, Asaf Penso

In current DPDK the RTE_MAX_MEMZONE definition is unconditionally hard
coded as 2560.  For applications requiring different values of this
parameter – it is more convenient to set the max value via an rte API -
rather than changing the dpdk source code per application.  In many
organizations, the possibility to compile a private DPDK library for a
particular application does not exist at all.  With this option there is
no need to recompile DPDK and it allows using an in-box packaged DPDK.
An example usage for updating the RTE_MAX_MEMZONE would be of an
application that uses the DPDK mempool library which is based on DPDK
memzone library.  The application may need to create a number of
steering tables, each of which will require its own mempool allocation.
This commit is not about how to optimize the application usage of
mempool nor about how to improve the mempool implementation based on
memzone.  It is about how to make the max memzone definition - run-time
customized.
This commit adds an API which must be called before rte_eal_init():
rte_memzone_max_set(int max).  If not called, the default memzone
(RTE_MAX_MEMZONE) is used.  There is also an API to query the effective
max memzone: rte_memzone_max_get().

Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
---
 app/test/test_func_reentrancy.c     |  2 +-
 app/test/test_malloc_perf.c         |  2 +-
 app/test/test_memzone.c             | 43 ++++++++++++++++++++++++-------------
 config/rte_config.h                 |  1 -
 drivers/net/qede/base/bcm_osal.c    | 26 +++++++++++++++++-----
 drivers/net/qede/base/bcm_osal.h    |  3 +++
 drivers/net/qede/qede_main.c        |  7 ++++++
 lib/eal/common/eal_common_memzone.c | 28 +++++++++++++++++++++---
 lib/eal/include/rte_memzone.h       | 20 +++++++++++++++++
 lib/eal/version.map                 |  4 ++++
 10 files changed, 110 insertions(+), 26 deletions(-)

diff --git a/app/test/test_func_reentrancy.c b/app/test/test_func_reentrancy.c
index d1ed5d4..ae9de6f 100644
--- a/app/test/test_func_reentrancy.c
+++ b/app/test/test_func_reentrancy.c
@@ -51,7 +51,7 @@ typedef void (*case_clean_t)(unsigned lcore_id);
 #define MEMPOOL_ELT_SIZE                    (sizeof(uint32_t))
 #define MEMPOOL_SIZE                        (4)
 
-#define MAX_LCORES	(RTE_MAX_MEMZONE / (MAX_ITER_MULTI * 4U))
+#define MAX_LCORES	(rte_memzone_max_get() / (MAX_ITER_MULTI * 4U))
 
 static uint32_t obj_count;
 static uint32_t synchro;
diff --git a/app/test/test_malloc_perf.c b/app/test/test_malloc_perf.c
index ccec43a..9bd1662 100644
--- a/app/test/test_malloc_perf.c
+++ b/app/test/test_malloc_perf.c
@@ -165,7 +165,7 @@ test_malloc_perf(void)
 		return -1;
 
 	if (test_alloc_perf("rte_memzone_reserve", memzone_alloc, memzone_free,
-			NULL, memset_us_gb, RTE_MAX_MEMZONE - 1) < 0)
+			NULL, memset_us_gb, rte_memzone_max_get() - 1) < 0)
 		return -1;
 
 	return 0;
diff --git a/app/test/test_memzone.c b/app/test/test_memzone.c
index c9255e5..36b9790 100644
--- a/app/test/test_memzone.c
+++ b/app/test/test_memzone.c
@@ -871,9 +871,18 @@ test_memzone_bounded(void)
 static int
 test_memzone_free(void)
 {
-	const struct rte_memzone *mz[RTE_MAX_MEMZONE + 1];
+	size_t mz_size;
+	const struct rte_memzone **mz;
 	int i;
 	char name[20];
+	int rc = -1;
+
+	mz_size = (rte_memzone_max_get() + 1) * sizeof(struct rte_memzone *);
+	mz = rte_zmalloc("memzone_test", mz_size, 0);
+	if (!mz) {
+		printf("Fail allocating memzone test array\n");
+		return rc;
+	}
 
 	mz[0] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone0"), 2000,
 			SOCKET_ID_ANY, 0);
@@ -881,42 +890,42 @@ test_memzone_free(void)
 			SOCKET_ID_ANY, 0);
 
 	if (mz[0] > mz[1])
-		return -1;
+		goto exit_test;
 	if (!rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone0")))
-		return -1;
+		goto exit_test;
 	if (!rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone1")))
-		return -1;
+		goto exit_test;
 
 	if (rte_memzone_free(mz[0])) {
 		printf("Fail memzone free - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone0"))) {
 		printf("Found previously free memzone - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	mz[2] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone2"), 2000,
 			SOCKET_ID_ANY, 0);
 
 	if (mz[2] > mz[1]) {
 		printf("tempzone2 should have gotten the free entry from tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_free(mz[2])) {
 		printf("Fail memzone free - tempzone2\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone2"))) {
 		printf("Found previously free memzone - tempzone2\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_free(mz[1])) {
 		printf("Fail memzone free - tempzone1\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone1"))) {
 		printf("Found previously free memzone - tempzone1\n");
-		return -1;
+		goto exit_test;
 	}
 
 	i = 0;
@@ -928,7 +937,7 @@ test_memzone_free(void)
 
 	if (rte_memzone_free(mz[0])) {
 		printf("Fail memzone free - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	mz[0] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone0new"), 0,
 			SOCKET_ID_ANY, 0);
@@ -936,17 +945,21 @@ test_memzone_free(void)
 	if (mz[0] == NULL) {
 		printf("Fail to create memzone - tempzone0new - when MAX memzones were "
 				"created and one was free\n");
-		return -1;
+		goto exit_test;
 	}
 
 	for (i = i - 2; i >= 0; i--) {
 		if (rte_memzone_free(mz[i])) {
 			printf("Fail memzone free - tempzone%d\n", i);
-			return -1;
+			goto exit_test;
 		}
 	}
 
-	return 0;
+	rc = 0;
+
+exit_test:
+	rte_free(mz);
+	return rc;
 }
 
 static int test_memzones_left;
diff --git a/config/rte_config.h b/config/rte_config.h
index 7b8c85e..400e44e 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -34,7 +34,6 @@
 #define RTE_MAX_MEM_MB_PER_LIST 32768
 #define RTE_MAX_MEMSEG_PER_TYPE 32768
 #define RTE_MAX_MEM_MB_PER_TYPE 65536
-#define RTE_MAX_MEMZONE 2560
 #define RTE_MAX_TAILQ 32
 #define RTE_LOG_DP_LEVEL RTE_LOG_INFO
 #define RTE_MAX_VFIO_CONTAINERS 64
diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 2c59397..cbeea43 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -47,10 +47,26 @@ void osal_poll_mode_dpc(osal_int_ptr_t hwfn_cookie)
 }
 
 /* Array of memzone pointers */
-static const struct rte_memzone *ecore_mz_mapping[RTE_MAX_MEMZONE];
+static const struct rte_memzone **ecore_mz_mapping;
 /* Counter to track current memzone allocated */
 static uint16_t ecore_mz_count;
 
+int ecore_mz_mapping_alloc(void)
+{
+	ecore_mz_mapping = rte_zmalloc("ecore_mz_map",
+		rte_memzone_max_get() * sizeof(struct rte_memzone *), 0);
+
+	if (!ecore_mz_mapping)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void ecore_mz_mapping_free(void)
+{
+	rte_free(ecore_mz_mapping);
+}
+
 unsigned long qede_log2_align(unsigned long n)
 {
 	unsigned long ret = n ? 1 : 0;
@@ -132,9 +148,9 @@ void *osal_dma_alloc_coherent(struct ecore_dev *p_dev,
 	uint32_t core_id = rte_lcore_id();
 	unsigned int socket_id;
 
-	if (ecore_mz_count >= RTE_MAX_MEMZONE) {
+	if (ecore_mz_count >= rte_memzone_max_get()) {
 		DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
-		       RTE_MAX_MEMZONE);
+		       rte_memzone_max_get());
 		*phys = 0;
 		return OSAL_NULL;
 	}
@@ -171,9 +187,9 @@ void *osal_dma_alloc_coherent_aligned(struct ecore_dev *p_dev,
 	uint32_t core_id = rte_lcore_id();
 	unsigned int socket_id;
 
-	if (ecore_mz_count >= RTE_MAX_MEMZONE) {
+	if (ecore_mz_count >= rte_memzone_max_get()) {
 		DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
-		       RTE_MAX_MEMZONE);
+		       rte_memzone_max_get());
 		*phys = 0;
 		return OSAL_NULL;
 	}
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 67e7f75..97e261d 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -477,4 +477,7 @@ enum dbg_status	qed_dbg_alloc_user_data(struct ecore_hwfn *p_hwfn,
 	qed_dbg_alloc_user_data(p_hwfn, user_data_ptr)
 #define OSAL_DB_REC_OCCURRED(p_hwfn) nothing
 
+int ecore_mz_mapping_alloc(void);
+void ecore_mz_mapping_free(void);
+
 #endif /* __BCM_OSAL_H */
diff --git a/drivers/net/qede/qede_main.c b/drivers/net/qede/qede_main.c
index 0303903..fd63262 100644
--- a/drivers/net/qede/qede_main.c
+++ b/drivers/net/qede/qede_main.c
@@ -72,6 +72,12 @@ qed_probe(struct ecore_dev *edev, struct rte_pci_device *pci_dev,
 	hw_prepare_params.allow_mdump = false;
 	hw_prepare_params.b_en_pacing = false;
 	hw_prepare_params.epoch = OSAL_GET_EPOCH(ECORE_LEADING_HWFN(edev));
+	rc = ecore_mz_mapping_alloc();
+	if (rc) {
+		DP_ERR(edev, "mem zones array allocation failed\n");
+		return rc;
+	}
+
 	rc = ecore_hw_prepare(edev, &hw_prepare_params);
 	if (rc) {
 		DP_ERR(edev, "hw prepare failed\n");
@@ -722,6 +728,7 @@ static void qed_remove(struct ecore_dev *edev)
 		return;
 
 	ecore_hw_remove(edev);
+	ecore_mz_mapping_free();
 }
 
 static int qed_send_drv_state(struct ecore_dev *edev, bool active)
diff --git a/lib/eal/common/eal_common_memzone.c b/lib/eal/common/eal_common_memzone.c
index a9cd91f..f94330b 100644
--- a/lib/eal/common/eal_common_memzone.c
+++ b/lib/eal/common/eal_common_memzone.c
@@ -22,6 +22,10 @@
 #include "eal_private.h"
 #include "eal_memcfg.h"
 
+#define RTE_DEFAULT_MAX_MEMZONE 2560
+
+static size_t memzone_max = RTE_DEFAULT_MAX_MEMZONE;
+
 static inline const struct rte_memzone *
 memzone_lookup_thread_unsafe(const char *name)
 {
@@ -81,8 +85,9 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 	/* no more room in config */
 	if (arr->count >= arr->len) {
 		RTE_LOG(ERR, EAL,
-		"%s(): Number of requested memzone segments exceeds RTE_MAX_MEMZONE\n",
-			__func__);
+		"%s(): Number of requested memzone segments exceeds max "
+		"memzone segments (%d >= %d)\n",
+			__func__, arr->count, arr->len);
 		rte_errno = ENOSPC;
 		return NULL;
 	}
@@ -396,7 +401,7 @@ rte_eal_memzone_init(void)
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
 			rte_fbarray_init(&mcfg->memzones, "memzone",
-			RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) {
+			rte_memzone_max_get(), sizeof(struct rte_memzone))) {
 		RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n");
 		ret = -1;
 	} else if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
@@ -430,3 +435,20 @@ void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
 	}
 	rte_rwlock_read_unlock(&mcfg->mlock);
 }
+
+int
+rte_memzone_max_set(size_t max)
+{
+	/* Setting max memzone must occur befaore calling rte_eal_init() */
+	if (eal_get_internal_configuration()->init_complete > 0)
+		return -1;
+
+	memzone_max = max;
+	return 0;
+}
+
+size_t
+rte_memzone_max_get(void)
+{
+	return memzone_max;
+}
diff --git a/lib/eal/include/rte_memzone.h b/lib/eal/include/rte_memzone.h
index 5302caa..3ff7c73 100644
--- a/lib/eal/include/rte_memzone.h
+++ b/lib/eal/include/rte_memzone.h
@@ -305,6 +305,26 @@ void rte_memzone_dump(FILE *f);
 void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *arg),
 		      void *arg);
 
+/**
+ * Set max memzone value
+ *
+ * @param max
+ *   Value of max memzone allocations
+ * @return
+ *  0 on success, -1 otherwise
+ */
+__rte_experimental
+int rte_memzone_max_set(size_t max);
+
+/**
+ * Get max memzone value
+ *
+ * @return
+ *   Value of max memzone allocations
+ */
+__rte_experimental
+size_t rte_memzone_max_get(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/version.map b/lib/eal/version.map
index 51a820d..b52a83c 100644
--- a/lib/eal/version.map
+++ b/lib/eal/version.map
@@ -430,6 +430,10 @@ EXPERIMENTAL {
 	rte_thread_create_control;
 	rte_thread_set_name;
 	__rte_eal_trace_generic_blob;
+
+	# added in 23.07
+	rte_memzone_max_set;
+	rte_memzone_max_get;
 };
 
 INTERNAL {
-- 
2.8.4


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH V3] lib: set/get max memzone segments
  2023-04-25 16:40 ` [RFC V2] " Ophir Munk
@ 2023-05-03  7:26   ` Ophir Munk
  2023-05-03 21:41     ` Morten Brørup
                       ` (3 more replies)
  0 siblings, 4 replies; 35+ messages in thread
From: Ophir Munk @ 2023-05-03  7:26 UTC (permalink / raw)
  To: dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad
  Cc: Ophir Munk, Matan Azrad, Thomas Monjalon, Lior Margalit

In current DPDK the RTE_MAX_MEMZONE definition is unconditionally hard
coded as 2560.  For applications requiring different values of this
parameter – it is more convenient to set the max value via an rte API -
rather than changing the dpdk source code per application.  In many
organizations, the possibility to compile a private DPDK library for a
particular application does not exist at all.  With this option there is
no need to recompile DPDK and it allows using an in-box packaged DPDK.
An example usage for updating the RTE_MAX_MEMZONE would be of an
application that uses the DPDK mempool library which is based on DPDK
memzone library.  The application may need to create a number of
steering tables, each of which will require its own mempool allocation.
This commit is not about how to optimize the application usage of
mempool nor about how to improve the mempool implementation based on
memzone.  It is about how to make the max memzone definition - run-time
customized.
This commit adds an API which must be called before rte_eal_init():
rte_memzone_max_set(int max).  If not called, the default memzone
(RTE_MAX_MEMZONE) is used.  There is also an API to query the effective
max memzone: rte_memzone_max_get().

Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
---
 app/test/test_func_reentrancy.c     |  2 +-
 app/test/test_malloc_perf.c         |  2 +-
 app/test/test_memzone.c             | 43 ++++++++++++++++++++++++-------------
 config/rte_config.h                 |  1 -
 drivers/net/qede/base/bcm_osal.c    | 30 ++++++++++++++++++++------
 drivers/net/qede/base/bcm_osal.h    |  3 +++
 drivers/net/qede/qede_main.c        |  7 ++++++
 lib/eal/common/eal_common_memzone.c | 28 +++++++++++++++++++++---
 lib/eal/include/rte_memzone.h       | 20 +++++++++++++++++
 lib/eal/version.map                 |  4 ++++
 10 files changed, 112 insertions(+), 28 deletions(-)

diff --git a/app/test/test_func_reentrancy.c b/app/test/test_func_reentrancy.c
index d1ed5d4..ae9de6f 100644
--- a/app/test/test_func_reentrancy.c
+++ b/app/test/test_func_reentrancy.c
@@ -51,7 +51,7 @@ typedef void (*case_clean_t)(unsigned lcore_id);
 #define MEMPOOL_ELT_SIZE                    (sizeof(uint32_t))
 #define MEMPOOL_SIZE                        (4)
 
-#define MAX_LCORES	(RTE_MAX_MEMZONE / (MAX_ITER_MULTI * 4U))
+#define MAX_LCORES	(rte_memzone_max_get() / (MAX_ITER_MULTI * 4U))
 
 static uint32_t obj_count;
 static uint32_t synchro;
diff --git a/app/test/test_malloc_perf.c b/app/test/test_malloc_perf.c
index ccec43a..9bd1662 100644
--- a/app/test/test_malloc_perf.c
+++ b/app/test/test_malloc_perf.c
@@ -165,7 +165,7 @@ test_malloc_perf(void)
 		return -1;
 
 	if (test_alloc_perf("rte_memzone_reserve", memzone_alloc, memzone_free,
-			NULL, memset_us_gb, RTE_MAX_MEMZONE - 1) < 0)
+			NULL, memset_us_gb, rte_memzone_max_get() - 1) < 0)
 		return -1;
 
 	return 0;
diff --git a/app/test/test_memzone.c b/app/test/test_memzone.c
index c9255e5..36b9790 100644
--- a/app/test/test_memzone.c
+++ b/app/test/test_memzone.c
@@ -871,9 +871,18 @@ test_memzone_bounded(void)
 static int
 test_memzone_free(void)
 {
-	const struct rte_memzone *mz[RTE_MAX_MEMZONE + 1];
+	size_t mz_size;
+	const struct rte_memzone **mz;
 	int i;
 	char name[20];
+	int rc = -1;
+
+	mz_size = (rte_memzone_max_get() + 1) * sizeof(struct rte_memzone *);
+	mz = rte_zmalloc("memzone_test", mz_size, 0);
+	if (!mz) {
+		printf("Fail allocating memzone test array\n");
+		return rc;
+	}
 
 	mz[0] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone0"), 2000,
 			SOCKET_ID_ANY, 0);
@@ -881,42 +890,42 @@ test_memzone_free(void)
 			SOCKET_ID_ANY, 0);
 
 	if (mz[0] > mz[1])
-		return -1;
+		goto exit_test;
 	if (!rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone0")))
-		return -1;
+		goto exit_test;
 	if (!rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone1")))
-		return -1;
+		goto exit_test;
 
 	if (rte_memzone_free(mz[0])) {
 		printf("Fail memzone free - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone0"))) {
 		printf("Found previously free memzone - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	mz[2] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone2"), 2000,
 			SOCKET_ID_ANY, 0);
 
 	if (mz[2] > mz[1]) {
 		printf("tempzone2 should have gotten the free entry from tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_free(mz[2])) {
 		printf("Fail memzone free - tempzone2\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone2"))) {
 		printf("Found previously free memzone - tempzone2\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_free(mz[1])) {
 		printf("Fail memzone free - tempzone1\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone1"))) {
 		printf("Found previously free memzone - tempzone1\n");
-		return -1;
+		goto exit_test;
 	}
 
 	i = 0;
@@ -928,7 +937,7 @@ test_memzone_free(void)
 
 	if (rte_memzone_free(mz[0])) {
 		printf("Fail memzone free - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	mz[0] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone0new"), 0,
 			SOCKET_ID_ANY, 0);
@@ -936,17 +945,21 @@ test_memzone_free(void)
 	if (mz[0] == NULL) {
 		printf("Fail to create memzone - tempzone0new - when MAX memzones were "
 				"created and one was free\n");
-		return -1;
+		goto exit_test;
 	}
 
 	for (i = i - 2; i >= 0; i--) {
 		if (rte_memzone_free(mz[i])) {
 			printf("Fail memzone free - tempzone%d\n", i);
-			return -1;
+			goto exit_test;
 		}
 	}
 
-	return 0;
+	rc = 0;
+
+exit_test:
+	rte_free(mz);
+	return rc;
 }
 
 static int test_memzones_left;
diff --git a/config/rte_config.h b/config/rte_config.h
index 7b8c85e..400e44e 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -34,7 +34,6 @@
 #define RTE_MAX_MEM_MB_PER_LIST 32768
 #define RTE_MAX_MEMSEG_PER_TYPE 32768
 #define RTE_MAX_MEM_MB_PER_TYPE 65536
-#define RTE_MAX_MEMZONE 2560
 #define RTE_MAX_TAILQ 32
 #define RTE_LOG_DP_LEVEL RTE_LOG_INFO
 #define RTE_MAX_VFIO_CONTAINERS 64
diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 2c59397..0458768 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -47,10 +47,26 @@ void osal_poll_mode_dpc(osal_int_ptr_t hwfn_cookie)
 }
 
 /* Array of memzone pointers */
-static const struct rte_memzone *ecore_mz_mapping[RTE_MAX_MEMZONE];
+static const struct rte_memzone **ecore_mz_mapping;
 /* Counter to track current memzone allocated */
 static uint16_t ecore_mz_count;
 
+int ecore_mz_mapping_alloc(void)
+{
+	ecore_mz_mapping = rte_zmalloc("ecore_mz_map",
+		rte_memzone_max_get() * sizeof(struct rte_memzone *), 0);
+
+	if (!ecore_mz_mapping)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void ecore_mz_mapping_free(void)
+{
+	rte_free(ecore_mz_mapping);
+}
+
 unsigned long qede_log2_align(unsigned long n)
 {
 	unsigned long ret = n ? 1 : 0;
@@ -132,9 +148,9 @@ void *osal_dma_alloc_coherent(struct ecore_dev *p_dev,
 	uint32_t core_id = rte_lcore_id();
 	unsigned int socket_id;
 
-	if (ecore_mz_count >= RTE_MAX_MEMZONE) {
-		DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
-		       RTE_MAX_MEMZONE);
+	if (ecore_mz_count >= rte_memzone_max_get()) {
+		DP_ERR(p_dev, "Memzone allocation count exceeds %zu\n",
+		       rte_memzone_max_get());
 		*phys = 0;
 		return OSAL_NULL;
 	}
@@ -171,9 +187,9 @@ void *osal_dma_alloc_coherent_aligned(struct ecore_dev *p_dev,
 	uint32_t core_id = rte_lcore_id();
 	unsigned int socket_id;
 
-	if (ecore_mz_count >= RTE_MAX_MEMZONE) {
-		DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
-		       RTE_MAX_MEMZONE);
+	if (ecore_mz_count >= rte_memzone_max_get()) {
+		DP_ERR(p_dev, "Memzone allocation count exceeds %zu\n",
+		       rte_memzone_max_get());
 		*phys = 0;
 		return OSAL_NULL;
 	}
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 67e7f75..97e261d 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -477,4 +477,7 @@ enum dbg_status	qed_dbg_alloc_user_data(struct ecore_hwfn *p_hwfn,
 	qed_dbg_alloc_user_data(p_hwfn, user_data_ptr)
 #define OSAL_DB_REC_OCCURRED(p_hwfn) nothing
 
+int ecore_mz_mapping_alloc(void);
+void ecore_mz_mapping_free(void);
+
 #endif /* __BCM_OSAL_H */
diff --git a/drivers/net/qede/qede_main.c b/drivers/net/qede/qede_main.c
index 0303903..fd63262 100644
--- a/drivers/net/qede/qede_main.c
+++ b/drivers/net/qede/qede_main.c
@@ -72,6 +72,12 @@ qed_probe(struct ecore_dev *edev, struct rte_pci_device *pci_dev,
 	hw_prepare_params.allow_mdump = false;
 	hw_prepare_params.b_en_pacing = false;
 	hw_prepare_params.epoch = OSAL_GET_EPOCH(ECORE_LEADING_HWFN(edev));
+	rc = ecore_mz_mapping_alloc();
+	if (rc) {
+		DP_ERR(edev, "mem zones array allocation failed\n");
+		return rc;
+	}
+
 	rc = ecore_hw_prepare(edev, &hw_prepare_params);
 	if (rc) {
 		DP_ERR(edev, "hw prepare failed\n");
@@ -722,6 +728,7 @@ static void qed_remove(struct ecore_dev *edev)
 		return;
 
 	ecore_hw_remove(edev);
+	ecore_mz_mapping_free();
 }
 
 static int qed_send_drv_state(struct ecore_dev *edev, bool active)
diff --git a/lib/eal/common/eal_common_memzone.c b/lib/eal/common/eal_common_memzone.c
index a9cd91f..f94330b 100644
--- a/lib/eal/common/eal_common_memzone.c
+++ b/lib/eal/common/eal_common_memzone.c
@@ -22,6 +22,10 @@
 #include "eal_private.h"
 #include "eal_memcfg.h"
 
+#define RTE_DEFAULT_MAX_MEMZONE 2560
+
+static size_t memzone_max = RTE_DEFAULT_MAX_MEMZONE;
+
 static inline const struct rte_memzone *
 memzone_lookup_thread_unsafe(const char *name)
 {
@@ -81,8 +85,9 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 	/* no more room in config */
 	if (arr->count >= arr->len) {
 		RTE_LOG(ERR, EAL,
-		"%s(): Number of requested memzone segments exceeds RTE_MAX_MEMZONE\n",
-			__func__);
+		"%s(): Number of requested memzone segments exceeds max "
+		"memzone segments (%d >= %d)\n",
+			__func__, arr->count, arr->len);
 		rte_errno = ENOSPC;
 		return NULL;
 	}
@@ -396,7 +401,7 @@ rte_eal_memzone_init(void)
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
 			rte_fbarray_init(&mcfg->memzones, "memzone",
-			RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) {
+			rte_memzone_max_get(), sizeof(struct rte_memzone))) {
 		RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n");
 		ret = -1;
 	} else if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
@@ -430,3 +435,20 @@ void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
 	}
 	rte_rwlock_read_unlock(&mcfg->mlock);
 }
+
+int
+rte_memzone_max_set(size_t max)
+{
+	/* Setting max memzone must occur befaore calling rte_eal_init() */
+	if (eal_get_internal_configuration()->init_complete > 0)
+		return -1;
+
+	memzone_max = max;
+	return 0;
+}
+
+size_t
+rte_memzone_max_get(void)
+{
+	return memzone_max;
+}
diff --git a/lib/eal/include/rte_memzone.h b/lib/eal/include/rte_memzone.h
index 5302caa..3ff7c73 100644
--- a/lib/eal/include/rte_memzone.h
+++ b/lib/eal/include/rte_memzone.h
@@ -305,6 +305,26 @@ void rte_memzone_dump(FILE *f);
 void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *arg),
 		      void *arg);
 
+/**
+ * Set max memzone value
+ *
+ * @param max
+ *   Value of max memzone allocations
+ * @return
+ *  0 on success, -1 otherwise
+ */
+__rte_experimental
+int rte_memzone_max_set(size_t max);
+
+/**
+ * Get max memzone value
+ *
+ * @return
+ *   Value of max memzone allocations
+ */
+__rte_experimental
+size_t rte_memzone_max_get(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/version.map b/lib/eal/version.map
index 51a820d..b52a83c 100644
--- a/lib/eal/version.map
+++ b/lib/eal/version.map
@@ -430,6 +430,10 @@ EXPERIMENTAL {
 	rte_thread_create_control;
 	rte_thread_set_name;
 	__rte_eal_trace_generic_blob;
+
+	# added in 23.07
+	rte_memzone_max_set;
+	rte_memzone_max_get;
 };
 
 INTERNAL {
-- 
2.8.4


^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH V3] lib: set/get max memzone segments
  2023-05-03  7:26   ` [PATCH V3] " Ophir Munk
@ 2023-05-03 21:41     ` Morten Brørup
  2023-05-25  6:47       ` Ophir Munk
  2023-05-04  7:27     ` David Marchand
                       ` (2 subsequent siblings)
  3 siblings, 1 reply; 35+ messages in thread
From: Morten Brørup @ 2023-05-03 21:41 UTC (permalink / raw)
  To: Ophir Munk, dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad
  Cc: Ophir Munk, Matan Azrad, Thomas Monjalon, Lior Margalit

> From: Ophir Munk [mailto:ophirmu@nvidia.com]
> Sent: Wednesday, 3 May 2023 09.27
> 
> In current DPDK the RTE_MAX_MEMZONE definition is unconditionally hard
> coded as 2560.  For applications requiring different values of this
> parameter – it is more convenient to set the max value via an rte API -
> rather than changing the dpdk source code per application.  In many
> organizations, the possibility to compile a private DPDK library for a
> particular application does not exist at all.  With this option there is
> no need to recompile DPDK and it allows using an in-box packaged DPDK.
> An example usage for updating the RTE_MAX_MEMZONE would be of an
> application that uses the DPDK mempool library which is based on DPDK
> memzone library.  The application may need to create a number of
> steering tables, each of which will require its own mempool allocation.
> This commit is not about how to optimize the application usage of
> mempool nor about how to improve the mempool implementation based on
> memzone.  It is about how to make the max memzone definition - run-time
> customized.
> This commit adds an API which must be called before rte_eal_init():
> rte_memzone_max_set(int max).  If not called, the default memzone
> (RTE_MAX_MEMZONE) is used.  There is also an API to query the effective
> max memzone: rte_memzone_max_get().
> 
> Signed-off-by: Ophir Munk <ophirmu@nvidia.com>

I retracted my objection to the RFC, but should also have added:

Acked-by: Morten Brørup <mb@smartsharesystems.com>


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V3] lib: set/get max memzone segments
  2023-05-03  7:26   ` [PATCH V3] " Ophir Munk
  2023-05-03 21:41     ` Morten Brørup
@ 2023-05-04  7:27     ` David Marchand
  2023-05-25  6:35       ` Ophir Munk
  2023-05-18 15:54     ` Burakov, Anatoly
  2023-05-24 22:25     ` [PATCH v4] " Ophir Munk
  3 siblings, 1 reply; 35+ messages in thread
From: David Marchand @ 2023-05-04  7:27 UTC (permalink / raw)
  To: Ophir Munk
  Cc: dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad,
	Ophir Munk, Matan Azrad, Thomas Monjalon, Lior Margalit

Hello Ophir,

Good thing someone is looking into this.
Thanks.

I have a few comments.


This commitlog is a bit compact.
Separating it with some empty lines would help digest it.


On Wed, May 3, 2023 at 9:27 AM Ophir Munk <ophirmu@nvidia.com> wrote:
>
> In current DPDK the RTE_MAX_MEMZONE definition is unconditionally hard
> coded as 2560.
> For applications requiring different values of this
> parameter – it is more convenient to set the max value via an rte API -
> rather than changing the dpdk source code per application.  In many
> organizations, the possibility to compile a private DPDK library for a
> particular application does not exist at all.  With this option there is
> no need to recompile DPDK and it allows using an in-box packaged DPDK.
> An example usage for updating the RTE_MAX_MEMZONE would be of an
> application that uses the DPDK mempool library which is based on DPDK
> memzone library.  The application may need to create a number of
> steering tables, each of which will require its own mempool allocation.
> This commit is not about how to optimize the application usage of
> mempool nor about how to improve the mempool implementation based on
> memzone.  It is about how to make the max memzone definition - run-time
> customized.

The code was using a rather short name "RTE_MAX_MEMZONE".
But I prefer we spell this as "max memzones count" (or a better
wording), in the descriptions/comments.


> This commit adds an API which must be called before rte_eal_init():
> rte_memzone_max_set(int max).  If not called, the default memzone

Afaics, this prototype got unaligned with the patch content, as a
size_t is now taken as input.
You can simply mention rte_memzone_max_set().


> (RTE_MAX_MEMZONE) is used.  There is also an API to query the effective

After the patch RTE_MAX_MEMZONE does not exist anymore.


> max memzone: rte_memzone_max_get().
>
> Signed-off-by: Ophir Munk <ophirmu@nvidia.com>


A global comment on the patch:

rte_calloc provides what you want in all cases below: an array of objects.
I prefer rte_calloc(count, sizeof elem) to rte_zmalloc(count * sizeof elem).

This also avoids a temporary variable to compute the total size of
such an array.


> ---
>  app/test/test_func_reentrancy.c     |  2 +-
>  app/test/test_malloc_perf.c         |  2 +-
>  app/test/test_memzone.c             | 43 ++++++++++++++++++++++++-------------
>  config/rte_config.h                 |  1 -
>  drivers/net/qede/base/bcm_osal.c    | 30 ++++++++++++++++++++------
>  drivers/net/qede/base/bcm_osal.h    |  3 +++
>  drivers/net/qede/qede_main.c        |  7 ++++++
>  lib/eal/common/eal_common_memzone.c | 28 +++++++++++++++++++++---
>  lib/eal/include/rte_memzone.h       | 20 +++++++++++++++++
>  lib/eal/version.map                 |  4 ++++
>  10 files changed, 112 insertions(+), 28 deletions(-)
>
> diff --git a/app/test/test_func_reentrancy.c b/app/test/test_func_reentrancy.c
> index d1ed5d4..ae9de6f 100644
> --- a/app/test/test_func_reentrancy.c
> +++ b/app/test/test_func_reentrancy.c
> @@ -51,7 +51,7 @@ typedef void (*case_clean_t)(unsigned lcore_id);
>  #define MEMPOOL_ELT_SIZE                    (sizeof(uint32_t))
>  #define MEMPOOL_SIZE                        (4)
>
> -#define MAX_LCORES     (RTE_MAX_MEMZONE / (MAX_ITER_MULTI * 4U))
> +#define MAX_LCORES     (rte_memzone_max_get() / (MAX_ITER_MULTI * 4U))
>
>  static uint32_t obj_count;
>  static uint32_t synchro;
> diff --git a/app/test/test_malloc_perf.c b/app/test/test_malloc_perf.c
> index ccec43a..9bd1662 100644
> --- a/app/test/test_malloc_perf.c
> +++ b/app/test/test_malloc_perf.c
> @@ -165,7 +165,7 @@ test_malloc_perf(void)
>                 return -1;
>
>         if (test_alloc_perf("rte_memzone_reserve", memzone_alloc, memzone_free,
> -                       NULL, memset_us_gb, RTE_MAX_MEMZONE - 1) < 0)
> +                       NULL, memset_us_gb, rte_memzone_max_get() - 1) < 0)
>                 return -1;
>
>         return 0;
> diff --git a/app/test/test_memzone.c b/app/test/test_memzone.c
> index c9255e5..36b9790 100644
> --- a/app/test/test_memzone.c
> +++ b/app/test/test_memzone.c
> @@ -871,9 +871,18 @@ test_memzone_bounded(void)
>  static int
>  test_memzone_free(void)
>  {
> -       const struct rte_memzone *mz[RTE_MAX_MEMZONE + 1];
> +       size_t mz_size;
> +       const struct rte_memzone **mz;
>         int i;
>         char name[20];
> +       int rc = -1;
> +
> +       mz_size = (rte_memzone_max_get() + 1) * sizeof(struct rte_memzone *);
> +       mz = rte_zmalloc("memzone_test", mz_size, 0);
> +       if (!mz) {
> +               printf("Fail allocating memzone test array\n");
> +               return rc;
> +       }
>
>         mz[0] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone0"), 2000,
>                         SOCKET_ID_ANY, 0);
> @@ -881,42 +890,42 @@ test_memzone_free(void)
>                         SOCKET_ID_ANY, 0);
>
>         if (mz[0] > mz[1])
> -               return -1;
> +               goto exit_test;
>         if (!rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone0")))
> -               return -1;
> +               goto exit_test;
>         if (!rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone1")))
> -               return -1;
> +               goto exit_test;
>
>         if (rte_memzone_free(mz[0])) {
>                 printf("Fail memzone free - tempzone0\n");
> -               return -1;
> +               goto exit_test;
>         }
>         if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone0"))) {
>                 printf("Found previously free memzone - tempzone0\n");
> -               return -1;
> +               goto exit_test;
>         }
>         mz[2] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone2"), 2000,
>                         SOCKET_ID_ANY, 0);
>
>         if (mz[2] > mz[1]) {
>                 printf("tempzone2 should have gotten the free entry from tempzone0\n");
> -               return -1;
> +               goto exit_test;
>         }
>         if (rte_memzone_free(mz[2])) {
>                 printf("Fail memzone free - tempzone2\n");
> -               return -1;
> +               goto exit_test;
>         }
>         if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone2"))) {
>                 printf("Found previously free memzone - tempzone2\n");
> -               return -1;
> +               goto exit_test;
>         }
>         if (rte_memzone_free(mz[1])) {
>                 printf("Fail memzone free - tempzone1\n");
> -               return -1;
> +               goto exit_test;
>         }
>         if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone1"))) {
>                 printf("Found previously free memzone - tempzone1\n");
> -               return -1;
> +               goto exit_test;
>         }
>
>         i = 0;
> @@ -928,7 +937,7 @@ test_memzone_free(void)
>
>         if (rte_memzone_free(mz[0])) {
>                 printf("Fail memzone free - tempzone0\n");
> -               return -1;
> +               goto exit_test;
>         }
>         mz[0] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone0new"), 0,
>                         SOCKET_ID_ANY, 0);
> @@ -936,17 +945,21 @@ test_memzone_free(void)
>         if (mz[0] == NULL) {
>                 printf("Fail to create memzone - tempzone0new - when MAX memzones were "
>                                 "created and one was free\n");
> -               return -1;
> +               goto exit_test;
>         }
>
>         for (i = i - 2; i >= 0; i--) {
>                 if (rte_memzone_free(mz[i])) {
>                         printf("Fail memzone free - tempzone%d\n", i);
> -                       return -1;
> +                       goto exit_test;
>                 }
>         }
>
> -       return 0;
> +       rc = 0;
> +
> +exit_test:
> +       rte_free(mz);
> +       return rc;
>  }
>
>  static int test_memzones_left;
> diff --git a/config/rte_config.h b/config/rte_config.h
> index 7b8c85e..400e44e 100644
> --- a/config/rte_config.h
> +++ b/config/rte_config.h
> @@ -34,7 +34,6 @@
>  #define RTE_MAX_MEM_MB_PER_LIST 32768
>  #define RTE_MAX_MEMSEG_PER_TYPE 32768
>  #define RTE_MAX_MEM_MB_PER_TYPE 65536
> -#define RTE_MAX_MEMZONE 2560
>  #define RTE_MAX_TAILQ 32
>  #define RTE_LOG_DP_LEVEL RTE_LOG_INFO
>  #define RTE_MAX_VFIO_CONTAINERS 64
> diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
> index 2c59397..0458768 100644
> --- a/drivers/net/qede/base/bcm_osal.c
> +++ b/drivers/net/qede/base/bcm_osal.c
> @@ -47,10 +47,26 @@ void osal_poll_mode_dpc(osal_int_ptr_t hwfn_cookie)
>  }
>
>  /* Array of memzone pointers */
> -static const struct rte_memzone *ecore_mz_mapping[RTE_MAX_MEMZONE];
> +static const struct rte_memzone **ecore_mz_mapping;
>  /* Counter to track current memzone allocated */
>  static uint16_t ecore_mz_count;
>
> +int ecore_mz_mapping_alloc(void)
> +{
> +       ecore_mz_mapping = rte_zmalloc("ecore_mz_map",
> +               rte_memzone_max_get() * sizeof(struct rte_memzone *), 0);

I think we should allocate only for the first call and we are missing
some refcount.


> +
> +       if (!ecore_mz_mapping)
> +               return -ENOMEM;
> +
> +       return 0;
> +}
> +
> +void ecore_mz_mapping_free(void)
> +{
> +       rte_free(ecore_mz_mapping);

Won't we release this array while another qed device is still in use?


> +}
> +
>  unsigned long qede_log2_align(unsigned long n)
>  {
>         unsigned long ret = n ? 1 : 0;
> @@ -132,9 +148,9 @@ void *osal_dma_alloc_coherent(struct ecore_dev *p_dev,
>         uint32_t core_id = rte_lcore_id();
>         unsigned int socket_id;
>
> -       if (ecore_mz_count >= RTE_MAX_MEMZONE) {
> -               DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
> -                      RTE_MAX_MEMZONE);
> +       if (ecore_mz_count >= rte_memzone_max_get()) {
> +               DP_ERR(p_dev, "Memzone allocation count exceeds %zu\n",
> +                      rte_memzone_max_get());
>                 *phys = 0;
>                 return OSAL_NULL;
>         }
> @@ -171,9 +187,9 @@ void *osal_dma_alloc_coherent_aligned(struct ecore_dev *p_dev,
>         uint32_t core_id = rte_lcore_id();
>         unsigned int socket_id;
>
> -       if (ecore_mz_count >= RTE_MAX_MEMZONE) {
> -               DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
> -                      RTE_MAX_MEMZONE);
> +       if (ecore_mz_count >= rte_memzone_max_get()) {
> +               DP_ERR(p_dev, "Memzone allocation count exceeds %zu\n",
> +                      rte_memzone_max_get());
>                 *phys = 0;
>                 return OSAL_NULL;
>         }
> diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
> index 67e7f75..97e261d 100644
> --- a/drivers/net/qede/base/bcm_osal.h
> +++ b/drivers/net/qede/base/bcm_osal.h
> @@ -477,4 +477,7 @@ enum dbg_status     qed_dbg_alloc_user_data(struct ecore_hwfn *p_hwfn,
>         qed_dbg_alloc_user_data(p_hwfn, user_data_ptr)
>  #define OSAL_DB_REC_OCCURRED(p_hwfn) nothing
>
> +int ecore_mz_mapping_alloc(void);
> +void ecore_mz_mapping_free(void);
> +
>  #endif /* __BCM_OSAL_H */
> diff --git a/drivers/net/qede/qede_main.c b/drivers/net/qede/qede_main.c
> index 0303903..fd63262 100644
> --- a/drivers/net/qede/qede_main.c
> +++ b/drivers/net/qede/qede_main.c
> @@ -72,6 +72,12 @@ qed_probe(struct ecore_dev *edev, struct rte_pci_device *pci_dev,
>         hw_prepare_params.allow_mdump = false;
>         hw_prepare_params.b_en_pacing = false;
>         hw_prepare_params.epoch = OSAL_GET_EPOCH(ECORE_LEADING_HWFN(edev));
> +       rc = ecore_mz_mapping_alloc();
> +       if (rc) {
> +               DP_ERR(edev, "mem zones array allocation failed\n");
> +               return rc;
> +       }
> +
>         rc = ecore_hw_prepare(edev, &hw_prepare_params);
>         if (rc) {
>                 DP_ERR(edev, "hw prepare failed\n");
> @@ -722,6 +728,7 @@ static void qed_remove(struct ecore_dev *edev)
>                 return;
>
>         ecore_hw_remove(edev);
> +       ecore_mz_mapping_free();
>  }
>
>  static int qed_send_drv_state(struct ecore_dev *edev, bool active)
> diff --git a/lib/eal/common/eal_common_memzone.c b/lib/eal/common/eal_common_memzone.c
> index a9cd91f..f94330b 100644
> --- a/lib/eal/common/eal_common_memzone.c
> +++ b/lib/eal/common/eal_common_memzone.c
> @@ -22,6 +22,10 @@
>  #include "eal_private.h"
>  #include "eal_memcfg.h"
>
> +#define RTE_DEFAULT_MAX_MEMZONE 2560

No need for a RTE_ prefix for a local macro and ...


> +
> +static size_t memzone_max = RTE_DEFAULT_MAX_MEMZONE;

... in the end, I see no need for the RTE_DEFAULT_MAX_MEMZONE macro at
all, it is only used as the default init value, here.


> +
>  static inline const struct rte_memzone *
>  memzone_lookup_thread_unsafe(const char *name)
>  {
> @@ -81,8 +85,9 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
>         /* no more room in config */
>         if (arr->count >= arr->len) {
>                 RTE_LOG(ERR, EAL,
> -               "%s(): Number of requested memzone segments exceeds RTE_MAX_MEMZONE\n",
> -                       __func__);
> +               "%s(): Number of requested memzone segments exceeds max "
> +               "memzone segments (%d >= %d)\n",

Won't we always display this log for the case when arr->count == arr->len ?
It is then pointless to display both arr->count and arr->len (which
should be formatted as %u).

I would simply log:
RTE_LOG(ERR, EAL,
        "%s(): Number of requested memzone segments exceeds maximum %u\n",
        __func__, arr->len);


> +                       __func__, arr->count, arr->len);
>                 rte_errno = ENOSPC;
>                 return NULL;
>         }
> @@ -396,7 +401,7 @@ rte_eal_memzone_init(void)
>
>         if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
>                         rte_fbarray_init(&mcfg->memzones, "memzone",
> -                       RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) {
> +                       rte_memzone_max_get(), sizeof(struct rte_memzone))) {
>                 RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n");
>                 ret = -1;
>         } else if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
> @@ -430,3 +435,20 @@ void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
>         }
>         rte_rwlock_read_unlock(&mcfg->mlock);
>  }
> +
> +int
> +rte_memzone_max_set(size_t max)
> +{
> +       /* Setting max memzone must occur befaore calling rte_eal_init() */

before*

This comment would be better placed in the API description than in the
implementation.


> +       if (eal_get_internal_configuration()->init_complete > 0)
> +               return -1;
> +
> +       memzone_max = max;
> +       return 0;
> +}
> +
> +size_t
> +rte_memzone_max_get(void)
> +{
> +       return memzone_max;
> +}
> diff --git a/lib/eal/include/rte_memzone.h b/lib/eal/include/rte_memzone.h
> index 5302caa..3ff7c73 100644
> --- a/lib/eal/include/rte_memzone.h
> +++ b/lib/eal/include/rte_memzone.h
> @@ -305,6 +305,26 @@ void rte_memzone_dump(FILE *f);
>  void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *arg),
>                       void *arg);
>
> +/**

 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice

> + * Set max memzone value
> + *
> + * @param max
> + *   Value of max memzone allocations

I'd rather describe as:
"Maximum number of memzones"

Please also mention that this function can only be called prior to
rte_eal_init().


> + * @return
> + *  0 on success, -1 otherwise
> + */
> +__rte_experimental
> +int rte_memzone_max_set(size_t max);
> +
> +/**

 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice

> + * Get max memzone value

Get the maximum number of memzones.

And we can remind the developer, here, that this value won't change
after rte_eal_init.


> + *
> + * @return
> + *   Value of max memzone allocations
> + */
> +__rte_experimental
> +size_t rte_memzone_max_get(void);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/eal/version.map b/lib/eal/version.map
> index 51a820d..b52a83c 100644
> --- a/lib/eal/version.map
> +++ b/lib/eal/version.map
> @@ -430,6 +430,10 @@ EXPERIMENTAL {
>         rte_thread_create_control;
>         rte_thread_set_name;
>         __rte_eal_trace_generic_blob;
> +
> +       # added in 23.07
> +       rte_memzone_max_set;
> +       rte_memzone_max_get;
>  };
>
>  INTERNAL {
> --
> 2.8.4
>


-- 
David Marchand


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V3] lib: set/get max memzone segments
  2023-05-03  7:26   ` [PATCH V3] " Ophir Munk
  2023-05-03 21:41     ` Morten Brørup
  2023-05-04  7:27     ` David Marchand
@ 2023-05-18 15:54     ` Burakov, Anatoly
  2023-05-25  6:43       ` Ophir Munk
  2023-05-24 22:25     ` [PATCH v4] " Ophir Munk
  3 siblings, 1 reply; 35+ messages in thread
From: Burakov, Anatoly @ 2023-05-18 15:54 UTC (permalink / raw)
  To: Ophir Munk, dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad
  Cc: Ophir Munk, Matan Azrad, Thomas Monjalon, Lior Margalit

Hi,

On 5/3/2023 8:26 AM, Ophir Munk wrote:
> In current DPDK the RTE_MAX_MEMZONE definition is unconditionally hard
> coded as 2560.  For applications requiring different values of this
> parameter – it is more convenient to set the max value via an rte API -
> rather than changing the dpdk source code per application.  In many
> organizations, the possibility to compile a private DPDK library for a
> particular application does not exist at all.  With this option there is
> no need to recompile DPDK and it allows using an in-box packaged DPDK.
> An example usage for updating the RTE_MAX_MEMZONE would be of an
> application that uses the DPDK mempool library which is based on DPDK
> memzone library.  The application may need to create a number of
> steering tables, each of which will require its own mempool allocation.
> This commit is not about how to optimize the application usage of
> mempool nor about how to improve the mempool implementation based on
> memzone.  It is about how to make the max memzone definition - run-time
> customized.
> This commit adds an API which must be called before rte_eal_init():
> rte_memzone_max_set(int max).  If not called, the default memzone
> (RTE_MAX_MEMZONE) is used.  There is also an API to query the effective
> max memzone: rte_memzone_max_get().

Commit message could use a little rewording and shortening. Suggested:

---

Currently, RTE_MAX_MEMZONE constant is used to decide how many memzones 
a DPDK application can have. This value could technically be changed by 
manually editing `rte_config.h` before compilation, but if DPDK is 
already compiled, that option is not useful. There are certain use cases 
that would benefit from making this value configurable.

This commit addresses the issue by adding a new API to set maximum 
number of memzones before EAL initialization (while using the old 
constant as default value), as well as an API to get current maximum 
number of memzones.

---

What do you think?


>   /* Array of memzone pointers */
> -static const struct rte_memzone *ecore_mz_mapping[RTE_MAX_MEMZONE];
> +static const struct rte_memzone **ecore_mz_mapping;
>   /* Counter to track current memzone allocated */
>   static uint16_t ecore_mz_count;
>   
> +int ecore_mz_mapping_alloc(void)
> +{
> +	ecore_mz_mapping = rte_zmalloc("ecore_mz_map",
> +		rte_memzone_max_get() * sizeof(struct rte_memzone *), 0);

Doesn't this need to check if it's already allocated? Does it need any 
special secondary process handling?

> +
> +	if (!ecore_mz_mapping)
> +		return -ENOMEM;
> +
> +	return 0;
> +}
> +
> +void ecore_mz_mapping_free(void)
> +{
> +	rte_free(ecore_mz_mapping);

Shouldn't this at least set the pointer to NULL to avoid double-free?

> +#define RTE_DEFAULT_MAX_MEMZONE 2560
> +
> +static size_t memzone_max = RTE_DEFAULT_MAX_MEMZONE;
> +
>   static inline const struct rte_memzone *
>   memzone_lookup_thread_unsafe(const char *name)
>   {
> @@ -81,8 +85,9 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
>   	/* no more room in config */
>   	if (arr->count >= arr->len) {
>   		RTE_LOG(ERR, EAL,
> -		"%s(): Number of requested memzone segments exceeds RTE_MAX_MEMZONE\n",
> -			__func__);
> +		"%s(): Number of requested memzone segments exceeds max "
> +		"memzone segments (%d >= %d)\n",

I think the "segments" terminology can be dropped, it is a holdover from 
the times when memzones were not allocated by malloc. The message can 
just say "Number of requested memzones exceeds maximum number of memzones".

> +			__func__, arr->count, arr->len);
>   		rte_errno = ENOSPC;
>   		return NULL;
>   	}
> @@ -396,7 +401,7 @@ rte_eal_memzone_init(void)
>   
>   	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
>   			rte_fbarray_init(&mcfg->memzones, "memzone",
> -			RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) {
> +			rte_memzone_max_get(), sizeof(struct rte_memzone))) {
>   		RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n");
>   		ret = -1;
>   	} else if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
> @@ -430,3 +435,20 @@ void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
>   	}
>   	rte_rwlock_read_unlock(&mcfg->mlock);
>   }
> +
> +int
> +rte_memzone_max_set(size_t max)
> +{
> +	/* Setting max memzone must occur befaore calling rte_eal_init() */
> +	if (eal_get_internal_configuration()->init_complete > 0)
> +		return -1;
> +
> +	memzone_max = max;
> +	return 0;
> +}
> +
> +size_t
> +rte_memzone_max_get(void)
> +{
> +	return memzone_max;
> +}

It seems that this is a local (static) value, which means it is not 
shared between processes, and thus could potentially mismatch between 
two different processes. While this _technically_ would not be a problem 
because secondary process init will not actually use this value, but the 
API will still return incorrect information.

I suggest updating/syncing this value somewhere in 
`eal_mcfg_update_internal()/eal_mcfg_update_from_internal()`, and adding 
this value to mem config. An alternative to that would be to just return 
`mem_config->memzones.count` (instead of the value itself), and return 
-1 (or zero?) if init hasn't yet completed.

-- 
Thanks,
Anatoly


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v4] lib: set/get max memzone segments
  2023-05-03  7:26   ` [PATCH V3] " Ophir Munk
                       ` (2 preceding siblings ...)
  2023-05-18 15:54     ` Burakov, Anatoly
@ 2023-05-24 22:25     ` Ophir Munk
  2023-05-25 14:53       ` Burakov, Anatoly
                         ` (3 more replies)
  3 siblings, 4 replies; 35+ messages in thread
From: Ophir Munk @ 2023-05-24 22:25 UTC (permalink / raw)
  To: dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad
  Cc: Ophir Munk, Matan Azrad, Thomas Monjalon, Lior Margalit

Currently, the max memzones count constat (RTE_MAX_MEMZONE) is used to
decide how many memzones a DPDK application can have. This value could
technically be changed by manually editing `rte_config.h` before
compilation, but if DPDK is already compiled, that option is not useful.
There are certain use cases that would benefit from making this value
configurable.

This commit addresses the issue by adding a new API to set the max
number of memzones before EAL initialization (while using the old
constant as default value), as well as an API to get current maximum
number of memzones.

Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 app/test/test_func_reentrancy.c     |  2 +-
 app/test/test_malloc_perf.c         |  2 +-
 app/test/test_memzone.c             | 42 ++++++++++++++++++++++++-------------
 config/rte_config.h                 |  1 -
 drivers/net/qede/base/bcm_osal.c    | 38 ++++++++++++++++++++++++++-------
 drivers/net/qede/base/bcm_osal.h    |  3 +++
 drivers/net/qede/qede_main.c        |  7 +++++++
 lib/eal/common/eal_common_memzone.c | 38 ++++++++++++++++++++++++++++++---
 lib/eal/common/eal_memcfg.h         |  2 ++
 lib/eal/include/rte_memzone.h       | 30 ++++++++++++++++++++++++++
 lib/eal/version.map                 |  4 ++++
 11 files changed, 141 insertions(+), 28 deletions(-)

diff --git a/app/test/test_func_reentrancy.c b/app/test/test_func_reentrancy.c
index d1ed5d4..ae9de6f 100644
--- a/app/test/test_func_reentrancy.c
+++ b/app/test/test_func_reentrancy.c
@@ -51,7 +51,7 @@ typedef void (*case_clean_t)(unsigned lcore_id);
 #define MEMPOOL_ELT_SIZE                    (sizeof(uint32_t))
 #define MEMPOOL_SIZE                        (4)
 
-#define MAX_LCORES	(RTE_MAX_MEMZONE / (MAX_ITER_MULTI * 4U))
+#define MAX_LCORES	(rte_memzone_max_get() / (MAX_ITER_MULTI * 4U))
 
 static uint32_t obj_count;
 static uint32_t synchro;
diff --git a/app/test/test_malloc_perf.c b/app/test/test_malloc_perf.c
index ccec43a..9bd1662 100644
--- a/app/test/test_malloc_perf.c
+++ b/app/test/test_malloc_perf.c
@@ -165,7 +165,7 @@ test_malloc_perf(void)
 		return -1;
 
 	if (test_alloc_perf("rte_memzone_reserve", memzone_alloc, memzone_free,
-			NULL, memset_us_gb, RTE_MAX_MEMZONE - 1) < 0)
+			NULL, memset_us_gb, rte_memzone_max_get() - 1) < 0)
 		return -1;
 
 	return 0;
diff --git a/app/test/test_memzone.c b/app/test/test_memzone.c
index c9255e5..f10f4fd 100644
--- a/app/test/test_memzone.c
+++ b/app/test/test_memzone.c
@@ -871,9 +871,17 @@ test_memzone_bounded(void)
 static int
 test_memzone_free(void)
 {
-	const struct rte_memzone *mz[RTE_MAX_MEMZONE + 1];
+	const struct rte_memzone **mz;
 	int i;
 	char name[20];
+	int rc = -1;
+
+	mz = rte_calloc("memzone_test", rte_memzone_max_get() + 1,
+			sizeof(struct rte_memzone *), 0);
+	if (!mz) {
+		printf("Fail allocating memzone test array\n");
+		return rc;
+	}
 
 	mz[0] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone0"), 2000,
 			SOCKET_ID_ANY, 0);
@@ -881,42 +889,42 @@ test_memzone_free(void)
 			SOCKET_ID_ANY, 0);
 
 	if (mz[0] > mz[1])
-		return -1;
+		goto exit_test;
 	if (!rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone0")))
-		return -1;
+		goto exit_test;
 	if (!rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone1")))
-		return -1;
+		goto exit_test;
 
 	if (rte_memzone_free(mz[0])) {
 		printf("Fail memzone free - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone0"))) {
 		printf("Found previously free memzone - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	mz[2] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone2"), 2000,
 			SOCKET_ID_ANY, 0);
 
 	if (mz[2] > mz[1]) {
 		printf("tempzone2 should have gotten the free entry from tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_free(mz[2])) {
 		printf("Fail memzone free - tempzone2\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone2"))) {
 		printf("Found previously free memzone - tempzone2\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_free(mz[1])) {
 		printf("Fail memzone free - tempzone1\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone1"))) {
 		printf("Found previously free memzone - tempzone1\n");
-		return -1;
+		goto exit_test;
 	}
 
 	i = 0;
@@ -928,7 +936,7 @@ test_memzone_free(void)
 
 	if (rte_memzone_free(mz[0])) {
 		printf("Fail memzone free - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	mz[0] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone0new"), 0,
 			SOCKET_ID_ANY, 0);
@@ -936,17 +944,21 @@ test_memzone_free(void)
 	if (mz[0] == NULL) {
 		printf("Fail to create memzone - tempzone0new - when MAX memzones were "
 				"created and one was free\n");
-		return -1;
+		goto exit_test;
 	}
 
 	for (i = i - 2; i >= 0; i--) {
 		if (rte_memzone_free(mz[i])) {
 			printf("Fail memzone free - tempzone%d\n", i);
-			return -1;
+			goto exit_test;
 		}
 	}
 
-	return 0;
+	rc = 0;
+
+exit_test:
+	rte_free(mz);
+	return rc;
 }
 
 static int test_memzones_left;
diff --git a/config/rte_config.h b/config/rte_config.h
index 7b8c85e..400e44e 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -34,7 +34,6 @@
 #define RTE_MAX_MEM_MB_PER_LIST 32768
 #define RTE_MAX_MEMSEG_PER_TYPE 32768
 #define RTE_MAX_MEM_MB_PER_TYPE 65536
-#define RTE_MAX_MEMZONE 2560
 #define RTE_MAX_TAILQ 32
 #define RTE_LOG_DP_LEVEL RTE_LOG_INFO
 #define RTE_MAX_VFIO_CONTAINERS 64
diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 2c59397..b5249c6 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -47,10 +47,34 @@ void osal_poll_mode_dpc(osal_int_ptr_t hwfn_cookie)
 }
 
 /* Array of memzone pointers */
-static const struct rte_memzone *ecore_mz_mapping[RTE_MAX_MEMZONE];
+static const struct rte_memzone **ecore_mz_mapping = NULL;
 /* Counter to track current memzone allocated */
 static uint16_t ecore_mz_count;
 
+static uint32_t ref_cnt = 0;
+
+int ecore_mz_mapping_alloc(void)
+{
+	if (__atomic_fetch_add(&ref_cnt, 1, __ATOMIC_RELAXED) == 0) {
+		ecore_mz_mapping = rte_calloc("ecore_mz_map",
+				rte_memzone_max_get(), sizeof(struct rte_memzone *), 0);
+	}
+
+	if (!ecore_mz_mapping)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void ecore_mz_mapping_free(void)
+{
+	if (__atomic_fetch_sub(&ref_cnt, 1, __ATOMIC_RELAXED) - 1 == 0) {
+		if (ecore_mz_mapping)
+			rte_free(ecore_mz_mapping);
+		ecore_mz_mapping = NULL;
+	}
+}
+
 unsigned long qede_log2_align(unsigned long n)
 {
 	unsigned long ret = n ? 1 : 0;
@@ -132,9 +156,9 @@ void *osal_dma_alloc_coherent(struct ecore_dev *p_dev,
 	uint32_t core_id = rte_lcore_id();
 	unsigned int socket_id;
 
-	if (ecore_mz_count >= RTE_MAX_MEMZONE) {
-		DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
-		       RTE_MAX_MEMZONE);
+	if (ecore_mz_count >= rte_memzone_max_get()) {
+		DP_ERR(p_dev, "Memzone allocation count exceeds %zu\n",
+		       rte_memzone_max_get());
 		*phys = 0;
 		return OSAL_NULL;
 	}
@@ -171,9 +195,9 @@ void *osal_dma_alloc_coherent_aligned(struct ecore_dev *p_dev,
 	uint32_t core_id = rte_lcore_id();
 	unsigned int socket_id;
 
-	if (ecore_mz_count >= RTE_MAX_MEMZONE) {
-		DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
-		       RTE_MAX_MEMZONE);
+	if (ecore_mz_count >= rte_memzone_max_get()) {
+		DP_ERR(p_dev, "Memzone allocation count exceeds %zu\n",
+		       rte_memzone_max_get());
 		*phys = 0;
 		return OSAL_NULL;
 	}
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 67e7f75..97e261d 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -477,4 +477,7 @@ enum dbg_status	qed_dbg_alloc_user_data(struct ecore_hwfn *p_hwfn,
 	qed_dbg_alloc_user_data(p_hwfn, user_data_ptr)
 #define OSAL_DB_REC_OCCURRED(p_hwfn) nothing
 
+int ecore_mz_mapping_alloc(void);
+void ecore_mz_mapping_free(void);
+
 #endif /* __BCM_OSAL_H */
diff --git a/drivers/net/qede/qede_main.c b/drivers/net/qede/qede_main.c
index 0303903..fd63262 100644
--- a/drivers/net/qede/qede_main.c
+++ b/drivers/net/qede/qede_main.c
@@ -72,6 +72,12 @@ qed_probe(struct ecore_dev *edev, struct rte_pci_device *pci_dev,
 	hw_prepare_params.allow_mdump = false;
 	hw_prepare_params.b_en_pacing = false;
 	hw_prepare_params.epoch = OSAL_GET_EPOCH(ECORE_LEADING_HWFN(edev));
+	rc = ecore_mz_mapping_alloc();
+	if (rc) {
+		DP_ERR(edev, "mem zones array allocation failed\n");
+		return rc;
+	}
+
 	rc = ecore_hw_prepare(edev, &hw_prepare_params);
 	if (rc) {
 		DP_ERR(edev, "hw prepare failed\n");
@@ -722,6 +728,7 @@ static void qed_remove(struct ecore_dev *edev)
 		return;
 
 	ecore_hw_remove(edev);
+	ecore_mz_mapping_free();
 }
 
 static int qed_send_drv_state(struct ecore_dev *edev, bool active)
diff --git a/lib/eal/common/eal_common_memzone.c b/lib/eal/common/eal_common_memzone.c
index a9cd91f..77994b8 100644
--- a/lib/eal/common/eal_common_memzone.c
+++ b/lib/eal/common/eal_common_memzone.c
@@ -22,6 +22,8 @@
 #include "eal_private.h"
 #include "eal_memcfg.h"
 
+#define DEFAULT_MAX_MEMZONE 2560
+
 static inline const struct rte_memzone *
 memzone_lookup_thread_unsafe(const char *name)
 {
@@ -81,8 +83,9 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 	/* no more room in config */
 	if (arr->count >= arr->len) {
 		RTE_LOG(ERR, EAL,
-		"%s(): Number of requested memzone segments exceeds RTE_MAX_MEMZONE\n",
-			__func__);
+		"%s(): Number of requested memzone segments exceeds "
+		"maximum %u\n", __func__, arr->len);
+
 		rte_errno = ENOSPC;
 		return NULL;
 	}
@@ -396,7 +399,7 @@ rte_eal_memzone_init(void)
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
 			rte_fbarray_init(&mcfg->memzones, "memzone",
-			RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) {
+			rte_memzone_max_get(), sizeof(struct rte_memzone))) {
 		RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n");
 		ret = -1;
 	} else if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
@@ -430,3 +433,32 @@ void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
 	}
 	rte_rwlock_read_unlock(&mcfg->mlock);
 }
+
+int
+rte_memzone_max_set(size_t max)
+{
+	struct rte_mem_config *mcfg;
+
+	if (eal_get_internal_configuration()->init_complete > 0)
+		return -1;
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+	if (!mcfg)
+		return -1;
+
+	mcfg->max_memzone = max;
+
+	return 0;
+}
+
+size_t
+rte_memzone_max_get(void)
+{
+	struct rte_mem_config *mcfg;
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+	if (!mcfg || !mcfg->max_memzone)
+		return DEFAULT_MAX_MEMZONE;
+
+	return mcfg->max_memzone;
+}
diff --git a/lib/eal/common/eal_memcfg.h b/lib/eal/common/eal_memcfg.h
index ea013a9..183bb25 100644
--- a/lib/eal/common/eal_memcfg.h
+++ b/lib/eal/common/eal_memcfg.h
@@ -75,6 +75,8 @@ struct rte_mem_config {
 	/**< TSC rate */
 
 	uint8_t dma_maskbits; /**< Keeps the more restricted dma mask. */
+
+	size_t max_memzone; /**< maximum allowed allocated memzones. */
 };
 
 /* update internal config from shared mem config */
diff --git a/lib/eal/include/rte_memzone.h b/lib/eal/include/rte_memzone.h
index 5302caa..13de341 100644
--- a/lib/eal/include/rte_memzone.h
+++ b/lib/eal/include/rte_memzone.h
@@ -305,6 +305,36 @@ void rte_memzone_dump(FILE *f);
 void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *arg),
 		      void *arg);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Set max memzone value
+ *
+ * This function can only be called prior to rte_eal_init().
+ *
+ * @param max
+ *   Maximum number of memzones
+ * @return
+ *  0 on success, -1 otherwise
+ */
+__rte_experimental
+int rte_memzone_max_set(size_t max);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Get the maximum number of memzones.
+ *
+ * @note: The maximum value will not change after calling rte_eal_init().
+ *
+ * @return
+ *   Maximum number of memzones
+ */
+__rte_experimental
+size_t rte_memzone_max_get(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/version.map b/lib/eal/version.map
index 51a820d..b52a83c 100644
--- a/lib/eal/version.map
+++ b/lib/eal/version.map
@@ -430,6 +430,10 @@ EXPERIMENTAL {
 	rte_thread_create_control;
 	rte_thread_set_name;
 	__rte_eal_trace_generic_blob;
+
+	# added in 23.07
+	rte_memzone_max_set;
+	rte_memzone_max_get;
 };
 
 INTERNAL {
-- 
2.8.4


^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH V3] lib: set/get max memzone segments
  2023-05-04  7:27     ` David Marchand
@ 2023-05-25  6:35       ` Ophir Munk
  0 siblings, 0 replies; 35+ messages in thread
From: Ophir Munk @ 2023-05-25  6:35 UTC (permalink / raw)
  To: David Marchand
  Cc: dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad,
	Matan Azrad, NBU-Contact-Thomas Monjalon (EXTERNAL),
	Lior Margalit

Hello David,
Thank you for your review. I noticed a review by Anatoly Burakov that addresses similar points to yours. In V4 I supply a reply to you both.

> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Thursday, 4 May 2023 10:27
> To: Ophir Munk <ophirmu@nvidia.com>
> Cc: dev@dpdk.org; Bruce Richardson <bruce.richardson@intel.com>; Devendra
> Singh Rawat <dsinghrawat@marvell.com>; Alok Prasad <palok@marvell.com>;
> Ophir Munk <ophirmu@mellanox.com>; Matan Azrad <matan@nvidia.com>;
> NBU-Contact-Thomas Monjalon (EXTERNAL) <thomas@monjalon.net>; Lior
> Margalit <lmargalit@nvidia.com>
> Subject: Re: [PATCH V3] lib: set/get max memzone segments
> 
> Hello Ophir,
> 
> Good thing someone is looking into this.
> Thanks.
> 
> I have a few comments.
> 
> 
> This commitlog is a bit compact.
> Separating it with some empty lines would help digest it.
> 
> 
> 
> The code was using a rather short name "RTE_MAX_MEMZONE".
> But I prefer we spell this as "max memzones count" (or a better wording), in
> the descriptions/comments.
> 
> 

Ack. Commit message updated.

> > This commit adds an API which must be called before rte_eal_init():
> > rte_memzone_max_set(int max).  If not called, the default memzone
> 
> Afaics, this prototype got unaligned with the patch content, as a size_t is now
> taken as input.
> You can simply mention rte_memzone_max_set().
> 
> 

Ack

> > (RTE_MAX_MEMZONE) is used.  There is also an API to query the
> > effective
> 
> After the patch RTE_MAX_MEMZONE does not exist anymore.
> 

Ack

> 
> > max memzone: rte_memzone_max_get().
> >
> > Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
> 
> 
> A global comment on the patch:
> 
> rte_calloc provides what you want in all cases below: an array of objects.
> I prefer rte_calloc(count, sizeof elem) to rte_zmalloc(count * sizeof elem).
> 
> This also avoids a temporary variable to compute the total size of such an
> array.
> 

Ack

> >
> >  /* Array of memzone pointers */
> > -static const struct rte_memzone
> *ecore_mz_mapping[RTE_MAX_MEMZONE];
> > +static const struct rte_memzone **ecore_mz_mapping;
> >  /* Counter to track current memzone allocated */  static uint16_t
> > ecore_mz_count;
> >
> > +int ecore_mz_mapping_alloc(void)
> > +{
> > +       ecore_mz_mapping = rte_zmalloc("ecore_mz_map",
> > +               rte_memzone_max_get() * sizeof(struct rte_memzone *),
> > +0);
> 
> I think we should allocate only for the first call and we are missing some
> refcount.
> 
> 

Ack. This allocation occurs as part of qed_probe. I added a ref count.

> > +
> > +       if (!ecore_mz_mapping)
> > +               return -ENOMEM;
> > +
> > +       return 0;
> > +}
> > +
> > +void ecore_mz_mapping_free(void)
> > +{
> > +       rte_free(ecore_mz_mapping);
> 
> Won't we release this array while another qed device is still in use?
> 
> 

Handled with the ref count.

> >
> > +#define RTE_DEFAULT_MAX_MEMZONE 2560
> 
> No need for a RTE_ prefix for a local macro and ...
> 

Ack

> 
> > +
> > +static size_t memzone_max = RTE_DEFAULT_MAX_MEMZONE;
> 
> ... in the end, I see no need for the RTE_DEFAULT_MAX_MEMZONE macro at
> all, it is only used as the default init value, here.
> 
> 

I prefer leaving the macro as it explains the value context.

> > +
> >  static inline const struct rte_memzone *
> > memzone_lookup_thread_unsafe(const char *name)  { @@ -81,8 +85,9 @@
> > memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
> >         /* no more room in config */
> >         if (arr->count >= arr->len) {
> >                 RTE_LOG(ERR, EAL,
> > -               "%s(): Number of requested memzone segments exceeds
> RTE_MAX_MEMZONE\n",
> > -                       __func__);
> > +               "%s(): Number of requested memzone segments exceeds max "
> > +               "memzone segments (%d >= %d)\n",
> 
> Won't we always display this log for the case when arr->count == arr->len ?
> It is then pointless to display both arr->count and arr->len (which should be
> formatted as %u).
> 
> I would simply log:
> RTE_LOG(ERR, EAL,
>         "%s(): Number of requested memzone segments exceeds maximum
> %u\n",
>         __func__, arr->len);
> 

Ack

> 
> > +{
> > +       /* Setting max memzone must occur befaore calling
> > +rte_eal_init() */
> 
> before*
> 

Ack. Thanks.

> This comment would be better placed in the API description than in the
> implementation.
> 

Ack

> 
> > +       if (eal_get_internal_configuration()->init_complete > 0)
> > +               return -1;
> > +
> > diff --git a/lib/eal/include/rte_memzone.h
> > b/lib/eal/include/rte_memzone.h index 5302caa..3ff7c73 100644
> > --- a/lib/eal/include/rte_memzone.h
> > +++ b/lib/eal/include/rte_memzone.h
> > @@ -305,6 +305,26 @@ void rte_memzone_dump(FILE *f);  void
> > rte_memzone_walk(void (*func)(const struct rte_memzone *, void *arg),
> >                       void *arg);
> >
> > +/**
> 
>  * @warning
>  * @b EXPERIMENTAL: this API may change without prior notice
> 

Ack

> > + * Set max memzone value
> > + *
> > + * @param max
> > + *   Value of max memzone allocations
> 
> I'd rather describe as:
> "Maximum number of memzones"
> 
> Please also mention that this function can only be called prior to rte_eal_init().
> 
> 

Ack

> > + * @return
> > + *  0 on success, -1 otherwise
> > + */
> > +__rte_experimental
> > +int rte_memzone_max_set(size_t max);
> > +
> > +/**
> 
>  * @warning
>  * @b EXPERIMENTAL: this API may change without prior notice
> 

Ack

> > + * Get max memzone value
> 
> Get the maximum number of memzones.
> 
> And we can remind the developer, here, that this value won't change after
> rte_eal_init.
> 
> 

Ack

> >  };
> >
> >  INTERNAL {
> > --
> > 2.8.4
> >
> 
> 
> --
> David Marchand


^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH V3] lib: set/get max memzone segments
  2023-05-18 15:54     ` Burakov, Anatoly
@ 2023-05-25  6:43       ` Ophir Munk
  0 siblings, 0 replies; 35+ messages in thread
From: Ophir Munk @ 2023-05-25  6:43 UTC (permalink / raw)
  To: Burakov, Anatoly, dev, Bruce Richardson, Devendra Singh Rawat,
	Alok Prasad
  Cc: Matan Azrad, NBU-Contact-Thomas Monjalon (EXTERNAL), Lior Margalit

Hi Anatoly,
Thank you for your review. I noticed a review by David Marchand that addresses similar points to yours. In V4 I supply a reply to you both.

> 
> Commit message could use a little rewording and shortening. Suggested:
> 
> ---
> 
> Currently, RTE_MAX_MEMZONE constant is used to decide how many
> memzones a DPDK application can have. This value could technically be
> changed by manually editing `rte_config.h` before compilation, but if DPDK is
> already compiled, that option is not useful. There are certain use cases that
> would benefit from making this value configurable.
> 
> This commit addresses the issue by adding a new API to set maximum number
> of memzones before EAL initialization (while using the old constant as default
> value), as well as an API to get current maximum number of memzones.
> 
> ---
> 
> What do you think?
> 

Ack.

> 
> >   /* Array of memzone pointers */
> > -static const struct rte_memzone
> *ecore_mz_mapping[RTE_MAX_MEMZONE];
> > +static const struct rte_memzone **ecore_mz_mapping;
> >   /* Counter to track current memzone allocated */
> >   static uint16_t ecore_mz_count;
> >
> > +int ecore_mz_mapping_alloc(void)
> > +{
> > +	ecore_mz_mapping = rte_zmalloc("ecore_mz_map",
> > +		rte_memzone_max_get() * sizeof(struct rte_memzone *), 0);
> 
> Doesn't this need to check if it's already allocated? 

Ack. Issue is addressed with a ref count.

> Does it need any special secondary process handling?
> 

No. 

> > +
> > +	if (!ecore_mz_mapping)
> > +		return -ENOMEM;
> > +
> > +	return 0;
> > +}
> > +
> > +void ecore_mz_mapping_free(void)
> > +{
> > +	rte_free(ecore_mz_mapping);
> 
> Shouldn't this at least set the pointer to NULL to avoid double-free?
> 

Ack

> > +#define RTE_DEFAULT_MAX_MEMZONE 2560
> > +
> > +static size_t memzone_max = RTE_DEFAULT_MAX_MEMZONE;
> > +
> >   static inline const struct rte_memzone *
> >   memzone_lookup_thread_unsafe(const char *name)
> >   {
> > @@ -81,8 +85,9 @@ memzone_reserve_aligned_thread_unsafe(const char
> *name, size_t len,
> >   	/* no more room in config */
> >   	if (arr->count >= arr->len) {
> >   		RTE_LOG(ERR, EAL,
> > -		"%s(): Number of requested memzone segments exceeds
> RTE_MAX_MEMZONE\n",
> > -			__func__);
> > +		"%s(): Number of requested memzone segments exceeds max
> "
> > +		"memzone segments (%d >= %d)\n",
> 
> I think the "segments" terminology can be dropped, it is a holdover from the
> times when memzones were not allocated by malloc. The message can just say
> "Number of requested memzones exceeds maximum number of memzones".
> 

Ack

> > +rte_memzone_max_set(size_t max)
> > +{
> > +	/* Setting max memzone must occur befaore calling rte_eal_init() */
> > +	if (eal_get_internal_configuration()->init_complete > 0)
> > +		return -1;
> > +
> > +	memzone_max = max;
> > +	return 0;
> > +}
> > +
> > +size_t
> > +rte_memzone_max_get(void)
> > +{
> > +	return memzone_max;
> > +}
> 
> It seems that this is a local (static) value, which means it is not shared between
> processes, and thus could potentially mismatch between two different
> processes. While this _technically_ would not be a problem because secondary
> process init will not actually use this value, but the API will still return incorrect
> information.
> 
> I suggest updating/syncing this value somewhere in
> `eal_mcfg_update_internal()/eal_mcfg_update_from_internal()`, and adding
> this value to mem config. An alternative to that would be to just return
> `mem_config->memzones.count` (instead of the value itself), and return
> -1 (or zero?) if init hasn't yet completed.
> 

Static variable removed.
I opted the second alternative, but if init hasn't yet completed the return value is the default (2560) rather than -1 or 0.

> --
> Thanks,
> Anatoly


^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH V3] lib: set/get max memzone segments
  2023-05-03 21:41     ` Morten Brørup
@ 2023-05-25  6:47       ` Ophir Munk
  0 siblings, 0 replies; 35+ messages in thread
From: Ophir Munk @ 2023-05-25  6:47 UTC (permalink / raw)
  To: Morten Brørup, dev, Bruce Richardson, Devendra Singh Rawat,
	Alok Prasad
  Cc: Matan Azrad, NBU-Contact-Thomas Monjalon (EXTERNAL), Lior Margalit

Hi Morten,

> 
> I retracted my objection to the RFC, but should also have added:
> 
> Acked-by: Morten Brørup <mb@smartsharesystems.com>

Ack. 


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v4] lib: set/get max memzone segments
  2023-05-24 22:25     ` [PATCH v4] " Ophir Munk
@ 2023-05-25 14:53       ` Burakov, Anatoly
  2023-05-30 11:37         ` Ophir Munk
  2023-05-26  9:55       ` David Marchand
                         ` (2 subsequent siblings)
  3 siblings, 1 reply; 35+ messages in thread
From: Burakov, Anatoly @ 2023-05-25 14:53 UTC (permalink / raw)
  To: Ophir Munk, dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad
  Cc: Ophir Munk, Matan Azrad, Thomas Monjalon, Lior Margalit

On 5/24/2023 11:25 PM, Ophir Munk wrote:
> Currently, the max memzones count constat (RTE_MAX_MEMZONE) is used to
> decide how many memzones a DPDK application can have. This value could
> technically be changed by manually editing `rte_config.h` before
> compilation, but if DPDK is already compiled, that option is not useful.
> There are certain use cases that would benefit from making this value
> configurable.
> 
> This commit addresses the issue by adding a new API to set the max
> number of memzones before EAL initialization (while using the old
> constant as default value), as well as an API to get current maximum
> number of memzones.
> 
> Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> ---


> +
> +int
> +rte_memzone_max_set(size_t max)
> +{
> +	struct rte_mem_config *mcfg;
> +
> +	if (eal_get_internal_configuration()->init_complete > 0)
> +		return -1;
> +
> +	mcfg = rte_eal_get_configuration()->mem_config;
> +	if (!mcfg)
> +		return -1;
> +
> +	mcfg->max_memzone = max;
> +
> +	return 0;
> +}

Would this even work? AFAIR mem_config is only available some time 
during EAL init, not before (mem_config pointer will be NULL at that point).

I suggest the following flow:

set():

if init_complete => return -1
else => set local static value

get():

if init_complete => return memzones.count
else => return local static value (set to our default)

That way we don't actually need the memconfig, and multiprocess will 
work because memzones.count is shared between primary and secondary anyway.

-- 
Thanks,
Anatoly


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v4] lib: set/get max memzone segments
  2023-05-24 22:25     ` [PATCH v4] " Ophir Munk
  2023-05-25 14:53       ` Burakov, Anatoly
@ 2023-05-26  9:55       ` David Marchand
  2023-05-28 12:09         ` [EXT] " Alok Prasad
  2023-05-30 13:32       ` Thomas Monjalon
  2023-05-31  7:52       ` [PATCH V5] " Ophir Munk
  3 siblings, 1 reply; 35+ messages in thread
From: David Marchand @ 2023-05-26  9:55 UTC (permalink / raw)
  To: Devendra Singh Rawat, Alok Prasad
  Cc: dev, Bruce Richardson, Ophir Munk, Matan Azrad, Thomas Monjalon,
	Lior Margalit

On Thu, May 25, 2023 at 12:26 AM Ophir Munk <ophirmu@nvidia.com> wrote:
>
> Currently, the max memzones count constat (RTE_MAX_MEMZONE) is used to
> decide how many memzones a DPDK application can have. This value could
> technically be changed by manually editing `rte_config.h` before
> compilation, but if DPDK is already compiled, that option is not useful.
> There are certain use cases that would benefit from making this value
> configurable.
>
> This commit addresses the issue by adding a new API to set the max
> number of memzones before EAL initialization (while using the old
> constant as default value), as well as an API to get current maximum
> number of memzones.
>
> Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>

qede maintainers, can you double check the change on your driver please?
Thanks.


-- 
David Marchand


^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [EXT] Re: [PATCH v4] lib: set/get max memzone segments
  2023-05-26  9:55       ` David Marchand
@ 2023-05-28 12:09         ` Alok Prasad
  0 siblings, 0 replies; 35+ messages in thread
From: Alok Prasad @ 2023-05-28 12:09 UTC (permalink / raw)
  To: David Marchand, Devendra Singh Rawat
  Cc: dev, Bruce Richardson, Ophir Munk, Matan Azrad, Thomas Monjalon,
	Lior Margalit



> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: 26 May 2023 15:25
> To: Devendra Singh Rawat <dsinghrawat@marvell.com>; Alok Prasad <palok@marvell.com>
> Cc: dev@dpdk.org; Bruce Richardson <bruce.richardson@intel.com>; Ophir Munk <ophirmu@nvidia.com>; Matan Azrad
> <matan@nvidia.com>; Thomas Monjalon <thomas@monjalon.net>; Lior Margalit <lmargalit@nvidia.com>
> Subject: [EXT] Re: [PATCH v4] lib: set/get max memzone segments
> 
> External Email
> 
> ----------------------------------------------------------------------
> On Thu, May 25, 2023 at 12:26 AM Ophir Munk <ophirmu@nvidia.com> wrote:
> >
> > Currently, the max memzones count constat (RTE_MAX_MEMZONE) is used to
> > decide how many memzones a DPDK application can have. This value could
> > technically be changed by manually editing `rte_config.h` before
> > compilation, but if DPDK is already compiled, that option is not useful.
> > There are certain use cases that would benefit from making this value
> > configurable.
> >
> > This commit addresses the issue by adding a new API to set the max
> > number of memzones before EAL initialization (while using the old
> > constant as default value), as well as an API to get current maximum
> > number of memzones.
> >
> > Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
> > Acked-by: Morten Brørup <mb@smartsharesystems.com>
> 
> qede maintainers, can you double check the change on your driver please?
> Thanks.
> 
> 
> --
> David Marchand

Acked-by: Alok Prasad <palok@marvell.com>

^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH v4] lib: set/get max memzone segments
  2023-05-25 14:53       ` Burakov, Anatoly
@ 2023-05-30 11:37         ` Ophir Munk
  0 siblings, 0 replies; 35+ messages in thread
From: Ophir Munk @ 2023-05-30 11:37 UTC (permalink / raw)
  To: Burakov, Anatoly, dev, Bruce Richardson, Devendra Singh Rawat,
	Alok Prasad
  Cc: Matan Azrad, NBU-Contact-Thomas Monjalon (EXTERNAL),
	Lior Margalit, Asaf Penso, Ophir Munk



> -----Original Message-----
> Subject: Re: [PATCH v4] lib: set/get max memzone segments
> 
> On 5/24/2023 11:25 PM, Ophir Munk wrote:
> > Currently, the max memzones count constat (RTE_MAX_MEMZONE) is used
> to
> > decide how many memzones a DPDK application can have. This value could
> > technically be changed by manually editing `rte_config.h` before
> > compilation, but if DPDK is already compiled, that option is not useful.
> > There are certain use cases that would benefit from making this value
> > configurable.
> >
> > This commit addresses the issue by adding a new API to set the max
> > number of memzones before EAL initialization (while using the old
> > constant as default value), as well as an API to get current maximum
> > number of memzones.
> >
> > Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
> > Acked-by: Morten Brørup <mb@smartsharesystems.com>
> > ---
> 
> > +
> > +int
> > +rte_memzone_max_set(size_t max)
> > +{
> > +	struct rte_mem_config *mcfg;
> > +
> > +	if (eal_get_internal_configuration()->init_complete > 0)
> > +		return -1;
> > +
> > +	mcfg = rte_eal_get_configuration()->mem_config;
> > +	if (!mcfg)
> > +		return -1;
> > +
> > +	mcfg->max_memzone = max;
> > +
> > +	return 0;
> > +}
> 
> Would this even work? 

Yes. It's working. 
I successfully ran the following test:

int max = rte_memzone_max_get(); 
printf("Max memzone before eal_init: %d\n", max);      // Printing the default max value of 2560
rte_memzone_max_set(1000);
max = rte_memzone_max_get();
printf("Max memzone before eal_init after set to 1000: %d\n", max);    // Printing the new max value of 1000
rte_eal_init(argc, argv); 
rte_memzone_max_set(2000); // Here we fail with -1 since we set after eal init
max = rte_memzone_max_get();
printf("Max memzone after eal_init and after set to 2000: %d\n", max);   // Printing the correct max value of 1000

> AFAIR mem_config is only available some time during
> EAL init, not before (mem_config pointer will be NULL at that point).
> 

Please note that DPDK supports early memory config (lib/eal/common/eal_common_config.c):

/* early configuration structure, when memory config is not mmapped */             
static struct rte_mem_config early_mem_config;  

So max memzone is saved in the early memory and later this memory becomes mapped (shared).

Having said that - I think the current patch is correct.
Please confirm.

> I suggest the following flow:
> 
> set():
> 
> if init_complete => return -1

Also if mem_config is NULL => return -1

> else => set local static value
> 

> get():
> 
> if init_complete => return memzones.count else => return local static value (set
> to our default)
> 
> That way we don't actually need the memconfig, and multiprocess will work
> because memzones.count is shared between primary and secondary anyway.
> 
> --
> Thanks,
> Anatoly


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v4] lib: set/get max memzone segments
  2023-05-24 22:25     ` [PATCH v4] " Ophir Munk
  2023-05-25 14:53       ` Burakov, Anatoly
  2023-05-26  9:55       ` David Marchand
@ 2023-05-30 13:32       ` Thomas Monjalon
  2023-05-31  7:56         ` Ophir Munk
  2023-05-31  7:52       ` [PATCH V5] " Ophir Munk
  3 siblings, 1 reply; 35+ messages in thread
From: Thomas Monjalon @ 2023-05-30 13:32 UTC (permalink / raw)
  To: Ophir Munk
  Cc: dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad,
	Matan Azrad, Lior Margalit

25/05/2023 00:25, Ophir Munk:
> --- a/config/rte_config.h
> +++ b/config/rte_config.h
> -#define RTE_MAX_MEMZONE 2560

Good to be able to remove this compilation-time configuration.


> --- a/lib/eal/common/eal_common_memzone.c
> +++ b/lib/eal/common/eal_common_memzone.c
> +#define DEFAULT_MAX_MEMZONE 2560

Maybe add "_COUNT" at the end to make clear it is not about the size of a memzone.
We should add a comment here to explain the meaning of this default:
used until the "set" function is called.


> -		"%s(): Number of requested memzone segments exceeds RTE_MAX_MEMZONE\n",
> -			__func__);
> +		"%s(): Number of requested memzone segments exceeds "
> +		"maximum %u\n", __func__, arr->len);

We should keep "maximum" on the first line to ease "grep" in the code.

> +int
> +rte_memzone_max_set(size_t max)
> +{
> +	struct rte_mem_config *mcfg;
> +
> +	if (eal_get_internal_configuration()->init_complete > 0)
> +		return -1;

An error log would be needed here I think.

> +
> +	mcfg = rte_eal_get_configuration()->mem_config;
> +	if (!mcfg)

Better to use "== NULL" for pointers.

> +		return -1;

Do we need an error log as well?

> +
> +	mcfg->max_memzone = max;
> +
> +	return 0;
> +}
> +
> +size_t
> +rte_memzone_max_get(void)
> +{
> +	struct rte_mem_config *mcfg;
> +
> +	mcfg = rte_eal_get_configuration()->mem_config;
> +	if (!mcfg || !mcfg->max_memzone)

Same comment as above: don't use boolean operator for pointer or value.
 
> +		return DEFAULT_MAX_MEMZONE;
> +
> +	return mcfg->max_memzone;
> +}
> diff --git a/lib/eal/common/eal_memcfg.h b/lib/eal/common/eal_memcfg.h
> index ea013a9..183bb25 100644
> --- a/lib/eal/common/eal_memcfg.h
> +++ b/lib/eal/common/eal_memcfg.h
> @@ -75,6 +75,8 @@ struct rte_mem_config {
>  	/**< TSC rate */
>  
>  	uint8_t dma_maskbits; /**< Keeps the more restricted dma mask. */
> +
> +	size_t max_memzone; /**< maximum allowed allocated memzones. */

Uppercase for first work, and we may remove "allowed"?
Suggestion: "Maximum number of allocated memzones."

[...]
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Set max memzone value

Add a dot at the end.
Instead of "value", we should mention "number" or "count".

> + *
> + * This function can only be called prior to rte_eal_init().
> + *
> + * @param max
> + *   Maximum number of memzones
> + * @return
> + *  0 on success, -1 otherwise
> + */
> +__rte_experimental
> +int rte_memzone_max_set(size_t max);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Get the maximum number of memzones.
> + *
> + * @note: The maximum value will not change after calling rte_eal_init().
> + *
> + * @return
> + *   Maximum number of memzones
> + */
> +__rte_experimental
> +size_t rte_memzone_max_get(void);

Good, thank you.



^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH V5] lib: set/get max memzone segments
  2023-05-24 22:25     ` [PATCH v4] " Ophir Munk
                         ` (2 preceding siblings ...)
  2023-05-30 13:32       ` Thomas Monjalon
@ 2023-05-31  7:52       ` Ophir Munk
  2023-05-31  8:41         ` [PATCH V6] " Ophir Munk
  3 siblings, 1 reply; 35+ messages in thread
From: Ophir Munk @ 2023-05-31  7:52 UTC (permalink / raw)
  To: dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad
  Cc: Ophir Munk, Matan Azrad, Thomas Monjalon, Lior Margalit,
	David Marchand, Anatoly Burakov

Currently, the max memzones count constat (RTE_MAX_MEMZONE) is used to
decide how many memzones a DPDK application can have. This value could
technically be changed by manually editing `rte_config.h` before
compilation, but if DPDK is already compiled, that option is not useful.
There are certain use cases that would benefit from making this value
configurable.

This commit addresses the issue by adding a new API to set the max
number of memzones before EAL initialization (while using the old
constant as default value), as well as an API to get current maximum
number of memzones.

Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
Reviewed-by: Thomas Monjalon <thomas@monjalon.net>
Reviewed-by: David Marchand <david.marchand@redhat.com>
Reviewed-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Alok Prasad <palok@marvell.com>
---
 app/test/test_func_reentrancy.c     |  2 +-
 app/test/test_malloc_perf.c         |  2 +-
 app/test/test_memzone.c             | 42 ++++++++++++++++++++++-------------
 config/rte_config.h                 |  1 -
 drivers/net/qede/base/bcm_osal.c    | 38 ++++++++++++++++++++++++++------
 drivers/net/qede/base/bcm_osal.h    |  3 +++
 drivers/net/qede/qede_main.c        |  7 ++++++
 lib/eal/common/eal_common_memzone.c | 44 ++++++++++++++++++++++++++++++++++---
 lib/eal/common/eal_memcfg.h         |  2 ++
 lib/eal/include/rte_memzone.h       | 30 +++++++++++++++++++++++++
 lib/eal/version.map                 |  4 ++++
 11 files changed, 147 insertions(+), 28 deletions(-)

diff --git a/app/test/test_func_reentrancy.c b/app/test/test_func_reentrancy.c
index d1ed5d4..ae9de6f 100644
--- a/app/test/test_func_reentrancy.c
+++ b/app/test/test_func_reentrancy.c
@@ -51,7 +51,7 @@ typedef void (*case_clean_t)(unsigned lcore_id);
 #define MEMPOOL_ELT_SIZE                    (sizeof(uint32_t))
 #define MEMPOOL_SIZE                        (4)
 
-#define MAX_LCORES	(RTE_MAX_MEMZONE / (MAX_ITER_MULTI * 4U))
+#define MAX_LCORES	(rte_memzone_max_get() / (MAX_ITER_MULTI * 4U))
 
 static uint32_t obj_count;
 static uint32_t synchro;
diff --git a/app/test/test_malloc_perf.c b/app/test/test_malloc_perf.c
index ccec43a..9bd1662 100644
--- a/app/test/test_malloc_perf.c
+++ b/app/test/test_malloc_perf.c
@@ -165,7 +165,7 @@ test_malloc_perf(void)
 		return -1;
 
 	if (test_alloc_perf("rte_memzone_reserve", memzone_alloc, memzone_free,
-			NULL, memset_us_gb, RTE_MAX_MEMZONE - 1) < 0)
+			NULL, memset_us_gb, rte_memzone_max_get() - 1) < 0)
 		return -1;
 
 	return 0;
diff --git a/app/test/test_memzone.c b/app/test/test_memzone.c
index c9255e5..f10f4fd 100644
--- a/app/test/test_memzone.c
+++ b/app/test/test_memzone.c
@@ -871,9 +871,17 @@ test_memzone_bounded(void)
 static int
 test_memzone_free(void)
 {
-	const struct rte_memzone *mz[RTE_MAX_MEMZONE + 1];
+	const struct rte_memzone **mz;
 	int i;
 	char name[20];
+	int rc = -1;
+
+	mz = rte_calloc("memzone_test", rte_memzone_max_get() + 1,
+			sizeof(struct rte_memzone *), 0);
+	if (!mz) {
+		printf("Fail allocating memzone test array\n");
+		return rc;
+	}
 
 	mz[0] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone0"), 2000,
 			SOCKET_ID_ANY, 0);
@@ -881,42 +889,42 @@ test_memzone_free(void)
 			SOCKET_ID_ANY, 0);
 
 	if (mz[0] > mz[1])
-		return -1;
+		goto exit_test;
 	if (!rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone0")))
-		return -1;
+		goto exit_test;
 	if (!rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone1")))
-		return -1;
+		goto exit_test;
 
 	if (rte_memzone_free(mz[0])) {
 		printf("Fail memzone free - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone0"))) {
 		printf("Found previously free memzone - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	mz[2] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone2"), 2000,
 			SOCKET_ID_ANY, 0);
 
 	if (mz[2] > mz[1]) {
 		printf("tempzone2 should have gotten the free entry from tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_free(mz[2])) {
 		printf("Fail memzone free - tempzone2\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone2"))) {
 		printf("Found previously free memzone - tempzone2\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_free(mz[1])) {
 		printf("Fail memzone free - tempzone1\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone1"))) {
 		printf("Found previously free memzone - tempzone1\n");
-		return -1;
+		goto exit_test;
 	}
 
 	i = 0;
@@ -928,7 +936,7 @@ test_memzone_free(void)
 
 	if (rte_memzone_free(mz[0])) {
 		printf("Fail memzone free - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	mz[0] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone0new"), 0,
 			SOCKET_ID_ANY, 0);
@@ -936,17 +944,21 @@ test_memzone_free(void)
 	if (mz[0] == NULL) {
 		printf("Fail to create memzone - tempzone0new - when MAX memzones were "
 				"created and one was free\n");
-		return -1;
+		goto exit_test;
 	}
 
 	for (i = i - 2; i >= 0; i--) {
 		if (rte_memzone_free(mz[i])) {
 			printf("Fail memzone free - tempzone%d\n", i);
-			return -1;
+			goto exit_test;
 		}
 	}
 
-	return 0;
+	rc = 0;
+
+exit_test:
+	rte_free(mz);
+	return rc;
 }
 
 static int test_memzones_left;
diff --git a/config/rte_config.h b/config/rte_config.h
index 7b8c85e..400e44e 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -34,7 +34,6 @@
 #define RTE_MAX_MEM_MB_PER_LIST 32768
 #define RTE_MAX_MEMSEG_PER_TYPE 32768
 #define RTE_MAX_MEM_MB_PER_TYPE 65536
-#define RTE_MAX_MEMZONE 2560
 #define RTE_MAX_TAILQ 32
 #define RTE_LOG_DP_LEVEL RTE_LOG_INFO
 #define RTE_MAX_VFIO_CONTAINERS 64
diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 2c59397..638d006 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -47,10 +47,34 @@ void osal_poll_mode_dpc(osal_int_ptr_t hwfn_cookie)
 }
 
 /* Array of memzone pointers */
-static const struct rte_memzone *ecore_mz_mapping[RTE_MAX_MEMZONE];
+static const struct rte_memzone **ecore_mz_mapping;
 /* Counter to track current memzone allocated */
 static uint16_t ecore_mz_count;
 
+static uint32_t ref_cnt;
+
+int ecore_mz_mapping_alloc(void)
+{
+	if (__atomic_fetch_add(&ref_cnt, 1, __ATOMIC_RELAXED) == 0) {
+		ecore_mz_mapping = rte_calloc("ecore_mz_map",
+				rte_memzone_max_get(), sizeof(struct rte_memzone *), 0);
+	}
+
+	if (!ecore_mz_mapping)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void ecore_mz_mapping_free(void)
+{
+	if (__atomic_fetch_sub(&ref_cnt, 1, __ATOMIC_RELAXED) - 1 == 0) {
+		if (ecore_mz_mapping)
+			rte_free(ecore_mz_mapping);
+		ecore_mz_mapping = NULL;
+	}
+}
+
 unsigned long qede_log2_align(unsigned long n)
 {
 	unsigned long ret = n ? 1 : 0;
@@ -132,9 +156,9 @@ void *osal_dma_alloc_coherent(struct ecore_dev *p_dev,
 	uint32_t core_id = rte_lcore_id();
 	unsigned int socket_id;
 
-	if (ecore_mz_count >= RTE_MAX_MEMZONE) {
-		DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
-		       RTE_MAX_MEMZONE);
+	if (ecore_mz_count >= rte_memzone_max_get()) {
+		DP_ERR(p_dev, "Memzone allocation count exceeds %zu\n",
+		       rte_memzone_max_get());
 		*phys = 0;
 		return OSAL_NULL;
 	}
@@ -171,9 +195,9 @@ void *osal_dma_alloc_coherent_aligned(struct ecore_dev *p_dev,
 	uint32_t core_id = rte_lcore_id();
 	unsigned int socket_id;
 
-	if (ecore_mz_count >= RTE_MAX_MEMZONE) {
-		DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
-		       RTE_MAX_MEMZONE);
+	if (ecore_mz_count >= rte_memzone_max_get()) {
+		DP_ERR(p_dev, "Memzone allocation count exceeds %zu\n",
+		       rte_memzone_max_get());
 		*phys = 0;
 		return OSAL_NULL;
 	}
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 67e7f75..97e261d 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -477,4 +477,7 @@ enum dbg_status	qed_dbg_alloc_user_data(struct ecore_hwfn *p_hwfn,
 	qed_dbg_alloc_user_data(p_hwfn, user_data_ptr)
 #define OSAL_DB_REC_OCCURRED(p_hwfn) nothing
 
+int ecore_mz_mapping_alloc(void);
+void ecore_mz_mapping_free(void);
+
 #endif /* __BCM_OSAL_H */
diff --git a/drivers/net/qede/qede_main.c b/drivers/net/qede/qede_main.c
index 0303903..fd63262 100644
--- a/drivers/net/qede/qede_main.c
+++ b/drivers/net/qede/qede_main.c
@@ -72,6 +72,12 @@ qed_probe(struct ecore_dev *edev, struct rte_pci_device *pci_dev,
 	hw_prepare_params.allow_mdump = false;
 	hw_prepare_params.b_en_pacing = false;
 	hw_prepare_params.epoch = OSAL_GET_EPOCH(ECORE_LEADING_HWFN(edev));
+	rc = ecore_mz_mapping_alloc();
+	if (rc) {
+		DP_ERR(edev, "mem zones array allocation failed\n");
+		return rc;
+	}
+
 	rc = ecore_hw_prepare(edev, &hw_prepare_params);
 	if (rc) {
 		DP_ERR(edev, "hw prepare failed\n");
@@ -722,6 +728,7 @@ static void qed_remove(struct ecore_dev *edev)
 		return;
 
 	ecore_hw_remove(edev);
+	ecore_mz_mapping_free();
 }
 
 static int qed_send_drv_state(struct ecore_dev *edev, bool active)
diff --git a/lib/eal/common/eal_common_memzone.c b/lib/eal/common/eal_common_memzone.c
index a9cd91f..9e3eedd 100644
--- a/lib/eal/common/eal_common_memzone.c
+++ b/lib/eal/common/eal_common_memzone.c
@@ -22,6 +22,9 @@
 #include "eal_private.h"
 #include "eal_memcfg.h"
 
+/* Default count used until rte_memzone_max_set() is called */
+#define DEFAULT_MAX_MEMZONE_COUNT 2560
+
 static inline const struct rte_memzone *
 memzone_lookup_thread_unsafe(const char *name)
 {
@@ -81,8 +84,9 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 	/* no more room in config */
 	if (arr->count >= arr->len) {
 		RTE_LOG(ERR, EAL,
-		"%s(): Number of requested memzone segments exceeds RTE_MAX_MEMZONE\n",
-			__func__);
+		"%s(): Number of requested memzone segments exceeds maximum "
+		"%u\n", __func__, arr->len);
+
 		rte_errno = ENOSPC;
 		return NULL;
 	}
@@ -396,7 +400,7 @@ rte_eal_memzone_init(void)
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
 			rte_fbarray_init(&mcfg->memzones, "memzone",
-			RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) {
+			rte_memzone_max_get(), sizeof(struct rte_memzone))) {
 		RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n");
 		ret = -1;
 	} else if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
@@ -430,3 +434,37 @@ void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
 	}
 	rte_rwlock_read_unlock(&mcfg->mlock);
 }
+
+int
+rte_memzone_max_set(size_t max)
+{
+	struct rte_mem_config *mcfg;
+
+	if (eal_get_internal_configuration()->init_complete > 0) {
+		RTE_LOG(ERR, EAL, "Max memzone cannot be set after calling "
+				  "eal init\n");
+		return -1;
+	}
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+	if (mcfg == NULL) {
+		RTE_LOG(ERR, EAL, "Failed to set max memzone\n");
+		return -1;
+	}
+
+	mcfg->max_memzone = max;
+
+	return 0;
+}
+
+size_t
+rte_memzone_max_get(void)
+{
+	struct rte_mem_config *mcfg;
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+	if (mcfg == NULL || mcfg->max_memzone ==0)
+		return DEFAULT_MAX_MEMZONE_COUNT;
+
+	return mcfg->max_memzone;
+}
diff --git a/lib/eal/common/eal_memcfg.h b/lib/eal/common/eal_memcfg.h
index ea013a9..8889ba0 100644
--- a/lib/eal/common/eal_memcfg.h
+++ b/lib/eal/common/eal_memcfg.h
@@ -75,6 +75,8 @@ struct rte_mem_config {
 	/**< TSC rate */
 
 	uint8_t dma_maskbits; /**< Keeps the more restricted dma mask. */
+
+	size_t max_memzone; /**< Maximum number of allocated memzones. */
 };
 
 /* update internal config from shared mem config */
diff --git a/lib/eal/include/rte_memzone.h b/lib/eal/include/rte_memzone.h
index 5302caa..44dcd22 100644
--- a/lib/eal/include/rte_memzone.h
+++ b/lib/eal/include/rte_memzone.h
@@ -305,6 +305,36 @@ void rte_memzone_dump(FILE *f);
 void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *arg),
 		      void *arg);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Set max memzone count.
+ *
+ * This function can only be called prior to rte_eal_init().
+ *
+ * @param max
+ *   Maximum number of memzones
+ * @return
+ *  0 on success, -1 otherwise
+ */
+__rte_experimental
+int rte_memzone_max_set(size_t max);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Get the maximum number of memzones.
+ *
+ * @note: The maximum value will not change after calling rte_eal_init().
+ *
+ * @return
+ *   Maximum number of memzones
+ */
+__rte_experimental
+size_t rte_memzone_max_get(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/version.map b/lib/eal/version.map
index 51a820d..b52a83c 100644
--- a/lib/eal/version.map
+++ b/lib/eal/version.map
@@ -430,6 +430,10 @@ EXPERIMENTAL {
 	rte_thread_create_control;
 	rte_thread_set_name;
 	__rte_eal_trace_generic_blob;
+
+	# added in 23.07
+	rte_memzone_max_set;
+	rte_memzone_max_get;
 };
 
 INTERNAL {
-- 
2.8.4


^ permalink raw reply	[flat|nested] 35+ messages in thread

* RE: [PATCH v4] lib: set/get max memzone segments
  2023-05-30 13:32       ` Thomas Monjalon
@ 2023-05-31  7:56         ` Ophir Munk
  0 siblings, 0 replies; 35+ messages in thread
From: Ophir Munk @ 2023-05-31  7:56 UTC (permalink / raw)
  To: NBU-Contact-Thomas Monjalon (EXTERNAL)
  Cc: dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad,
	Matan Azrad, Lior Margalit

> > --- a/lib/eal/common/eal_common_memzone.c
> > +++ b/lib/eal/common/eal_common_memzone.c
> > +#define DEFAULT_MAX_MEMZONE 2560
> 
> Maybe add "_COUNT" at the end to make clear it is not about the size of a
> memzone.
> We should add a comment here to explain the meaning of this default:
> used until the "set" function is called.
> 
> 

Ack

> > -		"%s(): Number of requested memzone segments exceeds
> RTE_MAX_MEMZONE\n",
> > -			__func__);
> > +		"%s(): Number of requested memzone segments exceeds "
> > +		"maximum %u\n", __func__, arr->len);
> 
> We should keep "maximum" on the first line to ease "grep" in the code.
> 

Ack

> > +int
> > +rte_memzone_max_set(size_t max)
> > +{
> > +	struct rte_mem_config *mcfg;
> > +
> > +	if (eal_get_internal_configuration()->init_complete > 0)
> > +		return -1;
> 
> An error log would be needed here I think.
> 

Ack

> > +
> > +	mcfg = rte_eal_get_configuration()->mem_config;
> > +	if (!mcfg)
> 
> Better to use "== NULL" for pointers.
> 
> > +		return -1;
> 

Ack

> Do we need an error log as well?
> 

Yes. Log was added.

> > +
> > +	mcfg = rte_eal_get_configuration()->mem_config;
> > +	if (!mcfg || !mcfg->max_memzone)
> 
> Same comment as above: don't use boolean operator for pointer or value.
> 

Ack

> > --- a/lib/eal/common/eal_memcfg.h
> > +++ b/lib/eal/common/eal_memcfg.h
> > @@ -75,6 +75,8 @@ struct rte_mem_config {
> >  	/**< TSC rate */
> >
> >  	uint8_t dma_maskbits; /**< Keeps the more restricted dma mask. */
> > +
> > +	size_t max_memzone; /**< maximum allowed allocated memzones.
> */
> 
> Uppercase for first work, and we may remove "allowed"?
> Suggestion: "Maximum number of allocated memzones."
> 

Ack

> [...]
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change without prior notice
> > + *
> > + * Set max memzone value
> 
> Add a dot at the end.
> Instead of "value", we should mention "number" or "count".
> 

Ack


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH V6] lib: set/get max memzone segments
  2023-05-31  7:52       ` [PATCH V5] " Ophir Munk
@ 2023-05-31  8:41         ` Ophir Munk
  2023-06-05  8:52           ` [PATCH V7] " Ophir Munk
  0 siblings, 1 reply; 35+ messages in thread
From: Ophir Munk @ 2023-05-31  8:41 UTC (permalink / raw)
  To: dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad
  Cc: Ophir Munk, Matan Azrad, Thomas Monjalon, Lior Margalit,
	David Marchand, Anatoly Burakov

Currently, the max memzones count constat (RTE_MAX_MEMZONE) is used to
decide how many memzones a DPDK application can have. This value could
technically be changed by manually editing `rte_config.h` before
compilation, but if DPDK is already compiled, that option is not useful.
There are certain use cases that would benefit from making this value
configurable.

This commit addresses the issue by adding a new API to set the max
number of memzones before EAL initialization (while using the old
constant as default value), as well as an API to get current maximum
number of memzones.

Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
Reviewed-by: Thomas Monjalon <thomas@monjalon.net>
Reviewed-by: David Marchand <david.marchand@redhat.com>
Reviewed-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Alok Prasad <palok@marvell.com>
---
 app/test/test_func_reentrancy.c     |  2 +-
 app/test/test_malloc_perf.c         |  2 +-
 app/test/test_memzone.c             | 42 ++++++++++++++++++++++-------------
 config/rte_config.h                 |  1 -
 drivers/net/qede/base/bcm_osal.c    | 38 ++++++++++++++++++++++++++------
 drivers/net/qede/base/bcm_osal.h    |  3 +++
 drivers/net/qede/qede_main.c        |  7 ++++++
 lib/eal/common/eal_common_memzone.c | 44 ++++++++++++++++++++++++++++++++++---
 lib/eal/common/eal_memcfg.h         |  2 ++
 lib/eal/include/rte_memzone.h       | 30 +++++++++++++++++++++++++
 lib/eal/version.map                 |  4 ++++
 11 files changed, 147 insertions(+), 28 deletions(-)

diff --git a/app/test/test_func_reentrancy.c b/app/test/test_func_reentrancy.c
index d1ed5d4..ae9de6f 100644

v6: Style error fix
--- a/app/test/test_func_reentrancy.c
+++ b/app/test/test_func_reentrancy.c
@@ -51,7 +51,7 @@ typedef void (*case_clean_t)(unsigned lcore_id);
 #define MEMPOOL_ELT_SIZE                    (sizeof(uint32_t))
 #define MEMPOOL_SIZE                        (4)
 
-#define MAX_LCORES	(RTE_MAX_MEMZONE / (MAX_ITER_MULTI * 4U))
+#define MAX_LCORES	(rte_memzone_max_get() / (MAX_ITER_MULTI * 4U))
 
 static uint32_t obj_count;
 static uint32_t synchro;
diff --git a/app/test/test_malloc_perf.c b/app/test/test_malloc_perf.c
index ccec43a..9bd1662 100644
--- a/app/test/test_malloc_perf.c
+++ b/app/test/test_malloc_perf.c
@@ -165,7 +165,7 @@ test_malloc_perf(void)
 		return -1;
 
 	if (test_alloc_perf("rte_memzone_reserve", memzone_alloc, memzone_free,
-			NULL, memset_us_gb, RTE_MAX_MEMZONE - 1) < 0)
+			NULL, memset_us_gb, rte_memzone_max_get() - 1) < 0)
 		return -1;
 
 	return 0;
diff --git a/app/test/test_memzone.c b/app/test/test_memzone.c
index c9255e5..f10f4fd 100644
--- a/app/test/test_memzone.c
+++ b/app/test/test_memzone.c
@@ -871,9 +871,17 @@ test_memzone_bounded(void)
 static int
 test_memzone_free(void)
 {
-	const struct rte_memzone *mz[RTE_MAX_MEMZONE + 1];
+	const struct rte_memzone **mz;
 	int i;
 	char name[20];
+	int rc = -1;
+
+	mz = rte_calloc("memzone_test", rte_memzone_max_get() + 1,
+			sizeof(struct rte_memzone *), 0);
+	if (!mz) {
+		printf("Fail allocating memzone test array\n");
+		return rc;
+	}
 
 	mz[0] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone0"), 2000,
 			SOCKET_ID_ANY, 0);
@@ -881,42 +889,42 @@ test_memzone_free(void)
 			SOCKET_ID_ANY, 0);
 
 	if (mz[0] > mz[1])
-		return -1;
+		goto exit_test;
 	if (!rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone0")))
-		return -1;
+		goto exit_test;
 	if (!rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone1")))
-		return -1;
+		goto exit_test;
 
 	if (rte_memzone_free(mz[0])) {
 		printf("Fail memzone free - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone0"))) {
 		printf("Found previously free memzone - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	mz[2] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone2"), 2000,
 			SOCKET_ID_ANY, 0);
 
 	if (mz[2] > mz[1]) {
 		printf("tempzone2 should have gotten the free entry from tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_free(mz[2])) {
 		printf("Fail memzone free - tempzone2\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone2"))) {
 		printf("Found previously free memzone - tempzone2\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_free(mz[1])) {
 		printf("Fail memzone free - tempzone1\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone1"))) {
 		printf("Found previously free memzone - tempzone1\n");
-		return -1;
+		goto exit_test;
 	}
 
 	i = 0;
@@ -928,7 +936,7 @@ test_memzone_free(void)
 
 	if (rte_memzone_free(mz[0])) {
 		printf("Fail memzone free - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	mz[0] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone0new"), 0,
 			SOCKET_ID_ANY, 0);
@@ -936,17 +944,21 @@ test_memzone_free(void)
 	if (mz[0] == NULL) {
 		printf("Fail to create memzone - tempzone0new - when MAX memzones were "
 				"created and one was free\n");
-		return -1;
+		goto exit_test;
 	}
 
 	for (i = i - 2; i >= 0; i--) {
 		if (rte_memzone_free(mz[i])) {
 			printf("Fail memzone free - tempzone%d\n", i);
-			return -1;
+			goto exit_test;
 		}
 	}
 
-	return 0;
+	rc = 0;
+
+exit_test:
+	rte_free(mz);
+	return rc;
 }
 
 static int test_memzones_left;
diff --git a/config/rte_config.h b/config/rte_config.h
index 7b8c85e..400e44e 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -34,7 +34,6 @@
 #define RTE_MAX_MEM_MB_PER_LIST 32768
 #define RTE_MAX_MEMSEG_PER_TYPE 32768
 #define RTE_MAX_MEM_MB_PER_TYPE 65536
-#define RTE_MAX_MEMZONE 2560
 #define RTE_MAX_TAILQ 32
 #define RTE_LOG_DP_LEVEL RTE_LOG_INFO
 #define RTE_MAX_VFIO_CONTAINERS 64
diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 2c59397..638d006 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -47,10 +47,34 @@ void osal_poll_mode_dpc(osal_int_ptr_t hwfn_cookie)
 }
 
 /* Array of memzone pointers */
-static const struct rte_memzone *ecore_mz_mapping[RTE_MAX_MEMZONE];
+static const struct rte_memzone **ecore_mz_mapping;
 /* Counter to track current memzone allocated */
 static uint16_t ecore_mz_count;
 
+static uint32_t ref_cnt;
+
+int ecore_mz_mapping_alloc(void)
+{
+	if (__atomic_fetch_add(&ref_cnt, 1, __ATOMIC_RELAXED) == 0) {
+		ecore_mz_mapping = rte_calloc("ecore_mz_map",
+				rte_memzone_max_get(), sizeof(struct rte_memzone *), 0);
+	}
+
+	if (!ecore_mz_mapping)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void ecore_mz_mapping_free(void)
+{
+	if (__atomic_fetch_sub(&ref_cnt, 1, __ATOMIC_RELAXED) - 1 == 0) {
+		if (ecore_mz_mapping)
+			rte_free(ecore_mz_mapping);
+		ecore_mz_mapping = NULL;
+	}
+}
+
 unsigned long qede_log2_align(unsigned long n)
 {
 	unsigned long ret = n ? 1 : 0;
@@ -132,9 +156,9 @@ void *osal_dma_alloc_coherent(struct ecore_dev *p_dev,
 	uint32_t core_id = rte_lcore_id();
 	unsigned int socket_id;
 
-	if (ecore_mz_count >= RTE_MAX_MEMZONE) {
-		DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
-		       RTE_MAX_MEMZONE);
+	if (ecore_mz_count >= rte_memzone_max_get()) {
+		DP_ERR(p_dev, "Memzone allocation count exceeds %zu\n",
+		       rte_memzone_max_get());
 		*phys = 0;
 		return OSAL_NULL;
 	}
@@ -171,9 +195,9 @@ void *osal_dma_alloc_coherent_aligned(struct ecore_dev *p_dev,
 	uint32_t core_id = rte_lcore_id();
 	unsigned int socket_id;
 
-	if (ecore_mz_count >= RTE_MAX_MEMZONE) {
-		DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
-		       RTE_MAX_MEMZONE);
+	if (ecore_mz_count >= rte_memzone_max_get()) {
+		DP_ERR(p_dev, "Memzone allocation count exceeds %zu\n",
+		       rte_memzone_max_get());
 		*phys = 0;
 		return OSAL_NULL;
 	}
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 67e7f75..97e261d 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -477,4 +477,7 @@ enum dbg_status	qed_dbg_alloc_user_data(struct ecore_hwfn *p_hwfn,
 	qed_dbg_alloc_user_data(p_hwfn, user_data_ptr)
 #define OSAL_DB_REC_OCCURRED(p_hwfn) nothing
 
+int ecore_mz_mapping_alloc(void);
+void ecore_mz_mapping_free(void);
+
 #endif /* __BCM_OSAL_H */
diff --git a/drivers/net/qede/qede_main.c b/drivers/net/qede/qede_main.c
index 0303903..fd63262 100644
--- a/drivers/net/qede/qede_main.c
+++ b/drivers/net/qede/qede_main.c
@@ -72,6 +72,12 @@ qed_probe(struct ecore_dev *edev, struct rte_pci_device *pci_dev,
 	hw_prepare_params.allow_mdump = false;
 	hw_prepare_params.b_en_pacing = false;
 	hw_prepare_params.epoch = OSAL_GET_EPOCH(ECORE_LEADING_HWFN(edev));
+	rc = ecore_mz_mapping_alloc();
+	if (rc) {
+		DP_ERR(edev, "mem zones array allocation failed\n");
+		return rc;
+	}
+
 	rc = ecore_hw_prepare(edev, &hw_prepare_params);
 	if (rc) {
 		DP_ERR(edev, "hw prepare failed\n");
@@ -722,6 +728,7 @@ static void qed_remove(struct ecore_dev *edev)
 		return;
 
 	ecore_hw_remove(edev);
+	ecore_mz_mapping_free();
 }
 
 static int qed_send_drv_state(struct ecore_dev *edev, bool active)
diff --git a/lib/eal/common/eal_common_memzone.c b/lib/eal/common/eal_common_memzone.c
index a9cd91f..d024410 100644
--- a/lib/eal/common/eal_common_memzone.c
+++ b/lib/eal/common/eal_common_memzone.c
@@ -22,6 +22,9 @@
 #include "eal_private.h"
 #include "eal_memcfg.h"
 
+/* Default count used until rte_memzone_max_set() is called */
+#define DEFAULT_MAX_MEMZONE_COUNT 2560
+
 static inline const struct rte_memzone *
 memzone_lookup_thread_unsafe(const char *name)
 {
@@ -81,8 +84,9 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 	/* no more room in config */
 	if (arr->count >= arr->len) {
 		RTE_LOG(ERR, EAL,
-		"%s(): Number of requested memzone segments exceeds RTE_MAX_MEMZONE\n",
-			__func__);
+		"%s(): Number of requested memzone segments exceeds maximum "
+		"%u\n", __func__, arr->len);
+
 		rte_errno = ENOSPC;
 		return NULL;
 	}
@@ -396,7 +400,7 @@ rte_eal_memzone_init(void)
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
 			rte_fbarray_init(&mcfg->memzones, "memzone",
-			RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) {
+			rte_memzone_max_get(), sizeof(struct rte_memzone))) {
 		RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n");
 		ret = -1;
 	} else if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
@@ -430,3 +434,37 @@ void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
 	}
 	rte_rwlock_read_unlock(&mcfg->mlock);
 }
+
+int
+rte_memzone_max_set(size_t max)
+{
+	struct rte_mem_config *mcfg;
+
+	if (eal_get_internal_configuration()->init_complete > 0) {
+		RTE_LOG(ERR, EAL, "Max memzone cannot be set after calling "
+				  "eal init\n");
+		return -1;
+	}
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+	if (mcfg == NULL) {
+		RTE_LOG(ERR, EAL, "Failed to set max memzone\n");
+		return -1;
+	}
+
+	mcfg->max_memzone = max;
+
+	return 0;
+}
+
+size_t
+rte_memzone_max_get(void)
+{
+	struct rte_mem_config *mcfg;
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+	if (mcfg == NULL || mcfg->max_memzone == 0)
+		return DEFAULT_MAX_MEMZONE_COUNT;
+
+	return mcfg->max_memzone;
+}
diff --git a/lib/eal/common/eal_memcfg.h b/lib/eal/common/eal_memcfg.h
index ea013a9..8889ba0 100644
--- a/lib/eal/common/eal_memcfg.h
+++ b/lib/eal/common/eal_memcfg.h
@@ -75,6 +75,8 @@ struct rte_mem_config {
 	/**< TSC rate */
 
 	uint8_t dma_maskbits; /**< Keeps the more restricted dma mask. */
+
+	size_t max_memzone; /**< Maximum number of allocated memzones. */
 };
 
 /* update internal config from shared mem config */
diff --git a/lib/eal/include/rte_memzone.h b/lib/eal/include/rte_memzone.h
index 5302caa..44dcd22 100644
--- a/lib/eal/include/rte_memzone.h
+++ b/lib/eal/include/rte_memzone.h
@@ -305,6 +305,36 @@ void rte_memzone_dump(FILE *f);
 void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *arg),
 		      void *arg);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Set max memzone count.
+ *
+ * This function can only be called prior to rte_eal_init().
+ *
+ * @param max
+ *   Maximum number of memzones
+ * @return
+ *  0 on success, -1 otherwise
+ */
+__rte_experimental
+int rte_memzone_max_set(size_t max);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Get the maximum number of memzones.
+ *
+ * @note: The maximum value will not change after calling rte_eal_init().
+ *
+ * @return
+ *   Maximum number of memzones
+ */
+__rte_experimental
+size_t rte_memzone_max_get(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/version.map b/lib/eal/version.map
index 51a820d..b52a83c 100644
--- a/lib/eal/version.map
+++ b/lib/eal/version.map
@@ -430,6 +430,10 @@ EXPERIMENTAL {
 	rte_thread_create_control;
 	rte_thread_set_name;
 	__rte_eal_trace_generic_blob;
+
+	# added in 23.07
+	rte_memzone_max_set;
+	rte_memzone_max_get;
 };
 
 INTERNAL {
-- 
2.8.4


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH V7] lib: set/get max memzone segments
  2023-05-31  8:41         ` [PATCH V6] " Ophir Munk
@ 2023-06-05  8:52           ` Ophir Munk
  2023-06-05 10:50             ` [PATCH V8] " Ophir Munk
  0 siblings, 1 reply; 35+ messages in thread
From: Ophir Munk @ 2023-06-05  8:52 UTC (permalink / raw)
  To: dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad, Maayan Kashani
  Cc: Ophir Munk, Matan Azrad, Thomas Monjalon, Lior Margalit,
	David Marchand, Anatoly Burakov

Currently, the max memzones count constat (RTE_MAX_MEMZONE) is used to
decide how many memzones a DPDK application can have. This value could
technically be changed by manually editing `rte_config.h` before
compilation, but if DPDK is already compiled, that option is not useful.
There are certain use cases that would benefit from making this value
configurable.

This commit addresses the issue by adding a new API to set the max
number of memzones before EAL initialization (while using the old
constant as default value), as well as an API to get current maximum
number of memzones.

Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
Reviewed-by: Thomas Monjalon <thomas@monjalon.net>
Reviewed-by: David Marchand <david.marchand@redhat.com>
Reviewed-by: Anatoly Burakov <anatoly.burakov@intel.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Alok Prasad <palok@marvell.com>
---
 app/test/test_func_reentrancy.c     |  2 +-
 app/test/test_malloc_perf.c         |  2 +-
 app/test/test_memzone.c             | 42 ++++++++++++++++++++++-------------
 config/rte_config.h                 |  1 -
 drivers/net/qede/base/bcm_osal.c    | 38 ++++++++++++++++++++++++++------
 drivers/net/qede/base/bcm_osal.h    |  3 +++
 drivers/net/qede/qede_main.c        |  7 ++++++
 lib/eal/common/eal_common_memzone.c | 44 ++++++++++++++++++++++++++++++++++---
 lib/eal/common/eal_memcfg.h         |  2 ++
 lib/eal/include/rte_memzone.h       | 30 +++++++++++++++++++++++++
 lib/eal/version.map                 |  4 ++++
 11 files changed, 147 insertions(+), 28 deletions(-)

diff --git a/app/test/test_func_reentrancy.c b/app/test/test_func_reentrancy.c
index d1ed5d4..ae9de6f 100644

V7: Rebased on latest main branch

--- a/app/test/test_func_reentrancy.c
+++ b/app/test/test_func_reentrancy.c
@@ -51,7 +51,7 @@ typedef void (*case_clean_t)(unsigned lcore_id);
 #define MEMPOOL_ELT_SIZE                    (sizeof(uint32_t))
 #define MEMPOOL_SIZE                        (4)
 
-#define MAX_LCORES	(RTE_MAX_MEMZONE / (MAX_ITER_MULTI * 4U))
+#define MAX_LCORES	(rte_memzone_max_get() / (MAX_ITER_MULTI * 4U))
 
 static uint32_t obj_count;
 static uint32_t synchro;
diff --git a/app/test/test_malloc_perf.c b/app/test/test_malloc_perf.c
index ccec43a..9bd1662 100644
--- a/app/test/test_malloc_perf.c
+++ b/app/test/test_malloc_perf.c
@@ -165,7 +165,7 @@ test_malloc_perf(void)
 		return -1;
 
 	if (test_alloc_perf("rte_memzone_reserve", memzone_alloc, memzone_free,
-			NULL, memset_us_gb, RTE_MAX_MEMZONE - 1) < 0)
+			NULL, memset_us_gb, rte_memzone_max_get() - 1) < 0)
 		return -1;
 
 	return 0;
diff --git a/app/test/test_memzone.c b/app/test/test_memzone.c
index c9255e5..f10f4fd 100644
--- a/app/test/test_memzone.c
+++ b/app/test/test_memzone.c
@@ -871,9 +871,17 @@ test_memzone_bounded(void)
 static int
 test_memzone_free(void)
 {
-	const struct rte_memzone *mz[RTE_MAX_MEMZONE + 1];
+	const struct rte_memzone **mz;
 	int i;
 	char name[20];
+	int rc = -1;
+
+	mz = rte_calloc("memzone_test", rte_memzone_max_get() + 1,
+			sizeof(struct rte_memzone *), 0);
+	if (!mz) {
+		printf("Fail allocating memzone test array\n");
+		return rc;
+	}
 
 	mz[0] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone0"), 2000,
 			SOCKET_ID_ANY, 0);
@@ -881,42 +889,42 @@ test_memzone_free(void)
 			SOCKET_ID_ANY, 0);
 
 	if (mz[0] > mz[1])
-		return -1;
+		goto exit_test;
 	if (!rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone0")))
-		return -1;
+		goto exit_test;
 	if (!rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone1")))
-		return -1;
+		goto exit_test;
 
 	if (rte_memzone_free(mz[0])) {
 		printf("Fail memzone free - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone0"))) {
 		printf("Found previously free memzone - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	mz[2] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone2"), 2000,
 			SOCKET_ID_ANY, 0);
 
 	if (mz[2] > mz[1]) {
 		printf("tempzone2 should have gotten the free entry from tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_free(mz[2])) {
 		printf("Fail memzone free - tempzone2\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone2"))) {
 		printf("Found previously free memzone - tempzone2\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_free(mz[1])) {
 		printf("Fail memzone free - tempzone1\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone1"))) {
 		printf("Found previously free memzone - tempzone1\n");
-		return -1;
+		goto exit_test;
 	}
 
 	i = 0;
@@ -928,7 +936,7 @@ test_memzone_free(void)
 
 	if (rte_memzone_free(mz[0])) {
 		printf("Fail memzone free - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	mz[0] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone0new"), 0,
 			SOCKET_ID_ANY, 0);
@@ -936,17 +944,21 @@ test_memzone_free(void)
 	if (mz[0] == NULL) {
 		printf("Fail to create memzone - tempzone0new - when MAX memzones were "
 				"created and one was free\n");
-		return -1;
+		goto exit_test;
 	}
 
 	for (i = i - 2; i >= 0; i--) {
 		if (rte_memzone_free(mz[i])) {
 			printf("Fail memzone free - tempzone%d\n", i);
-			return -1;
+			goto exit_test;
 		}
 	}
 
-	return 0;
+	rc = 0;
+
+exit_test:
+	rte_free(mz);
+	return rc;
 }
 
 static int test_memzones_left;
diff --git a/config/rte_config.h b/config/rte_config.h
index 7b8c85e..400e44e 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -34,7 +34,6 @@
 #define RTE_MAX_MEM_MB_PER_LIST 32768
 #define RTE_MAX_MEMSEG_PER_TYPE 32768
 #define RTE_MAX_MEM_MB_PER_TYPE 65536
-#define RTE_MAX_MEMZONE 2560
 #define RTE_MAX_TAILQ 32
 #define RTE_LOG_DP_LEVEL RTE_LOG_INFO
 #define RTE_MAX_VFIO_CONTAINERS 64
diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 2c59397..638d006 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -47,10 +47,34 @@ void osal_poll_mode_dpc(osal_int_ptr_t hwfn_cookie)
 }
 
 /* Array of memzone pointers */
-static const struct rte_memzone *ecore_mz_mapping[RTE_MAX_MEMZONE];
+static const struct rte_memzone **ecore_mz_mapping;
 /* Counter to track current memzone allocated */
 static uint16_t ecore_mz_count;
 
+static uint32_t ref_cnt;
+
+int ecore_mz_mapping_alloc(void)
+{
+	if (__atomic_fetch_add(&ref_cnt, 1, __ATOMIC_RELAXED) == 0) {
+		ecore_mz_mapping = rte_calloc("ecore_mz_map",
+				rte_memzone_max_get(), sizeof(struct rte_memzone *), 0);
+	}
+
+	if (!ecore_mz_mapping)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void ecore_mz_mapping_free(void)
+{
+	if (__atomic_fetch_sub(&ref_cnt, 1, __ATOMIC_RELAXED) - 1 == 0) {
+		if (ecore_mz_mapping)
+			rte_free(ecore_mz_mapping);
+		ecore_mz_mapping = NULL;
+	}
+}
+
 unsigned long qede_log2_align(unsigned long n)
 {
 	unsigned long ret = n ? 1 : 0;
@@ -132,9 +156,9 @@ void *osal_dma_alloc_coherent(struct ecore_dev *p_dev,
 	uint32_t core_id = rte_lcore_id();
 	unsigned int socket_id;
 
-	if (ecore_mz_count >= RTE_MAX_MEMZONE) {
-		DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
-		       RTE_MAX_MEMZONE);
+	if (ecore_mz_count >= rte_memzone_max_get()) {
+		DP_ERR(p_dev, "Memzone allocation count exceeds %zu\n",
+		       rte_memzone_max_get());
 		*phys = 0;
 		return OSAL_NULL;
 	}
@@ -171,9 +195,9 @@ void *osal_dma_alloc_coherent_aligned(struct ecore_dev *p_dev,
 	uint32_t core_id = rte_lcore_id();
 	unsigned int socket_id;
 
-	if (ecore_mz_count >= RTE_MAX_MEMZONE) {
-		DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
-		       RTE_MAX_MEMZONE);
+	if (ecore_mz_count >= rte_memzone_max_get()) {
+		DP_ERR(p_dev, "Memzone allocation count exceeds %zu\n",
+		       rte_memzone_max_get());
 		*phys = 0;
 		return OSAL_NULL;
 	}
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 67e7f75..97e261d 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -477,4 +477,7 @@ enum dbg_status	qed_dbg_alloc_user_data(struct ecore_hwfn *p_hwfn,
 	qed_dbg_alloc_user_data(p_hwfn, user_data_ptr)
 #define OSAL_DB_REC_OCCURRED(p_hwfn) nothing
 
+int ecore_mz_mapping_alloc(void);
+void ecore_mz_mapping_free(void);
+
 #endif /* __BCM_OSAL_H */
diff --git a/drivers/net/qede/qede_main.c b/drivers/net/qede/qede_main.c
index 0303903..fd63262 100644
--- a/drivers/net/qede/qede_main.c
+++ b/drivers/net/qede/qede_main.c
@@ -72,6 +72,12 @@ qed_probe(struct ecore_dev *edev, struct rte_pci_device *pci_dev,
 	hw_prepare_params.allow_mdump = false;
 	hw_prepare_params.b_en_pacing = false;
 	hw_prepare_params.epoch = OSAL_GET_EPOCH(ECORE_LEADING_HWFN(edev));
+	rc = ecore_mz_mapping_alloc();
+	if (rc) {
+		DP_ERR(edev, "mem zones array allocation failed\n");
+		return rc;
+	}
+
 	rc = ecore_hw_prepare(edev, &hw_prepare_params);
 	if (rc) {
 		DP_ERR(edev, "hw prepare failed\n");
@@ -722,6 +728,7 @@ static void qed_remove(struct ecore_dev *edev)
 		return;
 
 	ecore_hw_remove(edev);
+	ecore_mz_mapping_free();
 }
 
 static int qed_send_drv_state(struct ecore_dev *edev, bool active)
diff --git a/lib/eal/common/eal_common_memzone.c b/lib/eal/common/eal_common_memzone.c
index a9cd91f..d024410 100644
--- a/lib/eal/common/eal_common_memzone.c
+++ b/lib/eal/common/eal_common_memzone.c
@@ -22,6 +22,9 @@
 #include "eal_private.h"
 #include "eal_memcfg.h"
 
+/* Default count used until rte_memzone_max_set() is called */
+#define DEFAULT_MAX_MEMZONE_COUNT 2560
+
 static inline const struct rte_memzone *
 memzone_lookup_thread_unsafe(const char *name)
 {
@@ -81,8 +84,9 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 	/* no more room in config */
 	if (arr->count >= arr->len) {
 		RTE_LOG(ERR, EAL,
-		"%s(): Number of requested memzone segments exceeds RTE_MAX_MEMZONE\n",
-			__func__);
+		"%s(): Number of requested memzone segments exceeds maximum "
+		"%u\n", __func__, arr->len);
+
 		rte_errno = ENOSPC;
 		return NULL;
 	}
@@ -396,7 +400,7 @@ rte_eal_memzone_init(void)
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
 			rte_fbarray_init(&mcfg->memzones, "memzone",
-			RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) {
+			rte_memzone_max_get(), sizeof(struct rte_memzone))) {
 		RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n");
 		ret = -1;
 	} else if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
@@ -430,3 +434,37 @@ void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
 	}
 	rte_rwlock_read_unlock(&mcfg->mlock);
 }
+
+int
+rte_memzone_max_set(size_t max)
+{
+	struct rte_mem_config *mcfg;
+
+	if (eal_get_internal_configuration()->init_complete > 0) {
+		RTE_LOG(ERR, EAL, "Max memzone cannot be set after calling "
+				  "eal init\n");
+		return -1;
+	}
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+	if (mcfg == NULL) {
+		RTE_LOG(ERR, EAL, "Failed to set max memzone\n");
+		return -1;
+	}
+
+	mcfg->max_memzone = max;
+
+	return 0;
+}
+
+size_t
+rte_memzone_max_get(void)
+{
+	struct rte_mem_config *mcfg;
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+	if (mcfg == NULL || mcfg->max_memzone == 0)
+		return DEFAULT_MAX_MEMZONE_COUNT;
+
+	return mcfg->max_memzone;
+}
diff --git a/lib/eal/common/eal_memcfg.h b/lib/eal/common/eal_memcfg.h
index ea013a9..8889ba0 100644
--- a/lib/eal/common/eal_memcfg.h
+++ b/lib/eal/common/eal_memcfg.h
@@ -75,6 +75,8 @@ struct rte_mem_config {
 	/**< TSC rate */
 
 	uint8_t dma_maskbits; /**< Keeps the more restricted dma mask. */
+
+	size_t max_memzone; /**< Maximum number of allocated memzones. */
 };
 
 /* update internal config from shared mem config */
diff --git a/lib/eal/include/rte_memzone.h b/lib/eal/include/rte_memzone.h
index 5302caa..44dcd22 100644
--- a/lib/eal/include/rte_memzone.h
+++ b/lib/eal/include/rte_memzone.h
@@ -305,6 +305,36 @@ void rte_memzone_dump(FILE *f);
 void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *arg),
 		      void *arg);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Set max memzone count.
+ *
+ * This function can only be called prior to rte_eal_init().
+ *
+ * @param max
+ *   Maximum number of memzones
+ * @return
+ *  0 on success, -1 otherwise
+ */
+__rte_experimental
+int rte_memzone_max_set(size_t max);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Get the maximum number of memzones.
+ *
+ * @note: The maximum value will not change after calling rte_eal_init().
+ *
+ * @return
+ *   Maximum number of memzones
+ */
+__rte_experimental
+size_t rte_memzone_max_get(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/version.map b/lib/eal/version.map
index 51a820d..b52a83c 100644
--- a/lib/eal/version.map
+++ b/lib/eal/version.map
@@ -430,6 +430,10 @@ EXPERIMENTAL {
 	rte_thread_create_control;
 	rte_thread_set_name;
 	__rte_eal_trace_generic_blob;
+
+	# added in 23.07
+	rte_memzone_max_set;
+	rte_memzone_max_get;
 };
 
 INTERNAL {
-- 
2.8.4


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH V8] lib: set/get max memzone segments
  2023-06-05  8:52           ` [PATCH V7] " Ophir Munk
@ 2023-06-05 10:50             ` Ophir Munk
  2023-06-05 16:50               ` Thomas Monjalon
  0 siblings, 1 reply; 35+ messages in thread
From: Ophir Munk @ 2023-06-05 10:50 UTC (permalink / raw)
  To: dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad, Maayan Kashani
  Cc: Ophir Munk, Matan Azrad, Thomas Monjalon, Lior Margalit,
	David Marchand, Anatoly Burakov

Currently, the max memzones count constat (RTE_MAX_MEMZONE) is used to
decide how many memzones a DPDK application can have. This value could
technically be changed by manually editing `rte_config.h` before
compilation, but if DPDK is already compiled, that option is not useful.
There are certain use cases that would benefit from making this value
configurable.

This commit addresses the issue by adding a new API to set the max
number of memzones before EAL initialization (while using the old
constant as default value), as well as an API to get current maximum
number of memzones.

Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Alok Prasad <palok@marvell.com>
---
V1-V5 updates based on review comments
V6 Style erorr fix
V7: rebased
V8: positioning changelog in the right place
 app/test/test_func_reentrancy.c     |  2 +-
 app/test/test_malloc_perf.c         |  2 +-
 app/test/test_memzone.c             | 42 ++++++++++++++++++++++-------------
 config/rte_config.h                 |  1 -
 drivers/net/qede/base/bcm_osal.c    | 38 ++++++++++++++++++++++++++------
 drivers/net/qede/base/bcm_osal.h    |  3 +++
 drivers/net/qede/qede_main.c        |  7 ++++++
 lib/eal/common/eal_common_memzone.c | 44 ++++++++++++++++++++++++++++++++++---
 lib/eal/common/eal_memcfg.h         |  2 ++
 lib/eal/include/rte_memzone.h       | 30 +++++++++++++++++++++++++
 lib/eal/version.map                 |  4 ++++
 11 files changed, 147 insertions(+), 28 deletions(-)

diff --git a/app/test/test_func_reentrancy.c b/app/test/test_func_reentrancy.c
index d1ed5d4..ae9de6f 100644
--- a/app/test/test_func_reentrancy.c
+++ b/app/test/test_func_reentrancy.c
@@ -51,7 +51,7 @@ typedef void (*case_clean_t)(unsigned lcore_id);
 #define MEMPOOL_ELT_SIZE                    (sizeof(uint32_t))
 #define MEMPOOL_SIZE                        (4)
 
-#define MAX_LCORES	(RTE_MAX_MEMZONE / (MAX_ITER_MULTI * 4U))
+#define MAX_LCORES	(rte_memzone_max_get() / (MAX_ITER_MULTI * 4U))
 
 static uint32_t obj_count;
 static uint32_t synchro;
diff --git a/app/test/test_malloc_perf.c b/app/test/test_malloc_perf.c
index ccec43a..9bd1662 100644
--- a/app/test/test_malloc_perf.c
+++ b/app/test/test_malloc_perf.c
@@ -165,7 +165,7 @@ test_malloc_perf(void)
 		return -1;
 
 	if (test_alloc_perf("rte_memzone_reserve", memzone_alloc, memzone_free,
-			NULL, memset_us_gb, RTE_MAX_MEMZONE - 1) < 0)
+			NULL, memset_us_gb, rte_memzone_max_get() - 1) < 0)
 		return -1;
 
 	return 0;
diff --git a/app/test/test_memzone.c b/app/test/test_memzone.c
index c9255e5..f10f4fd 100644
--- a/app/test/test_memzone.c
+++ b/app/test/test_memzone.c
@@ -871,9 +871,17 @@ test_memzone_bounded(void)
 static int
 test_memzone_free(void)
 {
-	const struct rte_memzone *mz[RTE_MAX_MEMZONE + 1];
+	const struct rte_memzone **mz;
 	int i;
 	char name[20];
+	int rc = -1;
+
+	mz = rte_calloc("memzone_test", rte_memzone_max_get() + 1,
+			sizeof(struct rte_memzone *), 0);
+	if (!mz) {
+		printf("Fail allocating memzone test array\n");
+		return rc;
+	}
 
 	mz[0] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone0"), 2000,
 			SOCKET_ID_ANY, 0);
@@ -881,42 +889,42 @@ test_memzone_free(void)
 			SOCKET_ID_ANY, 0);
 
 	if (mz[0] > mz[1])
-		return -1;
+		goto exit_test;
 	if (!rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone0")))
-		return -1;
+		goto exit_test;
 	if (!rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone1")))
-		return -1;
+		goto exit_test;
 
 	if (rte_memzone_free(mz[0])) {
 		printf("Fail memzone free - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone0"))) {
 		printf("Found previously free memzone - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	mz[2] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone2"), 2000,
 			SOCKET_ID_ANY, 0);
 
 	if (mz[2] > mz[1]) {
 		printf("tempzone2 should have gotten the free entry from tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_free(mz[2])) {
 		printf("Fail memzone free - tempzone2\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone2"))) {
 		printf("Found previously free memzone - tempzone2\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_free(mz[1])) {
 		printf("Fail memzone free - tempzone1\n");
-		return -1;
+		goto exit_test;
 	}
 	if (rte_memzone_lookup(TEST_MEMZONE_NAME("tempzone1"))) {
 		printf("Found previously free memzone - tempzone1\n");
-		return -1;
+		goto exit_test;
 	}
 
 	i = 0;
@@ -928,7 +936,7 @@ test_memzone_free(void)
 
 	if (rte_memzone_free(mz[0])) {
 		printf("Fail memzone free - tempzone0\n");
-		return -1;
+		goto exit_test;
 	}
 	mz[0] = rte_memzone_reserve(TEST_MEMZONE_NAME("tempzone0new"), 0,
 			SOCKET_ID_ANY, 0);
@@ -936,17 +944,21 @@ test_memzone_free(void)
 	if (mz[0] == NULL) {
 		printf("Fail to create memzone - tempzone0new - when MAX memzones were "
 				"created and one was free\n");
-		return -1;
+		goto exit_test;
 	}
 
 	for (i = i - 2; i >= 0; i--) {
 		if (rte_memzone_free(mz[i])) {
 			printf("Fail memzone free - tempzone%d\n", i);
-			return -1;
+			goto exit_test;
 		}
 	}
 
-	return 0;
+	rc = 0;
+
+exit_test:
+	rte_free(mz);
+	return rc;
 }
 
 static int test_memzones_left;
diff --git a/config/rte_config.h b/config/rte_config.h
index 7b8c85e..400e44e 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -34,7 +34,6 @@
 #define RTE_MAX_MEM_MB_PER_LIST 32768
 #define RTE_MAX_MEMSEG_PER_TYPE 32768
 #define RTE_MAX_MEM_MB_PER_TYPE 65536
-#define RTE_MAX_MEMZONE 2560
 #define RTE_MAX_TAILQ 32
 #define RTE_LOG_DP_LEVEL RTE_LOG_INFO
 #define RTE_MAX_VFIO_CONTAINERS 64
diff --git a/drivers/net/qede/base/bcm_osal.c b/drivers/net/qede/base/bcm_osal.c
index 2c59397..638d006 100644
--- a/drivers/net/qede/base/bcm_osal.c
+++ b/drivers/net/qede/base/bcm_osal.c
@@ -47,10 +47,34 @@ void osal_poll_mode_dpc(osal_int_ptr_t hwfn_cookie)
 }
 
 /* Array of memzone pointers */
-static const struct rte_memzone *ecore_mz_mapping[RTE_MAX_MEMZONE];
+static const struct rte_memzone **ecore_mz_mapping;
 /* Counter to track current memzone allocated */
 static uint16_t ecore_mz_count;
 
+static uint32_t ref_cnt;
+
+int ecore_mz_mapping_alloc(void)
+{
+	if (__atomic_fetch_add(&ref_cnt, 1, __ATOMIC_RELAXED) == 0) {
+		ecore_mz_mapping = rte_calloc("ecore_mz_map",
+				rte_memzone_max_get(), sizeof(struct rte_memzone *), 0);
+	}
+
+	if (!ecore_mz_mapping)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void ecore_mz_mapping_free(void)
+{
+	if (__atomic_fetch_sub(&ref_cnt, 1, __ATOMIC_RELAXED) - 1 == 0) {
+		if (ecore_mz_mapping)
+			rte_free(ecore_mz_mapping);
+		ecore_mz_mapping = NULL;
+	}
+}
+
 unsigned long qede_log2_align(unsigned long n)
 {
 	unsigned long ret = n ? 1 : 0;
@@ -132,9 +156,9 @@ void *osal_dma_alloc_coherent(struct ecore_dev *p_dev,
 	uint32_t core_id = rte_lcore_id();
 	unsigned int socket_id;
 
-	if (ecore_mz_count >= RTE_MAX_MEMZONE) {
-		DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
-		       RTE_MAX_MEMZONE);
+	if (ecore_mz_count >= rte_memzone_max_get()) {
+		DP_ERR(p_dev, "Memzone allocation count exceeds %zu\n",
+		       rte_memzone_max_get());
 		*phys = 0;
 		return OSAL_NULL;
 	}
@@ -171,9 +195,9 @@ void *osal_dma_alloc_coherent_aligned(struct ecore_dev *p_dev,
 	uint32_t core_id = rte_lcore_id();
 	unsigned int socket_id;
 
-	if (ecore_mz_count >= RTE_MAX_MEMZONE) {
-		DP_ERR(p_dev, "Memzone allocation count exceeds %u\n",
-		       RTE_MAX_MEMZONE);
+	if (ecore_mz_count >= rte_memzone_max_get()) {
+		DP_ERR(p_dev, "Memzone allocation count exceeds %zu\n",
+		       rte_memzone_max_get());
 		*phys = 0;
 		return OSAL_NULL;
 	}
diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 67e7f75..97e261d 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -477,4 +477,7 @@ enum dbg_status	qed_dbg_alloc_user_data(struct ecore_hwfn *p_hwfn,
 	qed_dbg_alloc_user_data(p_hwfn, user_data_ptr)
 #define OSAL_DB_REC_OCCURRED(p_hwfn) nothing
 
+int ecore_mz_mapping_alloc(void);
+void ecore_mz_mapping_free(void);
+
 #endif /* __BCM_OSAL_H */
diff --git a/drivers/net/qede/qede_main.c b/drivers/net/qede/qede_main.c
index 0303903..fd63262 100644
--- a/drivers/net/qede/qede_main.c
+++ b/drivers/net/qede/qede_main.c
@@ -72,6 +72,12 @@ qed_probe(struct ecore_dev *edev, struct rte_pci_device *pci_dev,
 	hw_prepare_params.allow_mdump = false;
 	hw_prepare_params.b_en_pacing = false;
 	hw_prepare_params.epoch = OSAL_GET_EPOCH(ECORE_LEADING_HWFN(edev));
+	rc = ecore_mz_mapping_alloc();
+	if (rc) {
+		DP_ERR(edev, "mem zones array allocation failed\n");
+		return rc;
+	}
+
 	rc = ecore_hw_prepare(edev, &hw_prepare_params);
 	if (rc) {
 		DP_ERR(edev, "hw prepare failed\n");
@@ -722,6 +728,7 @@ static void qed_remove(struct ecore_dev *edev)
 		return;
 
 	ecore_hw_remove(edev);
+	ecore_mz_mapping_free();
 }
 
 static int qed_send_drv_state(struct ecore_dev *edev, bool active)
diff --git a/lib/eal/common/eal_common_memzone.c b/lib/eal/common/eal_common_memzone.c
index a9cd91f..d024410 100644
--- a/lib/eal/common/eal_common_memzone.c
+++ b/lib/eal/common/eal_common_memzone.c
@@ -22,6 +22,9 @@
 #include "eal_private.h"
 #include "eal_memcfg.h"
 
+/* Default count used until rte_memzone_max_set() is called */
+#define DEFAULT_MAX_MEMZONE_COUNT 2560
+
 static inline const struct rte_memzone *
 memzone_lookup_thread_unsafe(const char *name)
 {
@@ -81,8 +84,9 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 	/* no more room in config */
 	if (arr->count >= arr->len) {
 		RTE_LOG(ERR, EAL,
-		"%s(): Number of requested memzone segments exceeds RTE_MAX_MEMZONE\n",
-			__func__);
+		"%s(): Number of requested memzone segments exceeds maximum "
+		"%u\n", __func__, arr->len);
+
 		rte_errno = ENOSPC;
 		return NULL;
 	}
@@ -396,7 +400,7 @@ rte_eal_memzone_init(void)
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
 			rte_fbarray_init(&mcfg->memzones, "memzone",
-			RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) {
+			rte_memzone_max_get(), sizeof(struct rte_memzone))) {
 		RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n");
 		ret = -1;
 	} else if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
@@ -430,3 +434,37 @@ void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
 	}
 	rte_rwlock_read_unlock(&mcfg->mlock);
 }
+
+int
+rte_memzone_max_set(size_t max)
+{
+	struct rte_mem_config *mcfg;
+
+	if (eal_get_internal_configuration()->init_complete > 0) {
+		RTE_LOG(ERR, EAL, "Max memzone cannot be set after calling "
+				  "eal init\n");
+		return -1;
+	}
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+	if (mcfg == NULL) {
+		RTE_LOG(ERR, EAL, "Failed to set max memzone\n");
+		return -1;
+	}
+
+	mcfg->max_memzone = max;
+
+	return 0;
+}
+
+size_t
+rte_memzone_max_get(void)
+{
+	struct rte_mem_config *mcfg;
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+	if (mcfg == NULL || mcfg->max_memzone == 0)
+		return DEFAULT_MAX_MEMZONE_COUNT;
+
+	return mcfg->max_memzone;
+}
diff --git a/lib/eal/common/eal_memcfg.h b/lib/eal/common/eal_memcfg.h
index ea013a9..8889ba0 100644
--- a/lib/eal/common/eal_memcfg.h
+++ b/lib/eal/common/eal_memcfg.h
@@ -75,6 +75,8 @@ struct rte_mem_config {
 	/**< TSC rate */
 
 	uint8_t dma_maskbits; /**< Keeps the more restricted dma mask. */
+
+	size_t max_memzone; /**< Maximum number of allocated memzones. */
 };
 
 /* update internal config from shared mem config */
diff --git a/lib/eal/include/rte_memzone.h b/lib/eal/include/rte_memzone.h
index 5302caa..44dcd22 100644
--- a/lib/eal/include/rte_memzone.h
+++ b/lib/eal/include/rte_memzone.h
@@ -305,6 +305,36 @@ void rte_memzone_dump(FILE *f);
 void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *arg),
 		      void *arg);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Set max memzone count.
+ *
+ * This function can only be called prior to rte_eal_init().
+ *
+ * @param max
+ *   Maximum number of memzones
+ * @return
+ *  0 on success, -1 otherwise
+ */
+__rte_experimental
+int rte_memzone_max_set(size_t max);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Get the maximum number of memzones.
+ *
+ * @note: The maximum value will not change after calling rte_eal_init().
+ *
+ * @return
+ *   Maximum number of memzones
+ */
+__rte_experimental
+size_t rte_memzone_max_get(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/eal/version.map b/lib/eal/version.map
index 51a820d..b52a83c 100644
--- a/lib/eal/version.map
+++ b/lib/eal/version.map
@@ -430,6 +430,10 @@ EXPERIMENTAL {
 	rte_thread_create_control;
 	rte_thread_set_name;
 	__rte_eal_trace_generic_blob;
+
+	# added in 23.07
+	rte_memzone_max_set;
+	rte_memzone_max_get;
 };
 
 INTERNAL {
-- 
2.8.4


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH V8] lib: set/get max memzone segments
  2023-06-05 10:50             ` [PATCH V8] " Ophir Munk
@ 2023-06-05 16:50               ` Thomas Monjalon
  0 siblings, 0 replies; 35+ messages in thread
From: Thomas Monjalon @ 2023-06-05 16:50 UTC (permalink / raw)
  To: Ophir Munk
  Cc: dev, Bruce Richardson, Devendra Singh Rawat, Alok Prasad,
	Maayan Kashani, Matan Azrad, Lior Margalit, David Marchand,
	Anatoly Burakov

05/06/2023 12:50, Ophir Munk:
> Currently, the max memzones count constat (RTE_MAX_MEMZONE) is used to
> decide how many memzones a DPDK application can have. This value could
> technically be changed by manually editing `rte_config.h` before
> compilation, but if DPDK is already compiled, that option is not useful.
> There are certain use cases that would benefit from making this value
> configurable.
> 
> This commit addresses the issue by adding a new API to set the max
> number of memzones before EAL initialization (while using the old
> constant as default value), as well as an API to get current maximum
> number of memzones.
> 
> Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> Acked-by: Alok Prasad <palok@marvell.com>

Did minor changes in comments and function sorting.
Applied, thanks.




^ permalink raw reply	[flat|nested] 35+ messages in thread

end of thread, other threads:[~2023-06-05 16:51 UTC | newest]

Thread overview: 35+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-19  8:36 [RFC] lib: set/get max memzone segments Ophir Munk
2023-04-19  8:48 ` Ophir Munk
2023-04-19 13:42 ` [EXT] " Devendra Singh Rawat
2023-04-24 21:07   ` Ophir Munk
2023-04-19 14:42 ` Stephen Hemminger
2023-04-24 21:43   ` Ophir Munk
2023-04-19 14:51 ` Tyler Retzlaff
2023-04-20  7:43   ` Thomas Monjalon
2023-04-20 18:20     ` Tyler Retzlaff
2023-04-21  8:34       ` Thomas Monjalon
2023-04-21 11:08         ` Morten Brørup
2023-04-21 14:57           ` Thomas Monjalon
2023-04-21 15:19             ` Morten Brørup
2023-04-25 16:38               ` Ophir Munk
2023-04-25 13:46   ` Ophir Munk
2023-04-25 16:40 ` [RFC V2] " Ophir Munk
2023-05-03  7:26   ` [PATCH V3] " Ophir Munk
2023-05-03 21:41     ` Morten Brørup
2023-05-25  6:47       ` Ophir Munk
2023-05-04  7:27     ` David Marchand
2023-05-25  6:35       ` Ophir Munk
2023-05-18 15:54     ` Burakov, Anatoly
2023-05-25  6:43       ` Ophir Munk
2023-05-24 22:25     ` [PATCH v4] " Ophir Munk
2023-05-25 14:53       ` Burakov, Anatoly
2023-05-30 11:37         ` Ophir Munk
2023-05-26  9:55       ` David Marchand
2023-05-28 12:09         ` [EXT] " Alok Prasad
2023-05-30 13:32       ` Thomas Monjalon
2023-05-31  7:56         ` Ophir Munk
2023-05-31  7:52       ` [PATCH V5] " Ophir Munk
2023-05-31  8:41         ` [PATCH V6] " Ophir Munk
2023-06-05  8:52           ` [PATCH V7] " Ophir Munk
2023-06-05 10:50             ` [PATCH V8] " Ophir Munk
2023-06-05 16:50               ` Thomas Monjalon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).