* [PATCH] mempool perf test: test random bulk sizes
@ 2025-02-28 16:48 Morten Brørup
2025-03-13 8:23 ` Morten Brørup
` (4 more replies)
0 siblings, 5 replies; 22+ messages in thread
From: Morten Brørup @ 2025-02-28 16:48 UTC (permalink / raw)
To: Andrew Rybchenko, Bruce Richardson, dev; +Cc: Morten Brørup
Bulk requests to get or put objects in a mempool often vary in size.
A series of tests with pseudorandom request sizes was added to mitigate
the benefits of the CPU's dynamic branch predictor.
Also, various other minor changes:
- Improved the output formatting for readability.
- Added test for the "default" mempool with cache.
- Skip the tests for the "default" mempool, if it happens to use the same
driver (i.e. operations) as already tested.
- Replaced bare use of "unsigned" with "unsigned int",
to make checkpatches happy.
Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
---
app/test/test_mempool_perf.c | 219 +++++++++++++++++++++++++++--------
1 file changed, 172 insertions(+), 47 deletions(-)
diff --git a/app/test/test_mempool_perf.c b/app/test/test_mempool_perf.c
index 4dd74ef75a..5e29797f02 100644
--- a/app/test/test_mempool_perf.c
+++ b/app/test/test_mempool_perf.c
@@ -33,6 +33,13 @@
* Mempool performance
* =======
*
+ * Each core gets *n_keep* objects per bulk of a pseudorandom number
+ * between 1 and *n_max_bulk*.
+ * Objects are put back in the pool per bulk of a similar pseudorandom number.
+ * Note: The very low entropy of the randomization algorithm is harmless, because
+ * the sole purpose of randomization is to prevent the CPU's dynamic branch
+ * predictor from enhancing the test results.
+ *
* Each core get *n_keep* objects per bulk of *n_get_bulk*. Then,
* objects are put back in the pool per bulk of *n_put_bulk*.
*
@@ -52,7 +59,12 @@
* - Two cores with user-owned cache
* - Max. cores with user-owned cache
*
- * - Bulk size (*n_get_bulk*, *n_put_bulk*)
+ * - Pseudorandom max bulk size (*n_max_bulk*)
+ *
+ * - Max bulk from CACHE_LINE_BURST to 256, and RTE_MEMPOOL_CACHE_MAX_SIZE,
+ * where CACHE_LINE_BURST is the number of pointers fitting into one CPU cache line.
+ *
+ * - Fixed bulk size (*n_get_bulk*, *n_put_bulk*)
*
* - Bulk get from 1 to 256, and RTE_MEMPOOL_CACHE_MAX_SIZE
* - Bulk put from 1 to 256, and RTE_MEMPOOL_CACHE_MAX_SIZE
@@ -89,16 +101,19 @@
} while (0)
static int use_external_cache;
-static unsigned external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
+static unsigned int external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
static RTE_ATOMIC(uint32_t) synchro;
+/* max random number of objects in one bulk operation (get and put) */
+static unsigned int n_max_bulk;
+
/* number of objects in one bulk operation (get or put) */
-static unsigned n_get_bulk;
-static unsigned n_put_bulk;
+static unsigned int n_get_bulk;
+static unsigned int n_put_bulk;
/* number of objects retrieved from mempool before putting them back */
-static unsigned n_keep;
+static unsigned int n_keep;
/* true if we want to test with constant n_get_bulk and n_put_bulk */
static int use_constant_values;
@@ -118,7 +133,7 @@ static struct mempool_test_stats stats[RTE_MAX_LCORE];
*/
static void
my_obj_init(struct rte_mempool *mp, __rte_unused void *arg,
- void *obj, unsigned i)
+ void *obj, unsigned int i)
{
uint32_t *objnum = obj;
memset(obj, 0, mp->elt_size);
@@ -159,11 +174,55 @@ test_loop(struct rte_mempool *mp, struct rte_mempool_cache *cache,
return 0;
}
+static __rte_always_inline int
+test_loop_random(struct rte_mempool *mp, struct rte_mempool_cache *cache,
+ unsigned int x_keep, unsigned int x_max_bulk)
+{
+ alignas(RTE_CACHE_LINE_SIZE) void *obj_table[MAX_KEEP];
+ unsigned int idx;
+ unsigned int i;
+ unsigned int r = 0;
+ unsigned int x_bulk;
+ int ret;
+
+ for (i = 0; likely(i < (N / x_keep)); i++) {
+ /* get x_keep objects by bulk of random [1 .. x_max_bulk] */
+ for (idx = 0; idx < x_keep; idx += x_bulk, r++) {
+ /* Generate a pseudorandom number [1 .. x_max_bulk]. */
+ x_bulk = ((r ^ (r >> 2) ^ (r << 3)) & (x_max_bulk - 1)) + 1;
+ if (unlikely(idx + x_bulk > x_keep))
+ x_bulk = x_keep - idx;
+ ret = rte_mempool_generic_get(mp,
+ &obj_table[idx],
+ x_bulk,
+ cache);
+ if (unlikely(ret < 0)) {
+ rte_mempool_dump(stdout, mp);
+ return ret;
+ }
+ }
+
+ /* put the objects back by bulk of random [1 .. x_max_bulk] */
+ for (idx = 0; idx < x_keep; idx += x_bulk, r++) {
+ /* Generate a pseudorandom number [1 .. x_max_bulk]. */
+ x_bulk = ((r ^ (r >> 2) ^ (r << 3)) & (x_max_bulk - 1)) + 1;
+ if (unlikely(idx + x_bulk > x_keep))
+ x_bulk = x_keep - idx;
+ rte_mempool_generic_put(mp,
+ &obj_table[idx],
+ x_bulk,
+ cache);
+ }
+ }
+
+ return 0;
+}
+
static int
per_lcore_mempool_test(void *arg)
{
struct rte_mempool *mp = arg;
- unsigned lcore_id = rte_lcore_id();
+ unsigned int lcore_id = rte_lcore_id();
int ret = 0;
uint64_t start_cycles, end_cycles;
uint64_t time_diff = 0, hz = rte_get_timer_hz();
@@ -181,9 +240,9 @@ per_lcore_mempool_test(void *arg)
}
/* n_get_bulk and n_put_bulk must be divisors of n_keep */
- if (((n_keep / n_get_bulk) * n_get_bulk) != n_keep)
+ if (!n_max_bulk && (((n_keep / n_get_bulk) * n_get_bulk) != n_keep))
GOTO_ERR(ret, out);
- if (((n_keep / n_put_bulk) * n_put_bulk) != n_keep)
+ if (!n_max_bulk && (((n_keep / n_put_bulk) * n_put_bulk) != n_keep))
GOTO_ERR(ret, out);
/* for constant n, n_get_bulk and n_put_bulk must be the same */
if (use_constant_values && n_put_bulk != n_get_bulk)
@@ -200,7 +259,9 @@ per_lcore_mempool_test(void *arg)
start_cycles = rte_get_timer_cycles();
while (time_diff/hz < TIME_S) {
- if (!use_constant_values)
+ if (n_max_bulk)
+ ret = test_loop_random(mp, cache, n_keep, n_max_bulk);
+ else if (!use_constant_values)
ret = test_loop(mp, cache, n_keep, n_get_bulk, n_put_bulk);
else if (n_get_bulk == 1)
ret = test_loop(mp, cache, n_keep, 1, 1);
@@ -246,10 +307,10 @@ per_lcore_mempool_test(void *arg)
static int
launch_cores(struct rte_mempool *mp, unsigned int cores)
{
- unsigned lcore_id;
+ unsigned int lcore_id;
uint64_t rate;
int ret;
- unsigned cores_save = cores;
+ unsigned int cores_save = cores;
double hz = rte_get_timer_hz();
rte_atomic_store_explicit(&synchro, 0, rte_memory_order_relaxed);
@@ -257,11 +318,18 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
/* reset stats */
memset(stats, 0, sizeof(stats));
- printf("mempool_autotest cache=%u cores=%u n_get_bulk=%u "
- "n_put_bulk=%u n_keep=%u constant_n=%u ",
+ printf("mempool_autotest cache=%u cores=%u n_keep=%5u ",
use_external_cache ?
external_cache_size : (unsigned) mp->cache_size,
- cores, n_get_bulk, n_put_bulk, n_keep, use_constant_values);
+ cores,
+ n_keep);
+ if (n_max_bulk)
+ printf("n_max_bulk=%3u ",
+ n_max_bulk);
+ else
+ printf("n_get_bulk=%3u n_put_bulk=%3u constant_n=%u ",
+ n_get_bulk, n_put_bulk,
+ use_constant_values);
if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) {
printf("mempool is not full\n");
@@ -301,7 +369,7 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
rate += (double)stats[lcore_id].enq_count * hz /
(double)stats[lcore_id].duration_cycles;
- printf("rate_persec=%" PRIu64 "\n", rate);
+ printf("rate_persec=%10" PRIu64 "\n", rate);
return 0;
}
@@ -310,25 +378,47 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
static int
do_one_mempool_test(struct rte_mempool *mp, unsigned int cores, int external_cache)
{
+ unsigned int bulk_tab_max[] = { CACHE_LINE_BURST, 32, 64, 128, 256,
+ RTE_MEMPOOL_CACHE_MAX_SIZE, 0 };
unsigned int bulk_tab_get[] = { 1, 4, CACHE_LINE_BURST, 32, 64, 128, 256,
RTE_MEMPOOL_CACHE_MAX_SIZE, 0 };
unsigned int bulk_tab_put[] = { 1, 4, CACHE_LINE_BURST, 32, 64, 128, 256,
RTE_MEMPOOL_CACHE_MAX_SIZE, 0 };
unsigned int keep_tab[] = { 32, 128, 512, 2048, 8192, 32768, 0 };
- unsigned *get_bulk_ptr;
- unsigned *put_bulk_ptr;
- unsigned *keep_ptr;
+ unsigned int *max_bulk_ptr;
+ unsigned int *get_bulk_ptr;
+ unsigned int *put_bulk_ptr;
+ unsigned int *keep_ptr;
int ret;
- for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
- for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
- for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {
+ for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {
+ for (max_bulk_ptr = bulk_tab_max; *max_bulk_ptr; max_bulk_ptr++) {
+
+ if (*keep_ptr < *max_bulk_ptr)
+ continue;
+
+ use_external_cache = external_cache;
+ use_constant_values = 0;
+ n_max_bulk = *max_bulk_ptr;
+ n_get_bulk = 0;
+ n_put_bulk = 0;
+ n_keep = *keep_ptr;
+ ret = launch_cores(mp, cores);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
+ for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {
+ for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
+ for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
if (*keep_ptr < *get_bulk_ptr || *keep_ptr < *put_bulk_ptr)
continue;
use_external_cache = external_cache;
use_constant_values = 0;
+ n_max_bulk = 0;
n_get_bulk = *get_bulk_ptr;
n_put_bulk = *put_bulk_ptr;
n_keep = *keep_ptr;
@@ -346,6 +436,7 @@ do_one_mempool_test(struct rte_mempool *mp, unsigned int cores, int external_cac
}
}
}
+
return 0;
}
@@ -354,7 +445,10 @@ do_all_mempool_perf_tests(unsigned int cores)
{
struct rte_mempool *mp_cache = NULL;
struct rte_mempool *mp_nocache = NULL;
- struct rte_mempool *default_pool = NULL;
+ struct rte_mempool *default_pool_cache = NULL;
+ struct rte_mempool *default_pool_nocache = NULL;
+ const char *mp_cache_ops;
+ const char *mp_nocache_ops;
const char *default_pool_ops;
int ret = -1;
@@ -368,6 +462,7 @@ do_all_mempool_perf_tests(unsigned int cores)
printf("cannot allocate mempool (without cache)\n");
goto err;
}
+ mp_nocache_ops = rte_mempool_get_ops(mp_nocache->ops_index)->name;
/* create a mempool (with cache) */
mp_cache = rte_mempool_create("perf_test_cache", MEMPOOL_SIZE,
@@ -380,47 +475,76 @@ do_all_mempool_perf_tests(unsigned int cores)
printf("cannot allocate mempool (with cache)\n");
goto err;
}
+ mp_cache_ops = rte_mempool_get_ops(mp_cache->ops_index)->name;
default_pool_ops = rte_mbuf_best_mempool_ops();
- /* Create a mempool based on Default handler */
- default_pool = rte_mempool_create_empty("default_pool",
- MEMPOOL_SIZE,
- MEMPOOL_ELT_SIZE,
- 0, 0,
- SOCKET_ID_ANY, 0);
-
- if (default_pool == NULL) {
- printf("cannot allocate %s mempool\n", default_pool_ops);
+
+ /* Create a mempool (without cache) based on Default handler */
+ default_pool_nocache = rte_mempool_create_empty("default_pool_nocache",
+ MEMPOOL_SIZE,
+ MEMPOOL_ELT_SIZE,
+ 0, 0,
+ SOCKET_ID_ANY, 0);
+ if (default_pool_nocache == NULL) {
+ printf("cannot allocate %s mempool (without cache)\n", default_pool_ops);
goto err;
}
-
- if (rte_mempool_set_ops_byname(default_pool, default_pool_ops, NULL)
- < 0) {
+ if (rte_mempool_set_ops_byname(default_pool_nocache, default_pool_ops, NULL) < 0) {
printf("cannot set %s handler\n", default_pool_ops);
goto err;
}
-
- if (rte_mempool_populate_default(default_pool) < 0) {
+ if (rte_mempool_populate_default(default_pool_nocache) < 0) {
printf("cannot populate %s mempool\n", default_pool_ops);
goto err;
}
+ rte_mempool_obj_iter(default_pool_nocache, my_obj_init, NULL);
+
+ /* Create a mempool (with cache) based on Default handler */
+ default_pool_cache = rte_mempool_create_empty("default_pool_cache",
+ MEMPOOL_SIZE,
+ MEMPOOL_ELT_SIZE,
+ RTE_MEMPOOL_CACHE_MAX_SIZE, 0,
+ SOCKET_ID_ANY, 0);
+ if (default_pool_cache == NULL) {
+ printf("cannot allocate %s mempool (with cache)\n", default_pool_ops);
+ goto err;
+ }
+ if (rte_mempool_set_ops_byname(default_pool_cache, default_pool_ops, NULL) < 0) {
+ printf("cannot set %s handler\n", default_pool_ops);
+ goto err;
+ }
+ if (rte_mempool_populate_default(default_pool_cache) < 0) {
+ printf("cannot populate %s mempool\n", default_pool_ops);
+ goto err;
+ }
+ rte_mempool_obj_iter(default_pool_cache, my_obj_init, NULL);
- rte_mempool_obj_iter(default_pool, my_obj_init, NULL);
-
- printf("start performance test (without cache)\n");
+ printf("start performance test (using %s, without cache)\n",
+ mp_nocache_ops);
if (do_one_mempool_test(mp_nocache, cores, 0) < 0)
goto err;
- printf("start performance test for %s (without cache)\n",
- default_pool_ops);
- if (do_one_mempool_test(default_pool, cores, 0) < 0)
- goto err;
+ if (strcmp(default_pool_ops, mp_nocache_ops) != 0) {
+ printf("start performance test for %s (without cache)\n",
+ default_pool_ops);
+ if (do_one_mempool_test(default_pool_nocache, cores, 0) < 0)
+ goto err;
+ }
- printf("start performance test (with cache)\n");
+ printf("start performance test (using %s, with cache)\n",
+ mp_cache_ops);
if (do_one_mempool_test(mp_cache, cores, 0) < 0)
goto err;
- printf("start performance test (with user-owned cache)\n");
+ if (strcmp(default_pool_ops, mp_cache_ops) != 0) {
+ printf("start performance test for %s (with cache)\n",
+ default_pool_ops);
+ if (do_one_mempool_test(default_pool_cache, cores, 0) < 0)
+ goto err;
+ }
+
+ printf("start performance test (using %s, with user-owned cache)\n",
+ mp_nocache_ops);
if (do_one_mempool_test(mp_nocache, cores, 1) < 0)
goto err;
@@ -431,7 +555,8 @@ do_all_mempool_perf_tests(unsigned int cores)
err:
rte_mempool_free(mp_cache);
rte_mempool_free(mp_nocache);
- rte_mempool_free(default_pool);
+ rte_mempool_free(default_pool_cache);
+ rte_mempool_free(default_pool_nocache);
return ret;
}
--
2.43.0
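For reference, a minimal standalone sketch of the bulk-size generator used in
test_loop_random() above; it is not part of the patch. The "& (x_max_bulk - 1)"
mask only spans the full [1 .. x_max_bulk] range when x_max_bulk is a power of
two, which holds for every entry in bulk_tab_max[] (assuming CACHE_LINE_BURST
and RTE_MEMPOOL_CACHE_MAX_SIZE are powers of two). The value 64 below is a
hypothetical example.

#include <stdio.h>

int main(void)
{
	unsigned int x_max_bulk = 64;	/* hypothetical example value */
	unsigned int x_bulk;
	unsigned int r;

	for (r = 0; r < 8; r++) {
		/* Same low-entropy formula as in test_loop_random(). */
		x_bulk = ((r ^ (r >> 2) ^ (r << 3)) & (x_max_bulk - 1)) + 1;
		/* Prints a bulk size in the range [1 .. x_max_bulk]. */
		printf("r=%u -> x_bulk=%u\n", r, x_bulk);
	}
	return 0;
}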
^ permalink raw reply [flat|nested] 22+ messages in thread
* RE: [PATCH] mempool perf test: test random bulk sizes
2025-02-28 16:48 [PATCH] mempool perf test: test random bulk sizes Morten Brørup
@ 2025-03-13 8:23 ` Morten Brørup
2025-03-25 7:15 ` Morten Brørup
2025-03-30 8:29 ` Andrew Rybchenko
` (3 subsequent siblings)
4 siblings, 1 reply; 22+ messages in thread
From: Morten Brørup @ 2025-03-13 8:23 UTC (permalink / raw)
To: Andrew Rybchenko, Bruce Richardson, dev, Artem V. Andreev,
Ashwin Sekhar T K, Pavan Nikhilesh, Harman Kalra, Hemant Agrawal,
Sachin Saxena
PING for review.
This could still make it into 25.03-rc3 (deadline: 14 March 2025).
Med venlig hilsen / Kind regards,
-Morten Brørup
> -----Original Message-----
> From: Morten Brørup [mailto:mb@smartsharesystems.com]
> Sent: Friday, 28 February 2025 17.49
> To: Andrew Rybchenko; Bruce Richardson; dev@dpdk.org
> Cc: Morten Brørup
> Subject: [PATCH] mempool perf test: test random bulk sizes
>
^ permalink raw reply [flat|nested] 22+ messages in thread
* RE: [PATCH] mempool perf test: test random bulk sizes
2025-03-13 8:23 ` Morten Brørup
@ 2025-03-25 7:15 ` Morten Brørup
0 siblings, 0 replies; 22+ messages in thread
From: Morten Brørup @ 2025-03-25 7:15 UTC (permalink / raw)
To: Andrew Rybchenko, Bruce Richardson, dev, Artem V. Andreev,
Ashwin Sekhar T K, Pavan Nikhilesh, Harman Kalra, Hemant Agrawal,
Sachin Saxena
Second PING for review.
Med venlig hilsen / Kind regards,
-Morten Brørup
> From: Morten Brørup [mailto:mb@smartsharesystems.com]
> Sent: Thursday, 13 March 2025 09.23
>
> PING for review.
>
> This could still make it into 25.03-rc3 (deadline: 14 March 2025).
>
> Med venlig hilsen / Kind regards,
> -Morten Brørup
>
>
> > From: Morten Brørup [mailto:mb@smartsharesystems.com]
> > Sent: Friday, 28 February 2025 17.49
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH] mempool perf test: test random bulk sizes
2025-02-28 16:48 [PATCH] mempool perf test: test random bulk sizes Morten Brørup
2025-03-13 8:23 ` Morten Brørup
@ 2025-03-30 8:29 ` Andrew Rybchenko
2025-03-30 8:57 ` Morten Brørup
2025-03-31 10:03 ` [PATCH v2 0/4] " Morten Brørup
` (2 subsequent siblings)
4 siblings, 1 reply; 22+ messages in thread
From: Andrew Rybchenko @ 2025-03-30 8:29 UTC (permalink / raw)
To: Morten Brørup, Bruce Richardson, dev
On 2/28/25 19:48, Morten Brørup wrote:
> Bulk requests to get or put objects in a mempool often vary in size.
> A series of tests with pseudorandom request sizes was added to mitigate
> the benefits of the CPU's dynamic branch predictor.
>
> Also, various other minor changes:
> - Improved the output formatting for readability.
> - Added test for the "default" mempool with cache.
> - Skip the tests for the "default" mempool, if it happens to use the same
> driver (i.e. operations) as already tested.
> - Replaced bare use of "unsigned" with "unsigned int",
> to make checkpatches happy.
IMHO, it would be much better and easier to review if all above changes
are done one by one in separate patches.
>
> Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
Overall the idea looks good, so
Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
but I'd be thankful if you split the patch.
* RE: [PATCH] mempool perf test: test random bulk sizes
2025-03-30 8:29 ` Andrew Rybchenko
@ 2025-03-30 8:57 ` Morten Brørup
0 siblings, 0 replies; 22+ messages in thread
From: Morten Brørup @ 2025-03-30 8:57 UTC (permalink / raw)
To: Andrew Rybchenko; +Cc: Bruce Richardson, dev
> From: Andrew Rybchenko [mailto:andrew.rybchenko@oktetlabs.ru]
> Sent: Sunday, 30 March 2025 10.29
>
> On 2/28/25 19:48, Morten Brørup wrote:
> > Bulk requests to get or put objects in a mempool often vary in size.
> > A series of tests with pseudo random request sizes, to mitigate the
> > benefits of the CPU's dynamic branch predictor, was added.
> >
> > Also, various other minor changes:
> > - Improved the output formatting for readability.
> > - Added test for the "default" mempool with cache.
> > - Skip the tests for the "default" mempool, if it happens to use the
> same
> > driver (i.e. operations) as already tested.
> > - Replaced bare use of "unsigned" with "unsigned int",
> > to make checkpatches happy.
>
> IMHO, it would be much better and easier to review if all above changes
> are done one by one in separate patches.
>
> >
> > Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
>
> Overall the idea looks good, so
> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
>
> but I'd be thankful if you split the patch.
OK.
I'll post a v2 with changes one by one in a series of patches.
<sarcasm about the write-only mailing list>
Let's hope someone else finds the series of simple patches sufficiently readable to care to review. ;-)
</sarcasm>
Thanks, Andrew.
* [PATCH v2 0/4] mempool perf test: test random bulk sizes
2025-02-28 16:48 [PATCH] mempool perf test: test random bulk sizes Morten Brørup
2025-03-13 8:23 ` Morten Brørup
2025-03-30 8:29 ` Andrew Rybchenko
@ 2025-03-31 10:03 ` Morten Brørup
2025-03-31 10:03 ` [PATCH v2 1/4] mempool perf test: replace bare unsigned with unsigned int Morten Brørup
` (4 more replies)
2025-04-01 15:00 ` Morten Brørup
2025-04-01 15:02 ` [PATCH v3 0/4] " Morten Brørup
4 siblings, 5 replies; 22+ messages in thread
From: Morten Brørup @ 2025-03-31 10:03 UTC (permalink / raw)
To: Andrew Rybchenko, Bruce Richardson, dev; +Cc: Morten Brørup
Bulk requests to get or put objects in a mempool often vary in size.
A series of tests with pseudorandom request sizes was added to mitigate
the benefits of the CPU's dynamic branch predictor.
Also, various other minor changes:
- Improved the output formatting for readability.
- Added test for the "default" mempool with cache.
- Skip the tests for the "default" mempool, if it happens to use the same
driver (i.e. operations) as already tested.
- Replaced bare use of "unsigned" with "unsigned int",
to make checkpatches happy.
v2:
* Split patch into individual patches. (Andrew Rybchenko)
Morten Brørup (4):
mempool perf test: replace bare unsigned with unsigned int
mempool perf test: test default mempool with cache
mempool perf test: improve output readability
mempool perf test: test random bulk sizes
app/test/test_mempool_perf.c | 222 +++++++++++++++++++++++++++--------
1 file changed, 173 insertions(+), 49 deletions(-)
--
2.43.0
* [PATCH v2 1/4] mempool perf test: replace bare unsigned with unsigned int
2025-03-31 10:03 ` [PATCH v2 0/4] " Morten Brørup
@ 2025-03-31 10:03 ` Morten Brørup
2025-03-31 10:03 ` [PATCH v2 2/4] mempool perf test: test default mempool with cache Morten Brørup
` (3 subsequent siblings)
4 siblings, 0 replies; 22+ messages in thread
From: Morten Brørup @ 2025-03-31 10:03 UTC (permalink / raw)
To: Andrew Rybchenko, Bruce Richardson, dev; +Cc: Morten Brørup
Updated old code to use "unsigned int" instead of bare "unsigned".
Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
---
app/test/test_mempool_perf.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/app/test/test_mempool_perf.c b/app/test/test_mempool_perf.c
index 4dd74ef75a..d4271a5ef9 100644
--- a/app/test/test_mempool_perf.c
+++ b/app/test/test_mempool_perf.c
@@ -89,16 +89,16 @@
} while (0)
static int use_external_cache;
-static unsigned external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
+static unsigned int external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
static RTE_ATOMIC(uint32_t) synchro;
/* number of objects in one bulk operation (get or put) */
-static unsigned n_get_bulk;
-static unsigned n_put_bulk;
+static unsigned int n_get_bulk;
+static unsigned int n_put_bulk;
/* number of objects retrieved from mempool before putting them back */
-static unsigned n_keep;
+static unsigned int n_keep;
/* true if we want to test with constant n_get_bulk and n_put_bulk */
static int use_constant_values;
@@ -118,7 +118,7 @@ static struct mempool_test_stats stats[RTE_MAX_LCORE];
*/
static void
my_obj_init(struct rte_mempool *mp, __rte_unused void *arg,
- void *obj, unsigned i)
+ void *obj, unsigned int i)
{
uint32_t *objnum = obj;
memset(obj, 0, mp->elt_size);
@@ -163,7 +163,7 @@ static int
per_lcore_mempool_test(void *arg)
{
struct rte_mempool *mp = arg;
- unsigned lcore_id = rte_lcore_id();
+ unsigned int lcore_id = rte_lcore_id();
int ret = 0;
uint64_t start_cycles, end_cycles;
uint64_t time_diff = 0, hz = rte_get_timer_hz();
@@ -246,10 +246,10 @@ per_lcore_mempool_test(void *arg)
static int
launch_cores(struct rte_mempool *mp, unsigned int cores)
{
- unsigned lcore_id;
+ unsigned int lcore_id;
uint64_t rate;
int ret;
- unsigned cores_save = cores;
+ unsigned int cores_save = cores;
double hz = rte_get_timer_hz();
rte_atomic_store_explicit(&synchro, 0, rte_memory_order_relaxed);
@@ -260,7 +260,7 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
printf("mempool_autotest cache=%u cores=%u n_get_bulk=%u "
"n_put_bulk=%u n_keep=%u constant_n=%u ",
use_external_cache ?
- external_cache_size : (unsigned) mp->cache_size,
+ external_cache_size : (unsigned int) mp->cache_size,
cores, n_get_bulk, n_put_bulk, n_keep, use_constant_values);
if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) {
@@ -315,9 +315,9 @@ do_one_mempool_test(struct rte_mempool *mp, unsigned int cores, int external_cac
unsigned int bulk_tab_put[] = { 1, 4, CACHE_LINE_BURST, 32, 64, 128, 256,
RTE_MEMPOOL_CACHE_MAX_SIZE, 0 };
unsigned int keep_tab[] = { 32, 128, 512, 2048, 8192, 32768, 0 };
- unsigned *get_bulk_ptr;
- unsigned *put_bulk_ptr;
- unsigned *keep_ptr;
+ unsigned int *get_bulk_ptr;
+ unsigned int *put_bulk_ptr;
+ unsigned int *keep_ptr;
int ret;
for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
--
2.43.0
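
Both spellings name exactly the same C type, so the change is purely
cosmetic and only silences checkpatch warnings. A trivial standalone
sketch (not taken from the patch) illustrating the point:

#include <stdio.h>

int main(void)
{
	unsigned a = 42;	/* bare "unsigned": flagged by checkpatch */
	unsigned int b = 42;	/* preferred spelling; same type as above */

	printf("sizeof(a)=%zu sizeof(b)=%zu equal values: %d\n",
			sizeof(a), sizeof(b), a == b);
	return 0;
}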
* [PATCH v2 2/4] mempool perf test: test default mempool with cache
2025-03-31 10:03 ` [PATCH v2 0/4] " Morten Brørup
2025-03-31 10:03 ` [PATCH v2 1/4] mempool perf test: replace bare unsigned with unsigned int Morten Brørup
@ 2025-03-31 10:03 ` Morten Brørup
2025-03-31 10:03 ` [PATCH v2 3/4] mempool perf test: improve output readability Morten Brørup
` (2 subsequent siblings)
4 siblings, 0 replies; 22+ messages in thread
From: Morten Brørup @ 2025-03-31 10:03 UTC (permalink / raw)
To: Andrew Rybchenko, Bruce Richardson, dev; +Cc: Morten Brørup
Added test for the "default" mempool with cache.
Skip the tests for the "default" mempool, if it happens to use the same
driver (i.e. operations) as already tested.
Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
---
app/test/test_mempool_perf.c | 84 +++++++++++++++++++++++++-----------
1 file changed, 59 insertions(+), 25 deletions(-)
diff --git a/app/test/test_mempool_perf.c b/app/test/test_mempool_perf.c
index d4271a5ef9..3594d81888 100644
--- a/app/test/test_mempool_perf.c
+++ b/app/test/test_mempool_perf.c
@@ -354,7 +354,10 @@ do_all_mempool_perf_tests(unsigned int cores)
{
struct rte_mempool *mp_cache = NULL;
struct rte_mempool *mp_nocache = NULL;
- struct rte_mempool *default_pool = NULL;
+ struct rte_mempool *default_pool_cache = NULL;
+ struct rte_mempool *default_pool_nocache = NULL;
+ const char *mp_cache_ops;
+ const char *mp_nocache_ops;
const char *default_pool_ops;
int ret = -1;
@@ -368,6 +371,7 @@ do_all_mempool_perf_tests(unsigned int cores)
printf("cannot allocate mempool (without cache)\n");
goto err;
}
+ mp_nocache_ops = rte_mempool_get_ops(mp_nocache->ops_index)->name;
/* create a mempool (with cache) */
mp_cache = rte_mempool_create("perf_test_cache", MEMPOOL_SIZE,
@@ -380,47 +384,76 @@ do_all_mempool_perf_tests(unsigned int cores)
printf("cannot allocate mempool (with cache)\n");
goto err;
}
+ mp_cache_ops = rte_mempool_get_ops(mp_cache->ops_index)->name;
default_pool_ops = rte_mbuf_best_mempool_ops();
- /* Create a mempool based on Default handler */
- default_pool = rte_mempool_create_empty("default_pool",
- MEMPOOL_SIZE,
- MEMPOOL_ELT_SIZE,
- 0, 0,
- SOCKET_ID_ANY, 0);
-
- if (default_pool == NULL) {
- printf("cannot allocate %s mempool\n", default_pool_ops);
+
+ /* Create a mempool (without cache) based on Default handler */
+ default_pool_nocache = rte_mempool_create_empty("default_pool_nocache",
+ MEMPOOL_SIZE,
+ MEMPOOL_ELT_SIZE,
+ 0, 0,
+ SOCKET_ID_ANY, 0);
+ if (default_pool_nocache == NULL) {
+ printf("cannot allocate %s mempool (without cache)\n", default_pool_ops);
goto err;
}
-
- if (rte_mempool_set_ops_byname(default_pool, default_pool_ops, NULL)
- < 0) {
+ if (rte_mempool_set_ops_byname(default_pool_nocache, default_pool_ops, NULL) < 0) {
printf("cannot set %s handler\n", default_pool_ops);
goto err;
}
-
- if (rte_mempool_populate_default(default_pool) < 0) {
+ if (rte_mempool_populate_default(default_pool_nocache) < 0) {
printf("cannot populate %s mempool\n", default_pool_ops);
goto err;
}
+ rte_mempool_obj_iter(default_pool_nocache, my_obj_init, NULL);
+
+ /* Create a mempool (with cache) based on Default handler */
+ default_pool_cache = rte_mempool_create_empty("default_pool_cache",
+ MEMPOOL_SIZE,
+ MEMPOOL_ELT_SIZE,
+ RTE_MEMPOOL_CACHE_MAX_SIZE, 0,
+ SOCKET_ID_ANY, 0);
+ if (default_pool_cache == NULL) {
+ printf("cannot allocate %s mempool (with cache)\n", default_pool_ops);
+ goto err;
+ }
+ if (rte_mempool_set_ops_byname(default_pool_cache, default_pool_ops, NULL) < 0) {
+ printf("cannot set %s handler\n", default_pool_ops);
+ goto err;
+ }
+ if (rte_mempool_populate_default(default_pool_cache) < 0) {
+ printf("cannot populate %s mempool\n", default_pool_ops);
+ goto err;
+ }
+ rte_mempool_obj_iter(default_pool_cache, my_obj_init, NULL);
- rte_mempool_obj_iter(default_pool, my_obj_init, NULL);
-
- printf("start performance test (without cache)\n");
+ printf("start performance test (using %s, without cache)\n",
+ mp_nocache_ops);
if (do_one_mempool_test(mp_nocache, cores, 0) < 0)
goto err;
- printf("start performance test for %s (without cache)\n",
- default_pool_ops);
- if (do_one_mempool_test(default_pool, cores, 0) < 0)
- goto err;
+ if (strcmp(default_pool_ops, mp_nocache_ops) != 0) {
+ printf("start performance test for %s (without cache)\n",
+ default_pool_ops);
+ if (do_one_mempool_test(default_pool_nocache, cores, 0) < 0)
+ goto err;
+ }
- printf("start performance test (with cache)\n");
+ printf("start performance test (using %s, with cache)\n",
+ mp_cache_ops);
if (do_one_mempool_test(mp_cache, cores, 0) < 0)
goto err;
- printf("start performance test (with user-owned cache)\n");
+ if (strcmp(default_pool_ops, mp_cache_ops) != 0) {
+ printf("start performance test for %s (with cache)\n",
+ default_pool_ops);
+ if (do_one_mempool_test(default_pool_cache, cores, 0) < 0)
+ goto err;
+ }
+
+ printf("start performance test (using %s, with user-owned cache)\n",
+ mp_nocache_ops);
if (do_one_mempool_test(mp_nocache, cores, 1) < 0)
goto err;
@@ -431,7 +464,8 @@ do_all_mempool_perf_tests(unsigned int cores)
err:
rte_mempool_free(mp_cache);
rte_mempool_free(mp_nocache);
- rte_mempool_free(default_pool);
+ rte_mempool_free(default_pool_cache);
+ rte_mempool_free(default_pool_nocache);
return ret;
}
--
2.43.0
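
The patch runs the same four-step sequence twice: create an empty pool,
bind the ops returned by rte_mbuf_best_mempool_ops(), populate it, and
initialize the objects. For illustration only, that sequence could be
folded into a helper; create_default_pool() below is hypothetical, not
part of the patch, and assumes it sits in test_mempool_perf.c next to
MEMPOOL_SIZE, MEMPOOL_ELT_SIZE and my_obj_init():

static struct rte_mempool *
create_default_pool(const char *name, const char *ops, unsigned int cache_size)
{
	struct rte_mempool *mp;

	mp = rte_mempool_create_empty(name, MEMPOOL_SIZE, MEMPOOL_ELT_SIZE,
			cache_size, 0, SOCKET_ID_ANY, 0);
	if (mp == NULL)
		return NULL;
	/* Bind the requested ops, then allocate the backing memory. */
	if (rte_mempool_set_ops_byname(mp, ops, NULL) < 0 ||
			rte_mempool_populate_default(mp) < 0) {
		rte_mempool_free(mp);
		return NULL;
	}
	/* Initialize every object, as the existing tests expect. */
	rte_mempool_obj_iter(mp, my_obj_init, NULL);
	return mp;
}

With such a helper, the two call sites above would reduce to
create_default_pool("default_pool_nocache", default_pool_ops, 0) and
create_default_pool("default_pool_cache", default_pool_ops,
RTE_MEMPOOL_CACHE_MAX_SIZE).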
* [PATCH v2 3/4] mempool perf test: improve output readability
2025-03-31 10:03 ` [PATCH v2 0/4] " Morten Brørup
2025-03-31 10:03 ` [PATCH v2 1/4] mempool perf test: replace bare unsigned with unsigned int Morten Brørup
2025-03-31 10:03 ` [PATCH v2 2/4] mempool perf test: test default mempool with cache Morten Brørup
@ 2025-03-31 10:03 ` Morten Brørup
2025-03-31 10:03 ` [PATCH v2 4/4] mempool perf test: test random bulk sizes Morten Brørup
2025-03-31 14:48 ` [PATCH v2 0/4] " Andrew Rybchenko
4 siblings, 0 replies; 22+ messages in thread
From: Morten Brørup @ 2025-03-31 10:03 UTC (permalink / raw)
To: Andrew Rybchenko, Bruce Richardson, dev; +Cc: Morten Brørup
Improved the output parameter ordering and formatting for readability.
Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
---
app/test/test_mempool_perf.c | 20 +++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)
diff --git a/app/test/test_mempool_perf.c b/app/test/test_mempool_perf.c
index 3594d81888..c252f1968a 100644
--- a/app/test/test_mempool_perf.c
+++ b/app/test/test_mempool_perf.c
@@ -257,11 +257,13 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
/* reset stats */
memset(stats, 0, sizeof(stats));
- printf("mempool_autotest cache=%u cores=%u n_get_bulk=%u "
- "n_put_bulk=%u n_keep=%u constant_n=%u ",
- use_external_cache ?
- external_cache_size : (unsigned int) mp->cache_size,
- cores, n_get_bulk, n_put_bulk, n_keep, use_constant_values);
+ printf("mempool_autotest cache=%u cores=%u n_keep=%5u ",
+ use_external_cache ? external_cache_size : (unsigned int) mp->cache_size,
+ cores,
+ n_keep);
+ printf("n_get_bulk=%3u n_put_bulk=%3u constant_n=%u ",
+ n_get_bulk, n_put_bulk,
+ use_constant_values);
if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) {
printf("mempool is not full\n");
@@ -301,7 +303,7 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
rate += (double)stats[lcore_id].enq_count * hz /
(double)stats[lcore_id].duration_cycles;
- printf("rate_persec=%" PRIu64 "\n", rate);
+ printf("rate_persec=%10" PRIu64 "\n", rate);
return 0;
}
@@ -320,9 +322,9 @@ do_one_mempool_test(struct rte_mempool *mp, unsigned int cores, int external_cac
unsigned int *keep_ptr;
int ret;
- for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
- for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
- for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {
+ for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {
+ for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
+ for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
if (*keep_ptr < *get_bulk_ptr || *keep_ptr < *put_bulk_ptr)
continue;
--
2.43.0
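
With the fixed field widths (%5u for n_keep, %3u for the bulk sizes and
a width of 10 for rate_persec), successive result lines now line up in
columns. A purely hypothetical example of the resulting layout (cache,
cores and the rates are made-up numbers; only the layout follows the new
printf format strings):

mempool_autotest cache=512 cores=2 n_keep=  128 n_get_bulk=  4 n_put_bulk= 32 constant_n=0 rate_persec= 123456789
mempool_autotest cache=512 cores=2 n_keep=32768 n_get_bulk=256 n_put_bulk=256 constant_n=0 rate_persec=1234567890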
* [PATCH v2 4/4] mempool perf test: test random bulk sizes
2025-03-31 10:03 ` [PATCH v2 0/4] " Morten Brørup
` (2 preceding siblings ...)
2025-03-31 10:03 ` [PATCH v2 3/4] mempool perf test: improve output readability Morten Brørup
@ 2025-03-31 10:03 ` Morten Brørup
2025-03-31 14:48 ` Andrew Rybchenko
2025-03-31 14:48 ` [PATCH v2 0/4] " Andrew Rybchenko
4 siblings, 1 reply; 22+ messages in thread
From: Morten Brørup @ 2025-03-31 10:03 UTC (permalink / raw)
To: Andrew Rybchenko, Bruce Richardson, dev; +Cc: Morten Brørup
Bulk requests to get or put objects in a mempool often vary in size.
A series of tests with pseudorandom request sizes was added to mitigate
the benefits of the CPU's dynamic branch predictor.
Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
---
app/test/test_mempool_perf.c | 102 ++++++++++++++++++++++++++++++++---
1 file changed, 95 insertions(+), 7 deletions(-)
diff --git a/app/test/test_mempool_perf.c b/app/test/test_mempool_perf.c
index c252f1968a..40ecaa5049 100644
--- a/app/test/test_mempool_perf.c
+++ b/app/test/test_mempool_perf.c
@@ -33,6 +33,13 @@
* Mempool performance
* =======
*
+ * Each core get *n_keep* objects per bulk of a pseudorandom number
+ * between 1 and *n_max_bulk*.
+ * Objects are put back in the pool per bulk of a similar pseudorandom number.
+ * Note: The very low entropy of the randomization algorithm is harmless, because
+ * the sole purpose of randomization is to prevent the CPU's dynamic branch
+ * predictor from enhancing the test results.
+ *
* Each core get *n_keep* objects per bulk of *n_get_bulk*. Then,
* objects are put back in the pool per bulk of *n_put_bulk*.
*
@@ -52,7 +59,12 @@
* - Two cores with user-owned cache
* - Max. cores with user-owned cache
*
- * - Bulk size (*n_get_bulk*, *n_put_bulk*)
+ * - Pseudorandom max bulk size (*n_max_bulk*)
+ *
+ * - Max bulk from CACHE_LINE_BURST to 256, and RTE_MEMPOOL_CACHE_MAX_SIZE,
+ * where CACHE_LINE_BURST is the number of pointers fitting into one CPU cache line.
+ *
+ * - Fixed bulk size (*n_get_bulk*, *n_put_bulk*)
*
* - Bulk get from 1 to 256, and RTE_MEMPOOL_CACHE_MAX_SIZE
* - Bulk put from 1 to 256, and RTE_MEMPOOL_CACHE_MAX_SIZE
@@ -93,6 +105,9 @@ static unsigned int external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
static RTE_ATOMIC(uint32_t) synchro;
+/* max random number of objects in one bulk operation (get and put) */
+static unsigned int n_max_bulk;
+
/* number of objects in one bulk operation (get or put) */
static unsigned int n_get_bulk;
static unsigned int n_put_bulk;
@@ -159,6 +174,50 @@ test_loop(struct rte_mempool *mp, struct rte_mempool_cache *cache,
return 0;
}
+static __rte_always_inline int
+test_loop_random(struct rte_mempool *mp, struct rte_mempool_cache *cache,
+ unsigned int x_keep, unsigned int x_max_bulk)
+{
+ alignas(RTE_CACHE_LINE_SIZE) void *obj_table[MAX_KEEP];
+ unsigned int idx;
+ unsigned int i;
+ unsigned int r = 0;
+ unsigned int x_bulk;
+ int ret;
+
+ for (i = 0; likely(i < (N / x_keep)); i++) {
+ /* get x_keep objects by bulk of random [1 .. x_max_bulk] */
+ for (idx = 0; idx < x_keep; idx += x_bulk, r++) {
+ /* Generate a pseudorandom number [1 .. x_max_bulk]. */
+ x_bulk = ((r ^ (r >> 2) ^ (r << 3)) & (x_max_bulk - 1)) + 1;
+ if (unlikely(idx + x_bulk > x_keep))
+ x_bulk = x_keep - idx;
+ ret = rte_mempool_generic_get(mp,
+ &obj_table[idx],
+ x_bulk,
+ cache);
+ if (unlikely(ret < 0)) {
+ rte_mempool_dump(stdout, mp);
+ return ret;
+ }
+ }
+
+ /* put the objects back by bulk of random [1 .. x_max_bulk] */
+ for (idx = 0; idx < x_keep; idx += x_bulk, r++) {
+ /* Generate a pseudorandom number [1 .. x_max_bulk]. */
+ x_bulk = ((r ^ (r >> 2) ^ (r << 3)) & (x_max_bulk - 1)) + 1;
+ if (unlikely(idx + x_bulk > x_keep))
+ x_bulk = x_keep - idx;
+ rte_mempool_generic_put(mp,
+ &obj_table[idx],
+ x_bulk,
+ cache);
+ }
+ }
+
+ return 0;
+}
+
static int
per_lcore_mempool_test(void *arg)
{
@@ -181,9 +240,9 @@ per_lcore_mempool_test(void *arg)
}
/* n_get_bulk and n_put_bulk must be divisors of n_keep */
- if (((n_keep / n_get_bulk) * n_get_bulk) != n_keep)
+ if (!n_max_bulk && (((n_keep / n_get_bulk) * n_get_bulk) != n_keep))
GOTO_ERR(ret, out);
- if (((n_keep / n_put_bulk) * n_put_bulk) != n_keep)
+ if (!n_max_bulk && (((n_keep / n_put_bulk) * n_put_bulk) != n_keep))
GOTO_ERR(ret, out);
/* for constant n, n_get_bulk and n_put_bulk must be the same */
if (use_constant_values && n_put_bulk != n_get_bulk)
@@ -200,7 +259,9 @@ per_lcore_mempool_test(void *arg)
start_cycles = rte_get_timer_cycles();
while (time_diff/hz < TIME_S) {
- if (!use_constant_values)
+ if (n_max_bulk)
+ ret = test_loop_random(mp, cache, n_keep, n_max_bulk);
+ else if (!use_constant_values)
ret = test_loop(mp, cache, n_keep, n_get_bulk, n_put_bulk);
else if (n_get_bulk == 1)
ret = test_loop(mp, cache, n_keep, 1, 1);
@@ -261,9 +322,13 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
use_external_cache ? external_cache_size : (unsigned int) mp->cache_size,
cores,
n_keep);
- printf("n_get_bulk=%3u n_put_bulk=%3u constant_n=%u ",
- n_get_bulk, n_put_bulk,
- use_constant_values);
+ if (n_max_bulk)
+ printf("n_max_bulk=%3u ",
+ n_max_bulk);
+ else
+ printf("n_get_bulk=%3u n_put_bulk=%3u constant_n=%u ",
+ n_get_bulk, n_put_bulk,
+ use_constant_values);
if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) {
printf("mempool is not full\n");
@@ -312,16 +377,37 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
static int
do_one_mempool_test(struct rte_mempool *mp, unsigned int cores, int external_cache)
{
+ unsigned int bulk_tab_max[] = { CACHE_LINE_BURST, 32, 64, 128, 256,
+ RTE_MEMPOOL_CACHE_MAX_SIZE, 0 };
unsigned int bulk_tab_get[] = { 1, 4, CACHE_LINE_BURST, 32, 64, 128, 256,
RTE_MEMPOOL_CACHE_MAX_SIZE, 0 };
unsigned int bulk_tab_put[] = { 1, 4, CACHE_LINE_BURST, 32, 64, 128, 256,
RTE_MEMPOOL_CACHE_MAX_SIZE, 0 };
unsigned int keep_tab[] = { 32, 128, 512, 2048, 8192, 32768, 0 };
+ unsigned int *max_bulk_ptr;
unsigned int *get_bulk_ptr;
unsigned int *put_bulk_ptr;
unsigned int *keep_ptr;
int ret;
+ for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {
+ for (max_bulk_ptr = bulk_tab_max; *max_bulk_ptr; max_bulk_ptr++) {
+
+ if (*keep_ptr < *max_bulk_ptr)
+ continue;
+
+ use_external_cache = external_cache;
+ use_constant_values = 0;
+ n_max_bulk = *max_bulk_ptr;
+ n_get_bulk = 0;
+ n_put_bulk = 0;
+ n_keep = *keep_ptr;
+ ret = launch_cores(mp, cores);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {
for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
@@ -331,6 +417,7 @@ do_one_mempool_test(struct rte_mempool *mp, unsigned int cores, int external_cac
use_external_cache = external_cache;
use_constant_values = 0;
+ n_max_bulk = 0;
n_get_bulk = *get_bulk_ptr;
n_put_bulk = *put_bulk_ptr;
n_keep = *keep_ptr;
@@ -348,6 +435,7 @@ do_one_mempool_test(struct rte_mempool *mp, unsigned int cores, int external_cac
}
}
}
+
return 0;
}
--
2.43.0
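
The randomization is deliberately simple: it only has to be cheap and
irregular enough to defeat the branch predictor, not statistically
strong. A minimal standalone sketch (not part of the patch) printing the
bulk sizes the formula yields, assuming x_max_bulk is a power of two as
all bulk_tab_max[] entries appear to be (e.g. CACHE_LINE_BURST would be
8 with 64-byte cache lines and 8-byte pointers):

#include <stdio.h>

int main(void)
{
	unsigned int x_max_bulk = 64;	/* example value from bulk_tab_max[] */
	unsigned int x_bulk;
	unsigned int r;

	for (r = 0; r < 16; r++) {
		/* Same low-entropy recurrence as in test_loop_random(). */
		x_bulk = ((r ^ (r >> 2) ^ (r << 3)) & (x_max_bulk - 1)) + 1;
		printf("r=%2u x_bulk=%2u\n", r, x_bulk);
	}
	return 0;
}

In the test itself the last bulk of each round is additionally clamped
so that idx + x_bulk never exceeds x_keep, as the diff above shows.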
* Re: [PATCH v2 0/4] mempool perf test: test random bulk sizes
2025-03-31 10:03 ` [PATCH v2 0/4] " Morten Brørup
` (3 preceding siblings ...)
2025-03-31 10:03 ` [PATCH v2 4/4] mempool perf test: test random bulk sizes Morten Brørup
@ 2025-03-31 14:48 ` Andrew Rybchenko
4 siblings, 0 replies; 22+ messages in thread
From: Andrew Rybchenko @ 2025-03-31 14:48 UTC (permalink / raw)
To: Morten Brørup, Bruce Richardson, dev
On 3/31/25 13:03, Morten Brørup wrote:
> Bulk requests to get or put objects in a mempool often vary in size.
> A series of tests with pseudorandom request sizes was added to mitigate
> the benefits of the CPU's dynamic branch predictor.
>
> Also, various other minor changes:
> - Improved the output formatting for readability.
> - Added test for the "default" mempool with cache.
> - Skip the tests for the "default" mempool, if it happens to use the same
> driver (i.e. operations) as already tested.
> - Replaced bare use of "unsigned" with "unsigned int",
> to make checkpatches happy.
>
> v2:
> * Split patch into individual patches. (Andrew Rybchenko)
Thanks Morten, LGTM except a couple of nits
>
> Morten Brørup (4):
> mempool perf test: replace bare unsigned with unsigned int
> mempool perf test: test default mempool with cache
> mempool perf test: improve output readability
> mempool perf test: test random bulk sizes
>
> app/test/test_mempool_perf.c | 222 +++++++++++++++++++++++++++--------
> 1 file changed, 173 insertions(+), 49 deletions(-)
>
* Re: [PATCH v2 4/4] mempool perf test: test random bulk sizes
2025-03-31 10:03 ` [PATCH v2 4/4] mempool perf test: test random bulk sizes Morten Brørup
@ 2025-03-31 14:48 ` Andrew Rybchenko
0 siblings, 0 replies; 22+ messages in thread
From: Andrew Rybchenko @ 2025-03-31 14:48 UTC (permalink / raw)
To: Morten Brørup, Bruce Richardson, dev
On 3/31/25 13:03, Morten Brørup wrote:
> Bulk requests to get or put objects in a mempool often vary in size.
> A series of tests with pseudorandom request sizes was added to mitigate
> the benefits of the CPU's dynamic branch predictor.
>
> Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
> Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
[snip]
> @@ -181,9 +240,9 @@ per_lcore_mempool_test(void *arg)
> }
>
> /* n_get_bulk and n_put_bulk must be divisors of n_keep */
> - if (((n_keep / n_get_bulk) * n_get_bulk) != n_keep)
> + if (!n_max_bulk && (((n_keep / n_get_bulk) * n_get_bulk) != n_keep))
IMHO n_max_bulk == 0 would be easier to read and as far as I remember
DPDK coding style recommends the same style.
> GOTO_ERR(ret, out);
> - if (((n_keep / n_put_bulk) * n_put_bulk) != n_keep)
> + if (!n_max_bulk && (((n_keep / n_put_bulk) * n_put_bulk) != n_keep))
same
> GOTO_ERR(ret, out);
> /* for constant n, n_get_bulk and n_put_bulk must be the same */
> if (use_constant_values && n_put_bulk != n_get_bulk)
> @@ -200,7 +259,9 @@ per_lcore_mempool_test(void *arg)
> start_cycles = rte_get_timer_cycles();
>
> while (time_diff/hz < TIME_S) {
> - if (!use_constant_values)
> + if (n_max_bulk)
n_max_bulk != 0
as DPDK coding style says
> + ret = test_loop_random(mp, cache, n_keep, n_max_bulk);
> + else if (!use_constant_values)
> ret = test_loop(mp, cache, n_keep, n_get_bulk, n_put_bulk);
> else if (n_get_bulk == 1)
> ret = test_loop(mp, cache, n_keep, 1, 1);
> @@ -261,9 +322,13 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
> use_external_cache ? external_cache_size : (unsigned int) mp->cache_size,
> cores,
> n_keep);
> - printf("n_get_bulk=%3u n_put_bulk=%3u constant_n=%u ",
> - n_get_bulk, n_put_bulk,
> - use_constant_values);
> + if (n_max_bulk)
same
> + printf("n_max_bulk=%3u ",
> + n_max_bulk);
> + else
> + printf("n_get_bulk=%3u n_put_bulk=%3u constant_n=%u ",
> + n_get_bulk, n_put_bulk,
> + use_constant_values);
>
> if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) {
> printf("mempool is not full\n");
[snip]
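
For reference, the spelling the nits ask for is the explicit comparison;
the re-spun series later in the thread applies it as, for example:

	if (n_max_bulk == 0 && (((n_keep / n_get_bulk) * n_get_bulk) != n_keep))
		GOTO_ERR(ret, out);

rather than testing "!n_max_bulk".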
* [PATCH v2 0/4] mempool perf test: test random bulk sizes
2025-02-28 16:48 [PATCH] mempool perf test: test random bulk sizes Morten Brørup
` (2 preceding siblings ...)
2025-03-31 10:03 ` [PATCH v2 0/4] " Morten Brørup
@ 2025-04-01 15:00 ` Morten Brørup
2025-04-01 15:00 ` [PATCH v2 1/4] mempool perf test: replace bare unsigned with unsigned int Morten Brørup
` (3 more replies)
2025-04-01 15:02 ` [PATCH v3 0/4] " Morten Brørup
4 siblings, 4 replies; 22+ messages in thread
From: Morten Brørup @ 2025-04-01 15:00 UTC (permalink / raw)
To: Andrew Rybchenko, Bruce Richardson, dev; +Cc: Morten Brørup
Bulk requests to get or put objects in a mempool often vary in size.
A series of tests with pseudorandom request sizes was added to mitigate
the benefits of the CPU's dynamic branch predictor.
Also, various other minor changes:
- Improved the output formatting for readability.
- Added test for the "default" mempool with cache.
- Skip the tests for the "default" mempool, if it happens to use the same
driver (i.e. operations) as already tested.
- Replaced bare use of "unsigned" with "unsigned int",
to make checkpatches happy.
v3:
* Fix code style for checking variable is set or zero. (Andrew Rybchenko)
v2:
* Split patch into individual patches. (Andrew Rybchenko)
Morten Brørup (4):
mempool perf test: replace bare unsigned with unsigned int
mempool perf test: test default mempool with cache
mempool perf test: improve output readability
mempool perf test: test random bulk sizes
app/test/test_mempool_perf.c | 222 +++++++++++++++++++++++++++--------
1 file changed, 173 insertions(+), 49 deletions(-)
--
2.43.0
* [PATCH v2 1/4] mempool perf test: replace bare unsigned with unsigned int
2025-04-01 15:00 ` Morten Brørup
@ 2025-04-01 15:00 ` Morten Brørup
2025-04-01 15:00 ` [PATCH v2 2/4] mempool perf test: test default mempool with cache Morten Brørup
` (2 subsequent siblings)
3 siblings, 0 replies; 22+ messages in thread
From: Morten Brørup @ 2025-04-01 15:00 UTC (permalink / raw)
To: Andrew Rybchenko, Bruce Richardson, dev; +Cc: Morten Brørup
Updated old code to use "unsigned int" instead of bare "unsigned".
Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
---
app/test/test_mempool_perf.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/app/test/test_mempool_perf.c b/app/test/test_mempool_perf.c
index 4dd74ef75a..d4271a5ef9 100644
--- a/app/test/test_mempool_perf.c
+++ b/app/test/test_mempool_perf.c
@@ -89,16 +89,16 @@
} while (0)
static int use_external_cache;
-static unsigned external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
+static unsigned int external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
static RTE_ATOMIC(uint32_t) synchro;
/* number of objects in one bulk operation (get or put) */
-static unsigned n_get_bulk;
-static unsigned n_put_bulk;
+static unsigned int n_get_bulk;
+static unsigned int n_put_bulk;
/* number of objects retrieved from mempool before putting them back */
-static unsigned n_keep;
+static unsigned int n_keep;
/* true if we want to test with constant n_get_bulk and n_put_bulk */
static int use_constant_values;
@@ -118,7 +118,7 @@ static struct mempool_test_stats stats[RTE_MAX_LCORE];
*/
static void
my_obj_init(struct rte_mempool *mp, __rte_unused void *arg,
- void *obj, unsigned i)
+ void *obj, unsigned int i)
{
uint32_t *objnum = obj;
memset(obj, 0, mp->elt_size);
@@ -163,7 +163,7 @@ static int
per_lcore_mempool_test(void *arg)
{
struct rte_mempool *mp = arg;
- unsigned lcore_id = rte_lcore_id();
+ unsigned int lcore_id = rte_lcore_id();
int ret = 0;
uint64_t start_cycles, end_cycles;
uint64_t time_diff = 0, hz = rte_get_timer_hz();
@@ -246,10 +246,10 @@ per_lcore_mempool_test(void *arg)
static int
launch_cores(struct rte_mempool *mp, unsigned int cores)
{
- unsigned lcore_id;
+ unsigned int lcore_id;
uint64_t rate;
int ret;
- unsigned cores_save = cores;
+ unsigned int cores_save = cores;
double hz = rte_get_timer_hz();
rte_atomic_store_explicit(&synchro, 0, rte_memory_order_relaxed);
@@ -260,7 +260,7 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
printf("mempool_autotest cache=%u cores=%u n_get_bulk=%u "
"n_put_bulk=%u n_keep=%u constant_n=%u ",
use_external_cache ?
- external_cache_size : (unsigned) mp->cache_size,
+ external_cache_size : (unsigned int) mp->cache_size,
cores, n_get_bulk, n_put_bulk, n_keep, use_constant_values);
if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) {
@@ -315,9 +315,9 @@ do_one_mempool_test(struct rte_mempool *mp, unsigned int cores, int external_cac
unsigned int bulk_tab_put[] = { 1, 4, CACHE_LINE_BURST, 32, 64, 128, 256,
RTE_MEMPOOL_CACHE_MAX_SIZE, 0 };
unsigned int keep_tab[] = { 32, 128, 512, 2048, 8192, 32768, 0 };
- unsigned *get_bulk_ptr;
- unsigned *put_bulk_ptr;
- unsigned *keep_ptr;
+ unsigned int *get_bulk_ptr;
+ unsigned int *put_bulk_ptr;
+ unsigned int *keep_ptr;
int ret;
for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
--
2.43.0
* [PATCH v2 2/4] mempool perf test: test default mempool with cache
2025-04-01 15:00 ` Morten Brørup
2025-04-01 15:00 ` [PATCH v2 1/4] mempool perf test: replace bare unsigned with unsigned int Morten Brørup
@ 2025-04-01 15:00 ` Morten Brørup
2025-04-01 15:00 ` [PATCH v2 3/4] mempool perf test: improve output readability Morten Brørup
2025-04-01 15:00 ` [PATCH v2 4/4] mempool perf test: test random bulk sizes Morten Brørup
3 siblings, 0 replies; 22+ messages in thread
From: Morten Brørup @ 2025-04-01 15:00 UTC (permalink / raw)
To: Andrew Rybchenko, Bruce Richardson, dev; +Cc: Morten Brørup
Added test for the "default" mempool with cache.
Skip the tests for the "default" mempool, if it happens to use the same
driver (i.e. operations) as already tested.
Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
---
app/test/test_mempool_perf.c | 84 +++++++++++++++++++++++++-----------
1 file changed, 59 insertions(+), 25 deletions(-)
diff --git a/app/test/test_mempool_perf.c b/app/test/test_mempool_perf.c
index d4271a5ef9..3594d81888 100644
--- a/app/test/test_mempool_perf.c
+++ b/app/test/test_mempool_perf.c
@@ -354,7 +354,10 @@ do_all_mempool_perf_tests(unsigned int cores)
{
struct rte_mempool *mp_cache = NULL;
struct rte_mempool *mp_nocache = NULL;
- struct rte_mempool *default_pool = NULL;
+ struct rte_mempool *default_pool_cache = NULL;
+ struct rte_mempool *default_pool_nocache = NULL;
+ const char *mp_cache_ops;
+ const char *mp_nocache_ops;
const char *default_pool_ops;
int ret = -1;
@@ -368,6 +371,7 @@ do_all_mempool_perf_tests(unsigned int cores)
printf("cannot allocate mempool (without cache)\n");
goto err;
}
+ mp_nocache_ops = rte_mempool_get_ops(mp_nocache->ops_index)->name;
/* create a mempool (with cache) */
mp_cache = rte_mempool_create("perf_test_cache", MEMPOOL_SIZE,
@@ -380,47 +384,76 @@ do_all_mempool_perf_tests(unsigned int cores)
printf("cannot allocate mempool (with cache)\n");
goto err;
}
+ mp_cache_ops = rte_mempool_get_ops(mp_cache->ops_index)->name;
default_pool_ops = rte_mbuf_best_mempool_ops();
- /* Create a mempool based on Default handler */
- default_pool = rte_mempool_create_empty("default_pool",
- MEMPOOL_SIZE,
- MEMPOOL_ELT_SIZE,
- 0, 0,
- SOCKET_ID_ANY, 0);
-
- if (default_pool == NULL) {
- printf("cannot allocate %s mempool\n", default_pool_ops);
+
+ /* Create a mempool (without cache) based on Default handler */
+ default_pool_nocache = rte_mempool_create_empty("default_pool_nocache",
+ MEMPOOL_SIZE,
+ MEMPOOL_ELT_SIZE,
+ 0, 0,
+ SOCKET_ID_ANY, 0);
+ if (default_pool_nocache == NULL) {
+ printf("cannot allocate %s mempool (without cache)\n", default_pool_ops);
goto err;
}
-
- if (rte_mempool_set_ops_byname(default_pool, default_pool_ops, NULL)
- < 0) {
+ if (rte_mempool_set_ops_byname(default_pool_nocache, default_pool_ops, NULL) < 0) {
printf("cannot set %s handler\n", default_pool_ops);
goto err;
}
-
- if (rte_mempool_populate_default(default_pool) < 0) {
+ if (rte_mempool_populate_default(default_pool_nocache) < 0) {
printf("cannot populate %s mempool\n", default_pool_ops);
goto err;
}
+ rte_mempool_obj_iter(default_pool_nocache, my_obj_init, NULL);
+
+ /* Create a mempool (with cache) based on Default handler */
+ default_pool_cache = rte_mempool_create_empty("default_pool_cache",
+ MEMPOOL_SIZE,
+ MEMPOOL_ELT_SIZE,
+ RTE_MEMPOOL_CACHE_MAX_SIZE, 0,
+ SOCKET_ID_ANY, 0);
+ if (default_pool_cache == NULL) {
+ printf("cannot allocate %s mempool (with cache)\n", default_pool_ops);
+ goto err;
+ }
+ if (rte_mempool_set_ops_byname(default_pool_cache, default_pool_ops, NULL) < 0) {
+ printf("cannot set %s handler\n", default_pool_ops);
+ goto err;
+ }
+ if (rte_mempool_populate_default(default_pool_cache) < 0) {
+ printf("cannot populate %s mempool\n", default_pool_ops);
+ goto err;
+ }
+ rte_mempool_obj_iter(default_pool_cache, my_obj_init, NULL);
- rte_mempool_obj_iter(default_pool, my_obj_init, NULL);
-
- printf("start performance test (without cache)\n");
+ printf("start performance test (using %s, without cache)\n",
+ mp_nocache_ops);
if (do_one_mempool_test(mp_nocache, cores, 0) < 0)
goto err;
- printf("start performance test for %s (without cache)\n",
- default_pool_ops);
- if (do_one_mempool_test(default_pool, cores, 0) < 0)
- goto err;
+ if (strcmp(default_pool_ops, mp_nocache_ops) != 0) {
+ printf("start performance test for %s (without cache)\n",
+ default_pool_ops);
+ if (do_one_mempool_test(default_pool_nocache, cores, 0) < 0)
+ goto err;
+ }
- printf("start performance test (with cache)\n");
+ printf("start performance test (using %s, with cache)\n",
+ mp_cache_ops);
if (do_one_mempool_test(mp_cache, cores, 0) < 0)
goto err;
- printf("start performance test (with user-owned cache)\n");
+ if (strcmp(default_pool_ops, mp_cache_ops) != 0) {
+ printf("start performance test for %s (with cache)\n",
+ default_pool_ops);
+ if (do_one_mempool_test(default_pool_cache, cores, 0) < 0)
+ goto err;
+ }
+
+ printf("start performance test (using %s, with user-owned cache)\n",
+ mp_nocache_ops);
if (do_one_mempool_test(mp_nocache, cores, 1) < 0)
goto err;
@@ -431,7 +464,8 @@ do_all_mempool_perf_tests(unsigned int cores)
err:
rte_mempool_free(mp_cache);
rte_mempool_free(mp_nocache);
- rte_mempool_free(default_pool);
+ rte_mempool_free(default_pool_cache);
+ rte_mempool_free(default_pool_nocache);
return ret;
}
--
2.43.0
* [PATCH v2 3/4] mempool perf test: improve output readability
2025-04-01 15:00 ` Morten Brørup
2025-04-01 15:00 ` [PATCH v2 1/4] mempool perf test: replace bare unsigned with unsigned int Morten Brørup
2025-04-01 15:00 ` [PATCH v2 2/4] mempool perf test: test default mempool with cache Morten Brørup
@ 2025-04-01 15:00 ` Morten Brørup
2025-04-01 15:00 ` [PATCH v2 4/4] mempool perf test: test random bulk sizes Morten Brørup
3 siblings, 0 replies; 22+ messages in thread
From: Morten Brørup @ 2025-04-01 15:00 UTC (permalink / raw)
To: Andrew Rybchenko, Bruce Richardson, dev; +Cc: Morten Brørup
Improved the output parameter ordering and formatting for readability.
Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
---
app/test/test_mempool_perf.c | 20 +++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)
diff --git a/app/test/test_mempool_perf.c b/app/test/test_mempool_perf.c
index 3594d81888..c252f1968a 100644
--- a/app/test/test_mempool_perf.c
+++ b/app/test/test_mempool_perf.c
@@ -257,11 +257,13 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
/* reset stats */
memset(stats, 0, sizeof(stats));
- printf("mempool_autotest cache=%u cores=%u n_get_bulk=%u "
- "n_put_bulk=%u n_keep=%u constant_n=%u ",
- use_external_cache ?
- external_cache_size : (unsigned int) mp->cache_size,
- cores, n_get_bulk, n_put_bulk, n_keep, use_constant_values);
+ printf("mempool_autotest cache=%u cores=%u n_keep=%5u ",
+ use_external_cache ? external_cache_size : (unsigned int) mp->cache_size,
+ cores,
+ n_keep);
+ printf("n_get_bulk=%3u n_put_bulk=%3u constant_n=%u ",
+ n_get_bulk, n_put_bulk,
+ use_constant_values);
if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) {
printf("mempool is not full\n");
@@ -301,7 +303,7 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
rate += (double)stats[lcore_id].enq_count * hz /
(double)stats[lcore_id].duration_cycles;
- printf("rate_persec=%" PRIu64 "\n", rate);
+ printf("rate_persec=%10" PRIu64 "\n", rate);
return 0;
}
@@ -320,9 +322,9 @@ do_one_mempool_test(struct rte_mempool *mp, unsigned int cores, int external_cac
unsigned int *keep_ptr;
int ret;
- for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
- for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
- for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {
+ for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {
+ for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
+ for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
if (*keep_ptr < *get_bulk_ptr || *keep_ptr < *put_bulk_ptr)
continue;
--
2.43.0
* [PATCH v2 4/4] mempool perf test: test random bulk sizes
2025-04-01 15:00 ` Morten Brørup
` (2 preceding siblings ...)
2025-04-01 15:00 ` [PATCH v2 3/4] mempool perf test: improve output readability Morten Brørup
@ 2025-04-01 15:00 ` Morten Brørup
3 siblings, 0 replies; 22+ messages in thread
From: Morten Brørup @ 2025-04-01 15:00 UTC (permalink / raw)
To: Andrew Rybchenko, Bruce Richardson, dev; +Cc: Morten Brørup
Bulk requests to get or put objects in a mempool often vary in size.
A series of tests with pseudorandom request sizes was added to mitigate
the benefits of the CPU's dynamic branch predictor.
Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
---
app/test/test_mempool_perf.c | 102 ++++++++++++++++++++++++++++++++---
1 file changed, 95 insertions(+), 7 deletions(-)
diff --git a/app/test/test_mempool_perf.c b/app/test/test_mempool_perf.c
index c252f1968a..40ecaa5049 100644
--- a/app/test/test_mempool_perf.c
+++ b/app/test/test_mempool_perf.c
@@ -33,6 +33,13 @@
* Mempool performance
* =======
*
+ * Each core get *n_keep* objects per bulk of a pseudorandom number
+ * between 1 and *n_max_bulk*.
+ * Objects are put back in the pool per bulk of a similar pseudorandom number.
+ * Note: The very low entropy of the randomization algorithm is harmless, because
+ * the sole purpose of randomization is to prevent the CPU's dynamic branch
+ * predictor from enhancing the test results.
+ *
* Each core get *n_keep* objects per bulk of *n_get_bulk*. Then,
* objects are put back in the pool per bulk of *n_put_bulk*.
*
@@ -52,7 +59,12 @@
* - Two cores with user-owned cache
* - Max. cores with user-owned cache
*
- * - Bulk size (*n_get_bulk*, *n_put_bulk*)
+ * - Pseudorandom max bulk size (*n_max_bulk*)
+ *
+ * - Max bulk from CACHE_LINE_BURST to 256, and RTE_MEMPOOL_CACHE_MAX_SIZE,
+ * where CACHE_LINE_BURST is the number of pointers fitting into one CPU cache line.
+ *
+ * - Fixed bulk size (*n_get_bulk*, *n_put_bulk*)
*
* - Bulk get from 1 to 256, and RTE_MEMPOOL_CACHE_MAX_SIZE
* - Bulk put from 1 to 256, and RTE_MEMPOOL_CACHE_MAX_SIZE
@@ -93,6 +105,9 @@ static unsigned int external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
static RTE_ATOMIC(uint32_t) synchro;
+/* max random number of objects in one bulk operation (get and put) */
+static unsigned int n_max_bulk;
+
/* number of objects in one bulk operation (get or put) */
static unsigned int n_get_bulk;
static unsigned int n_put_bulk;
@@ -159,6 +174,50 @@ test_loop(struct rte_mempool *mp, struct rte_mempool_cache *cache,
return 0;
}
+static __rte_always_inline int
+test_loop_random(struct rte_mempool *mp, struct rte_mempool_cache *cache,
+ unsigned int x_keep, unsigned int x_max_bulk)
+{
+ alignas(RTE_CACHE_LINE_SIZE) void *obj_table[MAX_KEEP];
+ unsigned int idx;
+ unsigned int i;
+ unsigned int r = 0;
+ unsigned int x_bulk;
+ int ret;
+
+ for (i = 0; likely(i < (N / x_keep)); i++) {
+ /* get x_keep objects by bulk of random [1 .. x_max_bulk] */
+ for (idx = 0; idx < x_keep; idx += x_bulk, r++) {
+ /* Generate a pseudorandom number [1 .. x_max_bulk]. */
+ x_bulk = ((r ^ (r >> 2) ^ (r << 3)) & (x_max_bulk - 1)) + 1;
+ if (unlikely(idx + x_bulk > x_keep))
+ x_bulk = x_keep - idx;
+ ret = rte_mempool_generic_get(mp,
+ &obj_table[idx],
+ x_bulk,
+ cache);
+ if (unlikely(ret < 0)) {
+ rte_mempool_dump(stdout, mp);
+ return ret;
+ }
+ }
+
+ /* put the objects back by bulk of random [1 .. x_max_bulk] */
+ for (idx = 0; idx < x_keep; idx += x_bulk, r++) {
+ /* Generate a pseudorandom number [1 .. x_max_bulk]. */
+ x_bulk = ((r ^ (r >> 2) ^ (r << 3)) & (x_max_bulk - 1)) + 1;
+ if (unlikely(idx + x_bulk > x_keep))
+ x_bulk = x_keep - idx;
+ rte_mempool_generic_put(mp,
+ &obj_table[idx],
+ x_bulk,
+ cache);
+ }
+ }
+
+ return 0;
+}
+
static int
per_lcore_mempool_test(void *arg)
{
@@ -181,9 +240,9 @@ per_lcore_mempool_test(void *arg)
}
/* n_get_bulk and n_put_bulk must be divisors of n_keep */
- if (((n_keep / n_get_bulk) * n_get_bulk) != n_keep)
+ if (n_max_bulk == 0 && (((n_keep / n_get_bulk) * n_get_bulk) != n_keep))
GOTO_ERR(ret, out);
- if (((n_keep / n_put_bulk) * n_put_bulk) != n_keep)
+ if (n_max_bulk == 0 && (((n_keep / n_put_bulk) * n_put_bulk) != n_keep))
GOTO_ERR(ret, out);
/* for constant n, n_get_bulk and n_put_bulk must be the same */
if (use_constant_values && n_put_bulk != n_get_bulk)
@@ -200,7 +259,9 @@ per_lcore_mempool_test(void *arg)
start_cycles = rte_get_timer_cycles();
while (time_diff/hz < TIME_S) {
- if (!use_constant_values)
+ if (n_max_bulk != 0)
+ ret = test_loop_random(mp, cache, n_keep, n_max_bulk);
+ else if (!use_constant_values)
ret = test_loop(mp, cache, n_keep, n_get_bulk, n_put_bulk);
else if (n_get_bulk == 1)
ret = test_loop(mp, cache, n_keep, 1, 1);
@@ -261,9 +322,13 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
use_external_cache ? external_cache_size : (unsigned int) mp->cache_size,
cores,
n_keep);
- printf("n_get_bulk=%3u n_put_bulk=%3u constant_n=%u ",
- n_get_bulk, n_put_bulk,
- use_constant_values);
+ if (n_max_bulk != 0)
+ printf("n_max_bulk=%3u ",
+ n_max_bulk);
+ else
+ printf("n_get_bulk=%3u n_put_bulk=%3u constant_n=%u ",
+ n_get_bulk, n_put_bulk,
+ use_constant_values);
if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) {
printf("mempool is not full\n");
@@ -312,16 +377,37 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
static int
do_one_mempool_test(struct rte_mempool *mp, unsigned int cores, int external_cache)
{
+ unsigned int bulk_tab_max[] = { CACHE_LINE_BURST, 32, 64, 128, 256,
+ RTE_MEMPOOL_CACHE_MAX_SIZE, 0 };
unsigned int bulk_tab_get[] = { 1, 4, CACHE_LINE_BURST, 32, 64, 128, 256,
RTE_MEMPOOL_CACHE_MAX_SIZE, 0 };
unsigned int bulk_tab_put[] = { 1, 4, CACHE_LINE_BURST, 32, 64, 128, 256,
RTE_MEMPOOL_CACHE_MAX_SIZE, 0 };
unsigned int keep_tab[] = { 32, 128, 512, 2048, 8192, 32768, 0 };
+ unsigned int *max_bulk_ptr;
unsigned int *get_bulk_ptr;
unsigned int *put_bulk_ptr;
unsigned int *keep_ptr;
int ret;
+ for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {
+ for (max_bulk_ptr = bulk_tab_max; *max_bulk_ptr; max_bulk_ptr++) {
+
+ if (*keep_ptr < *max_bulk_ptr)
+ continue;
+
+ use_external_cache = external_cache;
+ use_constant_values = 0;
+ n_max_bulk = *max_bulk_ptr;
+ n_get_bulk = 0;
+ n_put_bulk = 0;
+ n_keep = *keep_ptr;
+ ret = launch_cores(mp, cores);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {
for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
@@ -331,6 +417,7 @@ do_one_mempool_test(struct rte_mempool *mp, unsigned int cores, int external_cac
use_external_cache = external_cache;
use_constant_values = 0;
+ n_max_bulk = 0;
n_get_bulk = *get_bulk_ptr;
n_put_bulk = *put_bulk_ptr;
n_keep = *keep_ptr;
@@ -348,6 +435,7 @@ do_one_mempool_test(struct rte_mempool *mp, unsigned int cores, int external_cac
}
}
}
+
return 0;
}
--
2.43.0
* [PATCH v3 0/4] mempool perf test: test random bulk sizes
2025-02-28 16:48 [PATCH] mempool perf test: test random bulk sizes Morten Brørup
` (3 preceding siblings ...)
2025-04-01 15:00 ` Morten Brørup
@ 2025-04-01 15:02 ` Morten Brørup
2025-04-01 15:02 ` [PATCH v3 1/4] mempool perf test: replace bare unsigned with unsigned int Morten Brørup
` (3 more replies)
4 siblings, 4 replies; 22+ messages in thread
From: Morten Brørup @ 2025-04-01 15:02 UTC (permalink / raw)
To: Andrew Rybchenko, Bruce Richardson, dev; +Cc: Morten Brørup
Bulk requests to get or put objects in a mempool often vary in size.
A series of tests with pseudorandom request sizes was added to mitigate
the benefits of the CPU's dynamic branch predictor.
Also, various other minor changes:
- Improved the output formatting for readability.
- Added test for the "default" mempool with cache.
- Skip the tests for the "default" mempool, if it happens to use the same
driver (i.e. operations) as already tested.
- Replaced bare use of "unsigned" with "unsigned int",
to make checkpatches happy.
v3:
* Fix code style for checking variable is set or zero. (Andrew Rybchenko)
v2:
* Split patch into individual patches. (Andrew Rybchenko)
Morten Brørup (4):
mempool perf test: replace bare unsigned with unsigned int
mempool perf test: test default mempool with cache
mempool perf test: improve output readability
mempool perf test: test random bulk sizes
app/test/test_mempool_perf.c | 222 +++++++++++++++++++++++++++--------
1 file changed, 173 insertions(+), 49 deletions(-)
--
2.43.0
* [PATCH v3 1/4] mempool perf test: replace bare unsigned with unsigned int
2025-04-01 15:02 ` [PATCH v3 0/4] " Morten Brørup
@ 2025-04-01 15:02 ` Morten Brørup
2025-04-01 15:02 ` [PATCH v3 2/4] mempool perf test: test default mempool with cache Morten Brørup
` (2 subsequent siblings)
3 siblings, 0 replies; 22+ messages in thread
From: Morten Brørup @ 2025-04-01 15:02 UTC (permalink / raw)
To: Andrew Rybchenko, Bruce Richardson, dev; +Cc: Morten Brørup
Updated old code to use "unsigned int" instead of bare "unsigned".
Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
---
app/test/test_mempool_perf.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/app/test/test_mempool_perf.c b/app/test/test_mempool_perf.c
index 4dd74ef75a..d4271a5ef9 100644
--- a/app/test/test_mempool_perf.c
+++ b/app/test/test_mempool_perf.c
@@ -89,16 +89,16 @@
} while (0)
static int use_external_cache;
-static unsigned external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
+static unsigned int external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
static RTE_ATOMIC(uint32_t) synchro;
/* number of objects in one bulk operation (get or put) */
-static unsigned n_get_bulk;
-static unsigned n_put_bulk;
+static unsigned int n_get_bulk;
+static unsigned int n_put_bulk;
/* number of objects retrieved from mempool before putting them back */
-static unsigned n_keep;
+static unsigned int n_keep;
/* true if we want to test with constant n_get_bulk and n_put_bulk */
static int use_constant_values;
@@ -118,7 +118,7 @@ static struct mempool_test_stats stats[RTE_MAX_LCORE];
*/
static void
my_obj_init(struct rte_mempool *mp, __rte_unused void *arg,
- void *obj, unsigned i)
+ void *obj, unsigned int i)
{
uint32_t *objnum = obj;
memset(obj, 0, mp->elt_size);
@@ -163,7 +163,7 @@ static int
per_lcore_mempool_test(void *arg)
{
struct rte_mempool *mp = arg;
- unsigned lcore_id = rte_lcore_id();
+ unsigned int lcore_id = rte_lcore_id();
int ret = 0;
uint64_t start_cycles, end_cycles;
uint64_t time_diff = 0, hz = rte_get_timer_hz();
@@ -246,10 +246,10 @@ per_lcore_mempool_test(void *arg)
static int
launch_cores(struct rte_mempool *mp, unsigned int cores)
{
- unsigned lcore_id;
+ unsigned int lcore_id;
uint64_t rate;
int ret;
- unsigned cores_save = cores;
+ unsigned int cores_save = cores;
double hz = rte_get_timer_hz();
rte_atomic_store_explicit(&synchro, 0, rte_memory_order_relaxed);
@@ -260,7 +260,7 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
printf("mempool_autotest cache=%u cores=%u n_get_bulk=%u "
"n_put_bulk=%u n_keep=%u constant_n=%u ",
use_external_cache ?
- external_cache_size : (unsigned) mp->cache_size,
+ external_cache_size : (unsigned int) mp->cache_size,
cores, n_get_bulk, n_put_bulk, n_keep, use_constant_values);
if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) {
@@ -315,9 +315,9 @@ do_one_mempool_test(struct rte_mempool *mp, unsigned int cores, int external_cac
unsigned int bulk_tab_put[] = { 1, 4, CACHE_LINE_BURST, 32, 64, 128, 256,
RTE_MEMPOOL_CACHE_MAX_SIZE, 0 };
unsigned int keep_tab[] = { 32, 128, 512, 2048, 8192, 32768, 0 };
- unsigned *get_bulk_ptr;
- unsigned *put_bulk_ptr;
- unsigned *keep_ptr;
+ unsigned int *get_bulk_ptr;
+ unsigned int *put_bulk_ptr;
+ unsigned int *keep_ptr;
int ret;
for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
--
2.43.0
* [PATCH v3 2/4] mempool perf test: test default mempool with cache
2025-04-01 15:02 ` [PATCH v3 0/4] " Morten Brørup
2025-04-01 15:02 ` [PATCH v3 1/4] mempool perf test: replace bare unsigned with unsigned int Morten Brørup
@ 2025-04-01 15:02 ` Morten Brørup
2025-04-01 15:02 ` [PATCH v3 3/4] mempool perf test: improve output readability Morten Brørup
2025-04-01 15:02 ` [PATCH v3 4/4] mempool perf test: test random bulk sizes Morten Brørup
3 siblings, 0 replies; 22+ messages in thread
From: Morten Brørup @ 2025-04-01 15:02 UTC (permalink / raw)
To: Andrew Rybchenko, Bruce Richardson, dev; +Cc: Morten Brørup
Added test for the "default" mempool with cache.
Skip the tests for the "default" mempool, if it happens to use the same
driver (i.e. operations) as already tested.
Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
---
app/test/test_mempool_perf.c | 84 +++++++++++++++++++++++++-----------
1 file changed, 59 insertions(+), 25 deletions(-)
diff --git a/app/test/test_mempool_perf.c b/app/test/test_mempool_perf.c
index d4271a5ef9..3594d81888 100644
--- a/app/test/test_mempool_perf.c
+++ b/app/test/test_mempool_perf.c
@@ -354,7 +354,10 @@ do_all_mempool_perf_tests(unsigned int cores)
{
struct rte_mempool *mp_cache = NULL;
struct rte_mempool *mp_nocache = NULL;
- struct rte_mempool *default_pool = NULL;
+ struct rte_mempool *default_pool_cache = NULL;
+ struct rte_mempool *default_pool_nocache = NULL;
+ const char *mp_cache_ops;
+ const char *mp_nocache_ops;
const char *default_pool_ops;
int ret = -1;
@@ -368,6 +371,7 @@ do_all_mempool_perf_tests(unsigned int cores)
printf("cannot allocate mempool (without cache)\n");
goto err;
}
+ mp_nocache_ops = rte_mempool_get_ops(mp_nocache->ops_index)->name;
/* create a mempool (with cache) */
mp_cache = rte_mempool_create("perf_test_cache", MEMPOOL_SIZE,
@@ -380,47 +384,76 @@ do_all_mempool_perf_tests(unsigned int cores)
printf("cannot allocate mempool (with cache)\n");
goto err;
}
+ mp_cache_ops = rte_mempool_get_ops(mp_cache->ops_index)->name;
default_pool_ops = rte_mbuf_best_mempool_ops();
- /* Create a mempool based on Default handler */
- default_pool = rte_mempool_create_empty("default_pool",
- MEMPOOL_SIZE,
- MEMPOOL_ELT_SIZE,
- 0, 0,
- SOCKET_ID_ANY, 0);
-
- if (default_pool == NULL) {
- printf("cannot allocate %s mempool\n", default_pool_ops);
+
+ /* Create a mempool (without cache) based on Default handler */
+ default_pool_nocache = rte_mempool_create_empty("default_pool_nocache",
+ MEMPOOL_SIZE,
+ MEMPOOL_ELT_SIZE,
+ 0, 0,
+ SOCKET_ID_ANY, 0);
+ if (default_pool_nocache == NULL) {
+ printf("cannot allocate %s mempool (without cache)\n", default_pool_ops);
goto err;
}
-
- if (rte_mempool_set_ops_byname(default_pool, default_pool_ops, NULL)
- < 0) {
+ if (rte_mempool_set_ops_byname(default_pool_nocache, default_pool_ops, NULL) < 0) {
printf("cannot set %s handler\n", default_pool_ops);
goto err;
}
-
- if (rte_mempool_populate_default(default_pool) < 0) {
+ if (rte_mempool_populate_default(default_pool_nocache) < 0) {
printf("cannot populate %s mempool\n", default_pool_ops);
goto err;
}
+ rte_mempool_obj_iter(default_pool_nocache, my_obj_init, NULL);
+
+ /* Create a mempool (with cache) based on Default handler */
+ default_pool_cache = rte_mempool_create_empty("default_pool_cache",
+ MEMPOOL_SIZE,
+ MEMPOOL_ELT_SIZE,
+ RTE_MEMPOOL_CACHE_MAX_SIZE, 0,
+ SOCKET_ID_ANY, 0);
+ if (default_pool_cache == NULL) {
+ printf("cannot allocate %s mempool (with cache)\n", default_pool_ops);
+ goto err;
+ }
+ if (rte_mempool_set_ops_byname(default_pool_cache, default_pool_ops, NULL) < 0) {
+ printf("cannot set %s handler\n", default_pool_ops);
+ goto err;
+ }
+ if (rte_mempool_populate_default(default_pool_cache) < 0) {
+ printf("cannot populate %s mempool\n", default_pool_ops);
+ goto err;
+ }
+ rte_mempool_obj_iter(default_pool_cache, my_obj_init, NULL);
- rte_mempool_obj_iter(default_pool, my_obj_init, NULL);
-
- printf("start performance test (without cache)\n");
+ printf("start performance test (using %s, without cache)\n",
+ mp_nocache_ops);
if (do_one_mempool_test(mp_nocache, cores, 0) < 0)
goto err;
- printf("start performance test for %s (without cache)\n",
- default_pool_ops);
- if (do_one_mempool_test(default_pool, cores, 0) < 0)
- goto err;
+ if (strcmp(default_pool_ops, mp_nocache_ops) != 0) {
+ printf("start performance test for %s (without cache)\n",
+ default_pool_ops);
+ if (do_one_mempool_test(default_pool_nocache, cores, 0) < 0)
+ goto err;
+ }
- printf("start performance test (with cache)\n");
+ printf("start performance test (using %s, with cache)\n",
+ mp_cache_ops);
if (do_one_mempool_test(mp_cache, cores, 0) < 0)
goto err;
- printf("start performance test (with user-owned cache)\n");
+ if (strcmp(default_pool_ops, mp_cache_ops) != 0) {
+ printf("start performance test for %s (with cache)\n",
+ default_pool_ops);
+ if (do_one_mempool_test(default_pool_cache, cores, 0) < 0)
+ goto err;
+ }
+
+ printf("start performance test (using %s, with user-owned cache)\n",
+ mp_nocache_ops);
if (do_one_mempool_test(mp_nocache, cores, 1) < 0)
goto err;
@@ -431,7 +464,8 @@ do_all_mempool_perf_tests(unsigned int cores)
err:
rte_mempool_free(mp_cache);
rte_mempool_free(mp_nocache);
- rte_mempool_free(default_pool);
+ rte_mempool_free(default_pool_cache);
+ rte_mempool_free(default_pool_nocache);
return ret;
}
--
2.43.0
* [PATCH v3 3/4] mempool perf test: improve output readability
2025-04-01 15:02 ` [PATCH v3 0/4] " Morten Brørup
2025-04-01 15:02 ` [PATCH v3 1/4] mempool perf test: replace bare unsigned with unsigned int Morten Brørup
2025-04-01 15:02 ` [PATCH v3 2/4] mempool perf test: test default mempool with cache Morten Brørup
@ 2025-04-01 15:02 ` Morten Brørup
2025-04-01 15:02 ` [PATCH v3 4/4] mempool perf test: test random bulk sizes Morten Brørup
3 siblings, 0 replies; 22+ messages in thread
From: Morten Brørup @ 2025-04-01 15:02 UTC (permalink / raw)
To: Andrew Rybchenko, Bruce Richardson, dev; +Cc: Morten Brørup
Improved the output parameter ordering and formatting for readability.
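A hedged illustration of what the reordering and the fixed field widths in
the diff below (%5u, %3u, and %10 for the rate) buy: short numbers get
padded, so successive result lines stay column-aligned. The function below
is not part of the test; the parameter names mirror the globals used in
launch_cores():

#include <inttypes.h>
#include <stdio.h>

static void
print_result_line(unsigned int cache_size, unsigned int cores,
		unsigned int n_keep, unsigned int n_get_bulk,
		unsigned int n_put_bulk, unsigned int constant_n,
		uint64_t rate)
{
	/* n_keep first, then the bulk sizes, as reordered by this patch. */
	printf("mempool_autotest cache=%u cores=%u n_keep=%5u ",
	       cache_size, cores, n_keep);
	printf("n_get_bulk=%3u n_put_bulk=%3u constant_n=%u ",
	       n_get_bulk, n_put_bulk, constant_n);
	printf("rate_persec=%10" PRIu64 "\n", rate);
}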
Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
---
app/test/test_mempool_perf.c | 20 +++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)
diff --git a/app/test/test_mempool_perf.c b/app/test/test_mempool_perf.c
index 3594d81888..c252f1968a 100644
--- a/app/test/test_mempool_perf.c
+++ b/app/test/test_mempool_perf.c
@@ -257,11 +257,13 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
/* reset stats */
memset(stats, 0, sizeof(stats));
- printf("mempool_autotest cache=%u cores=%u n_get_bulk=%u "
- "n_put_bulk=%u n_keep=%u constant_n=%u ",
- use_external_cache ?
- external_cache_size : (unsigned int) mp->cache_size,
- cores, n_get_bulk, n_put_bulk, n_keep, use_constant_values);
+ printf("mempool_autotest cache=%u cores=%u n_keep=%5u ",
+ use_external_cache ? external_cache_size : (unsigned int) mp->cache_size,
+ cores,
+ n_keep);
+ printf("n_get_bulk=%3u n_put_bulk=%3u constant_n=%u ",
+ n_get_bulk, n_put_bulk,
+ use_constant_values);
if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) {
printf("mempool is not full\n");
@@ -301,7 +303,7 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
rate += (double)stats[lcore_id].enq_count * hz /
(double)stats[lcore_id].duration_cycles;
- printf("rate_persec=%" PRIu64 "\n", rate);
+ printf("rate_persec=%10" PRIu64 "\n", rate);
return 0;
}
@@ -320,9 +322,9 @@ do_one_mempool_test(struct rte_mempool *mp, unsigned int cores, int external_cac
unsigned int *keep_ptr;
int ret;
- for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
- for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
- for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {
+ for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {
+ for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
+ for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
if (*keep_ptr < *get_bulk_ptr || *keep_ptr < *put_bulk_ptr)
continue;
--
2.43.0
* [PATCH v3 4/4] mempool perf test: test random bulk sizes
2025-04-01 15:02 ` [PATCH v3 0/4] " Morten Brørup
` (2 preceding siblings ...)
2025-04-01 15:02 ` [PATCH v3 3/4] mempool perf test: improve output readability Morten Brørup
@ 2025-04-01 15:02 ` Morten Brørup
3 siblings, 0 replies; 22+ messages in thread
From: Morten Brørup @ 2025-04-01 15:02 UTC (permalink / raw)
To: Andrew Rybchenko, Bruce Richardson, dev; +Cc: Morten Brørup
Bulk requests to get or put objects in a mempool often vary in size.
A series of tests with pseudorandom request sizes was added to mitigate
the benefits of the CPU's dynamic branch predictor.
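As a hedged illustration of the approach taken in the diff below: each bulk
size is drawn from a tiny xor/shift expression of a running counter. The
standalone helper shown here is not part of the patch (the name
next_bulk_size is made up), but it uses the same expression. It relies on
x_max_bulk being a power of two, which holds for every entry in
bulk_tab_max[], so the mask yields a value in [0, x_max_bulk - 1] and the
+1 shifts it into [1, x_max_bulk]:

/* Minimal sketch: the bulk-size generator used inline in test_loop_random().
 * Assumes x_max_bulk is a power of two. */
static inline unsigned int
next_bulk_size(unsigned int r, unsigned int x_max_bulk)
{
	return ((r ^ (r >> 2) ^ (r << 3)) & (x_max_bulk - 1)) + 1;
}

In the test loop, r is simply incremented once per bulk operation; the
resulting sequence is deliberately low-entropy, since the only goal is to
keep the CPU's branch predictor from locking onto a fixed bulk size.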
Signed-off-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Andrew Rybchenko <andrew.rybchenko@oktetlabs.ru>
---
app/test/test_mempool_perf.c | 102 ++++++++++++++++++++++++++++++++---
1 file changed, 95 insertions(+), 7 deletions(-)
diff --git a/app/test/test_mempool_perf.c b/app/test/test_mempool_perf.c
index c252f1968a..40ecaa5049 100644
--- a/app/test/test_mempool_perf.c
+++ b/app/test/test_mempool_perf.c
@@ -33,6 +33,13 @@
* Mempool performance
* =======
*
+ * Each core gets *n_keep* objects per bulk of a pseudorandom number
+ * between 1 and *n_max_bulk*.
+ * Objects are put back in the pool per bulk of a similar pseudorandom number.
+ * Note: The very low entropy of the randomization algorithm is harmless, because
+ * the sole purpose of randomization is to prevent the CPU's dynamic branch
+ * predictor from enhancing the test results.
+ *
* Each core get *n_keep* objects per bulk of *n_get_bulk*. Then,
* objects are put back in the pool per bulk of *n_put_bulk*.
*
@@ -52,7 +59,12 @@
* - Two cores with user-owned cache
* - Max. cores with user-owned cache
*
- * - Bulk size (*n_get_bulk*, *n_put_bulk*)
+ * - Pseudorandom max bulk size (*n_max_bulk*)
+ *
+ * - Max bulk from CACHE_LINE_BURST to 256, and RTE_MEMPOOL_CACHE_MAX_SIZE,
+ * where CACHE_LINE_BURST is the number of pointers fitting into one CPU cache line.
+ *
+ * - Fixed bulk size (*n_get_bulk*, *n_put_bulk*)
*
* - Bulk get from 1 to 256, and RTE_MEMPOOL_CACHE_MAX_SIZE
* - Bulk put from 1 to 256, and RTE_MEMPOOL_CACHE_MAX_SIZE
@@ -93,6 +105,9 @@ static unsigned int external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
static RTE_ATOMIC(uint32_t) synchro;
+/* max random number of objects in one bulk operation (get and put) */
+static unsigned int n_max_bulk;
+
/* number of objects in one bulk operation (get or put) */
static unsigned int n_get_bulk;
static unsigned int n_put_bulk;
@@ -159,6 +174,50 @@ test_loop(struct rte_mempool *mp, struct rte_mempool_cache *cache,
return 0;
}
+static __rte_always_inline int
+test_loop_random(struct rte_mempool *mp, struct rte_mempool_cache *cache,
+ unsigned int x_keep, unsigned int x_max_bulk)
+{
+ alignas(RTE_CACHE_LINE_SIZE) void *obj_table[MAX_KEEP];
+ unsigned int idx;
+ unsigned int i;
+ unsigned int r = 0;
+ unsigned int x_bulk;
+ int ret;
+
+ for (i = 0; likely(i < (N / x_keep)); i++) {
+ /* get x_keep objects by bulk of random [1 .. x_max_bulk] */
+ for (idx = 0; idx < x_keep; idx += x_bulk, r++) {
+ /* Generate a pseudorandom number [1 .. x_max_bulk]. */
+ x_bulk = ((r ^ (r >> 2) ^ (r << 3)) & (x_max_bulk - 1)) + 1;
+ if (unlikely(idx + x_bulk > x_keep))
+ x_bulk = x_keep - idx;
+ ret = rte_mempool_generic_get(mp,
+ &obj_table[idx],
+ x_bulk,
+ cache);
+ if (unlikely(ret < 0)) {
+ rte_mempool_dump(stdout, mp);
+ return ret;
+ }
+ }
+
+ /* put the objects back by bulk of random [1 .. x_max_bulk] */
+ for (idx = 0; idx < x_keep; idx += x_bulk, r++) {
+ /* Generate a pseudorandom number [1 .. x_max_bulk]. */
+ x_bulk = ((r ^ (r >> 2) ^ (r << 3)) & (x_max_bulk - 1)) + 1;
+ if (unlikely(idx + x_bulk > x_keep))
+ x_bulk = x_keep - idx;
+ rte_mempool_generic_put(mp,
+ &obj_table[idx],
+ x_bulk,
+ cache);
+ }
+ }
+
+ return 0;
+}
+
static int
per_lcore_mempool_test(void *arg)
{
@@ -181,9 +240,9 @@ per_lcore_mempool_test(void *arg)
}
/* n_get_bulk and n_put_bulk must be divisors of n_keep */
- if (((n_keep / n_get_bulk) * n_get_bulk) != n_keep)
+ if (n_max_bulk == 0 && (((n_keep / n_get_bulk) * n_get_bulk) != n_keep))
GOTO_ERR(ret, out);
- if (((n_keep / n_put_bulk) * n_put_bulk) != n_keep)
+ if (n_max_bulk == 0 && (((n_keep / n_put_bulk) * n_put_bulk) != n_keep))
GOTO_ERR(ret, out);
/* for constant n, n_get_bulk and n_put_bulk must be the same */
if (use_constant_values && n_put_bulk != n_get_bulk)
@@ -200,7 +259,9 @@ per_lcore_mempool_test(void *arg)
start_cycles = rte_get_timer_cycles();
while (time_diff/hz < TIME_S) {
- if (!use_constant_values)
+ if (n_max_bulk != 0)
+ ret = test_loop_random(mp, cache, n_keep, n_max_bulk);
+ else if (!use_constant_values)
ret = test_loop(mp, cache, n_keep, n_get_bulk, n_put_bulk);
else if (n_get_bulk == 1)
ret = test_loop(mp, cache, n_keep, 1, 1);
@@ -261,9 +322,13 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
use_external_cache ? external_cache_size : (unsigned int) mp->cache_size,
cores,
n_keep);
- printf("n_get_bulk=%3u n_put_bulk=%3u constant_n=%u ",
- n_get_bulk, n_put_bulk,
- use_constant_values);
+ if (n_max_bulk != 0)
+ printf("n_max_bulk=%3u ",
+ n_max_bulk);
+ else
+ printf("n_get_bulk=%3u n_put_bulk=%3u constant_n=%u ",
+ n_get_bulk, n_put_bulk,
+ use_constant_values);
if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) {
printf("mempool is not full\n");
@@ -312,16 +377,37 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
static int
do_one_mempool_test(struct rte_mempool *mp, unsigned int cores, int external_cache)
{
+ unsigned int bulk_tab_max[] = { CACHE_LINE_BURST, 32, 64, 128, 256,
+ RTE_MEMPOOL_CACHE_MAX_SIZE, 0 };
unsigned int bulk_tab_get[] = { 1, 4, CACHE_LINE_BURST, 32, 64, 128, 256,
RTE_MEMPOOL_CACHE_MAX_SIZE, 0 };
unsigned int bulk_tab_put[] = { 1, 4, CACHE_LINE_BURST, 32, 64, 128, 256,
RTE_MEMPOOL_CACHE_MAX_SIZE, 0 };
unsigned int keep_tab[] = { 32, 128, 512, 2048, 8192, 32768, 0 };
+ unsigned int *max_bulk_ptr;
unsigned int *get_bulk_ptr;
unsigned int *put_bulk_ptr;
unsigned int *keep_ptr;
int ret;
+ for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {
+ for (max_bulk_ptr = bulk_tab_max; *max_bulk_ptr; max_bulk_ptr++) {
+
+ if (*keep_ptr < *max_bulk_ptr)
+ continue;
+
+ use_external_cache = external_cache;
+ use_constant_values = 0;
+ n_max_bulk = *max_bulk_ptr;
+ n_get_bulk = 0;
+ n_put_bulk = 0;
+ n_keep = *keep_ptr;
+ ret = launch_cores(mp, cores);
+ if (ret < 0)
+ return -1;
+ }
+ }
+
for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {
for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) {
for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
@@ -331,6 +417,7 @@ do_one_mempool_test(struct rte_mempool *mp, unsigned int cores, int external_cac
use_external_cache = external_cache;
use_constant_values = 0;
+ n_max_bulk = 0;
n_get_bulk = *get_bulk_ptr;
n_put_bulk = *put_bulk_ptr;
n_keep = *keep_ptr;
@@ -348,6 +435,7 @@ do_one_mempool_test(struct rte_mempool *mp, unsigned int cores, int external_cac
}
}
}
+
return 0;
}
--
2.43.0
Thread overview: 22+ messages
2025-02-28 16:48 [PATCH] mempool perf test: test random bulk sizes Morten Brørup
2025-03-13 8:23 ` Morten Brørup
2025-03-25 7:15 ` Morten Brørup
2025-03-30 8:29 ` Andrew Rybchenko
2025-03-30 8:57 ` Morten Brørup
2025-03-31 10:03 ` [PATCH v2 0/4] " Morten Brørup
2025-03-31 10:03 ` [PATCH v2 1/4] mempool perf test: replace bare unsigned with unsigned int Morten Brørup
2025-03-31 10:03 ` [PATCH v2 2/4] mempool perf test: test default mempool with cache Morten Brørup
2025-03-31 10:03 ` [PATCH v2 3/4] mempool perf test: improve output readability Morten Brørup
2025-03-31 10:03 ` [PATCH v2 4/4] mempool perf test: test random bulk sizes Morten Brørup
2025-03-31 14:48 ` Andrew Rybchenko
2025-03-31 14:48 ` [PATCH v2 0/4] " Andrew Rybchenko
2025-04-01 15:00 ` Morten Brørup
2025-04-01 15:00 ` [PATCH v2 1/4] mempool perf test: replace bare unsigned with unsigned int Morten Brørup
2025-04-01 15:00 ` [PATCH v2 2/4] mempool perf test: test default mempool with cache Morten Brørup
2025-04-01 15:00 ` [PATCH v2 3/4] mempool perf test: improve output readability Morten Brørup
2025-04-01 15:00 ` [PATCH v2 4/4] mempool perf test: test random bulk sizes Morten Brørup
2025-04-01 15:02 ` [PATCH v3 0/4] " Morten Brørup
2025-04-01 15:02 ` [PATCH v3 1/4] mempool perf test: replace bare unsigned with unsigned int Morten Brørup
2025-04-01 15:02 ` [PATCH v3 2/4] mempool perf test: test default mempool with cache Morten Brørup
2025-04-01 15:02 ` [PATCH v3 3/4] mempool perf test: improve output readability Morten Brørup
2025-04-01 15:02 ` [PATCH v3 4/4] mempool perf test: test random bulk sizes Morten Brørup