DPDK patches and discussions
From: Aaron Conole <aconole@redhat.com>
To: Dmitry Kozlyuk <dkozlyuk@nvidia.com>
Cc: <dev@dpdk.org>,  Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Subject: Re: [PATCH v1 2/6] app/test: add allocator performance benchmark
Date: Mon, 17 Jan 2022 11:06:28 -0500	[thread overview]
Message-ID: <f7tczkq2vp7.fsf@redhat.com> (raw)
In-Reply-To: <20220117080801.481568-3-dkozlyuk@nvidia.com> (Dmitry Kozlyuk's message of "Mon, 17 Jan 2022 10:07:57 +0200")

Dmitry Kozlyuk <dkozlyuk@nvidia.com> writes:

> Memory allocator performance is crucial to applications that deal
> with large amounts of memory or allocate frequently. DPDK allocator
> performance is affected by EAL options, by the API used and, not
> least, by the allocation size. The new autotest is intended to be run
> with different EAL options. It measures performance with a range of
> sizes for different APIs: rte_malloc, rte_zmalloc, and
> rte_memzone_reserve.
>
> Work distribution between allocation and deallocation depends on EAL
> options. The test prints both times, as well as the total, to ease
> comparison.
>
> Memory can be filled with zeroes at different points of the allocation
> path, but this always takes a considerable fraction of the overall
> time. This is why the test measures the fill speed and prints how long
> clearing takes for each size as a reference (for rte_memzone_reserve,
> estimates are printed).
>
> Signed-off-by: Dmitry Kozlyuk <dkozlyuk@nvidia.com>
> Reviewed-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
> ---

Thanks for making the changes.

Acked-by: Aaron Conole <aconole@redhat.com>
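
A note for anyone using this for comparison runs: the commit message says the
autotest is meant to be run under different EAL options, so a minimal
invocation sketch could look like the one below. The binary path, the choice
of --huge-unlink as the option to vary, and the DPDK_TEST variable are
illustrative assumptions about a typical build, not something defined by this
patch.

  # Hypothetical example: run the perf test with and without --huge-unlink
  # and compare the printed timings (adjust the path to wherever dpdk-test
  # lands in your build directory).
  echo "malloc_perf_autotest" | ./build/app/test/dpdk-test --huge-unlink
  echo "malloc_perf_autotest" | ./build/app/test/dpdk-test
  # The test name can also be passed via the DPDK_TEST environment variable
  # instead of stdin.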

>  app/test/meson.build        |   2 +
>  app/test/test_malloc_perf.c | 174 ++++++++++++++++++++++++++++++++++++
>  2 files changed, 176 insertions(+)
>  create mode 100644 app/test/test_malloc_perf.c
>
> diff --git a/app/test/meson.build b/app/test/meson.build
> index 344a609a4d..50cf2602a9 100644
> --- a/app/test/meson.build
> +++ b/app/test/meson.build
> @@ -88,6 +88,7 @@ test_sources = files(
>          'test_lpm6_perf.c',
>          'test_lpm_perf.c',
>          'test_malloc.c',
> +        'test_malloc_perf.c',
>          'test_mbuf.c',
>          'test_member.c',
>          'test_member_perf.c',
> @@ -295,6 +296,7 @@ extra_test_names = [
>  
>  perf_test_names = [
>          'ring_perf_autotest',
> +        'malloc_perf_autotest',
>          'mempool_perf_autotest',
>          'memcpy_perf_autotest',
>          'hash_perf_autotest',
> diff --git a/app/test/test_malloc_perf.c b/app/test/test_malloc_perf.c
> new file mode 100644
> index 0000000000..9686fc8af5
> --- /dev/null
> +++ b/app/test/test_malloc_perf.c
> @@ -0,0 +1,174 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright (c) 2021 NVIDIA Corporation & Affiliates
> + */
> +
> +#include <inttypes.h>
> +#include <string.h>
> +#include <rte_cycles.h>
> +#include <rte_errno.h>
> +#include <rte_malloc.h>
> +#include <rte_memzone.h>
> +
> +#include "test.h"
> +
> +#define TEST_LOG(level, ...) RTE_LOG(level, USER1, __VA_ARGS__)
> +
> +typedef void * (alloc_t)(const char *name, size_t size, unsigned int align);
> +typedef void (free_t)(void *addr);
> +typedef void * (memset_t)(void *addr, int value, size_t size);
> +
> +static const uint64_t KB = 1 << 10;
> +static const uint64_t GB = 1 << 30;
> +
> +static double
> +tsc_to_us(uint64_t tsc, size_t runs)
> +{
> +	return (double)tsc / rte_get_tsc_hz() * US_PER_S / runs;
> +}
> +
> +static int
> +test_memset_perf(double *us_per_gb)
> +{
> +	static const size_t RUNS = 20;
> +
> +	void *ptr;
> +	size_t i;
> +	uint64_t tsc;
> +
> +	TEST_LOG(INFO, "Reference: memset\n");
> +
> +	ptr = rte_malloc(NULL, GB, 0);
> +	if (ptr == NULL) {
> +		TEST_LOG(ERR, "rte_malloc(size=%"PRIx64") failed\n", GB);
> +		return -1;
> +	}
> +
> +	tsc = rte_rdtsc_precise();
> +	for (i = 0; i < RUNS; i++)
> +		memset(ptr, 0, GB);
> +	tsc = rte_rdtsc_precise() - tsc;
> +
> +	*us_per_gb = tsc_to_us(tsc, RUNS);
> +	TEST_LOG(INFO, "Result: %.3f GiB/s <=> %.2f us/MiB\n",
> +			US_PER_S / *us_per_gb, *us_per_gb / KB);
> +
> +	rte_free(ptr);
> +	TEST_LOG(INFO, "\n");
> +	return 0;
> +}
> +
> +static int
> +test_alloc_perf(const char *name, alloc_t *alloc_fn, free_t *free_fn,
> +		memset_t *memset_fn, double memset_gb_us, size_t max_runs)
> +{
> +	static const size_t SIZES[] = {
> +			1 << 6, 1 << 7, 1 << 10, 1 << 12, 1 << 16, 1 << 20,
> +			1 << 21, 1 << 22, 1 << 24, 1 << 30 };
> +
> +	size_t i, j;
> +	void **ptrs;
> +
> +	TEST_LOG(INFO, "Performance: %s\n", name);
> +
> +	ptrs = calloc(max_runs, sizeof(ptrs[0]));
> +	if (ptrs == NULL) {
> +		TEST_LOG(ERR, "Cannot allocate memory for pointers\n");
> +		return -1;
> +	}
> +
> +	TEST_LOG(INFO, "%12s%8s%12s%12s%12s%17s\n", "Size (B)", "Runs",
> +			"Alloc (us)", "Free (us)", "Total (us)",
> +			memset_fn != NULL ? "memset (us)" : "est.memset (us)");
> +	for (i = 0; i < RTE_DIM(SIZES); i++) {
> +		size_t size = SIZES[i];
> +		size_t runs_done;
> +		uint64_t tsc_start, tsc_alloc, tsc_memset = 0, tsc_free;
> +		double alloc_time, free_time, memset_time;
> +
> +		tsc_start = rte_rdtsc_precise();
> +		for (j = 0; j < max_runs; j++) {
> +			ptrs[j] = alloc_fn(NULL, size, 0);
> +			if (ptrs[j] == NULL)
> +				break;
> +		}
> +		tsc_alloc = rte_rdtsc_precise() - tsc_start;
> +
> +		if (j == 0) {
> +			TEST_LOG(INFO, "%12zu Interrupted: out of memory.\n",
> +					size);
> +			break;
> +		}
> +		runs_done = j;
> +
> +		if (memset_fn != NULL) {
> +			tsc_start = rte_rdtsc_precise();
> +			for (j = 0; j < runs_done && ptrs[j] != NULL; j++)
> +				memset_fn(ptrs[j], 0, size);
> +			tsc_memset = rte_rdtsc_precise() - tsc_start;
> +		}
> +
> +		tsc_start = rte_rdtsc_precise();
> +		for (j = 0; j < runs_done && ptrs[j] != NULL; j++)
> +			free_fn(ptrs[j]);
> +		tsc_free = rte_rdtsc_precise() - tsc_start;
> +
> +		alloc_time = tsc_to_us(tsc_alloc, runs_done);
> +		free_time = tsc_to_us(tsc_free, runs_done);
> +		memset_time = memset_fn != NULL ?
> +				tsc_to_us(tsc_memset, runs_done) :
> +				memset_gb_us * size / GB;
> +		TEST_LOG(INFO, "%12zu%8zu%12.2f%12.2f%12.2f%17.2f\n",
> +				size, runs_done, alloc_time, free_time,
> +				alloc_time + free_time, memset_time);
> +
> +		memset(ptrs, 0, max_runs * sizeof(ptrs[0]));
> +	}
> +
> +	free(ptrs);
> +	TEST_LOG(INFO, "\n");
> +	return 0;
> +}
> +
> +static void *
> +memzone_alloc(const char *name __rte_unused, size_t size, unsigned int align)
> +{
> +	const struct rte_memzone *mz;
> +	char gen_name[RTE_MEMZONE_NAMESIZE];
> +
> +	snprintf(gen_name, sizeof(gen_name), "test-mz-%"PRIx64, rte_rdtsc());
> +	mz = rte_memzone_reserve_aligned(gen_name, size, SOCKET_ID_ANY,
> +			RTE_MEMZONE_1GB | RTE_MEMZONE_SIZE_HINT_ONLY, align);
> +	return (void *)(uintptr_t)mz;
> +}
> +
> +static void
> +memzone_free(void *addr)
> +{
> +	rte_memzone_free((struct rte_memzone *)addr);
> +}
> +
> +static int
> +test_malloc_perf(void)
> +{
> +	static const size_t MAX_RUNS = 10000;
> +
> +	double memset_us_gb;
> +
> +	if (test_memset_perf(&memset_us_gb) < 0)
> +		return -1;
> +
> +	if (test_alloc_perf("rte_malloc", rte_malloc, rte_free, memset,
> +			memset_us_gb, MAX_RUNS) < 0)
> +		return -1;
> +	if (test_alloc_perf("rte_zmalloc", rte_zmalloc, rte_free, memset,
> +			memset_us_gb, MAX_RUNS) < 0)
> +		return -1;
> +
> +	if (test_alloc_perf("rte_memzone_reserve", memzone_alloc, memzone_free,
> +			NULL, memset_us_gb, RTE_MAX_MEMZONE - 1) < 0)
> +		return -1;
> +
> +	return 0;
> +}
> +
> +REGISTER_TEST_COMMAND(malloc_perf_autotest, test_malloc_perf);
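
Since the meson.build hunk above also adds malloc_perf_autotest to
perf_test_names, the test should be reachable through the meson test runner
as well. A sketch, assuming the usual DPDK suite naming and a build directory
called "build" (both are assumptions, not taken from this patch):

  # Hypothetical example: run only the new perf test via meson; drop the
  # test name to run the whole perf-tests suite instead.
  meson test -C build --suite perf-tests malloc_perf_autotest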



Thread overview: 53+ messages
2021-12-30 14:37 [RFC PATCH 0/6] Fast restart with many hugepages Dmitry Kozlyuk
2021-12-30 14:37 ` [RFC PATCH 1/6] doc: add hugepage mapping details Dmitry Kozlyuk
2021-12-30 14:37 ` [RFC PATCH 2/6] mem: add dirty malloc element support Dmitry Kozlyuk
2021-12-30 14:37 ` [RFC PATCH 3/6] eal: refactor --huge-unlink storage Dmitry Kozlyuk
2021-12-30 14:37 ` [RFC PATCH 4/6] eal/linux: allow hugepage file reuse Dmitry Kozlyuk
2021-12-30 14:48 ` [RFC PATCH 5/6] eal: allow hugepage file reuse with --huge-unlink Dmitry Kozlyuk
2021-12-30 14:49 ` [RFC PATCH 6/6] app/test: add allocator performance benchmark Dmitry Kozlyuk
2022-01-17  8:07 ` [PATCH v1 0/6] Fast restart with many hugepages Dmitry Kozlyuk
2022-01-17  8:07   ` [PATCH v1 1/6] doc: add hugepage mapping details Dmitry Kozlyuk
2022-01-17  9:20     ` Thomas Monjalon
2022-01-17  8:07   ` [PATCH v1 2/6] app/test: add allocator performance benchmark Dmitry Kozlyuk
2022-01-17 15:47     ` Bruce Richardson
2022-01-17 15:51       ` Bruce Richardson
2022-01-19 21:12         ` Dmitry Kozlyuk
2022-01-20  9:04           ` Bruce Richardson
2022-01-17 16:06     ` Aaron Conole [this message]
2022-01-17  8:07   ` [PATCH v1 3/6] mem: add dirty malloc element support Dmitry Kozlyuk
2022-01-17 14:07     ` Thomas Monjalon
2022-01-17  8:07   ` [PATCH v1 4/6] eal: refactor --huge-unlink storage Dmitry Kozlyuk
2022-01-17 14:10     ` Thomas Monjalon
2022-01-17  8:14   ` [PATCH v1 5/6] eal/linux: allow hugepage file reuse Dmitry Kozlyuk
2022-01-17 14:24     ` Thomas Monjalon
2022-01-17  8:14   ` [PATCH v1 6/6] eal: extend --huge-unlink for " Dmitry Kozlyuk
2022-01-17 14:27     ` Thomas Monjalon
2022-01-17 16:40   ` [PATCH v1 0/6] Fast restart with many hugepages Bruce Richardson
2022-01-19 21:12     ` Dmitry Kozlyuk
2022-01-20  9:05       ` Bruce Richardson
2022-01-19 21:09   ` [PATCH v2 " Dmitry Kozlyuk
2022-01-19 21:09     ` [PATCH v2 1/6] doc: add hugepage mapping details Dmitry Kozlyuk
2022-01-27 13:59       ` Bruce Richardson
2022-01-19 21:09     ` [PATCH v2 2/6] app/test: add allocator performance benchmark Dmitry Kozlyuk
2022-01-19 21:09     ` [PATCH v2 3/6] mem: add dirty malloc element support Dmitry Kozlyuk
2022-01-19 21:09     ` [PATCH v2 4/6] eal: refactor --huge-unlink storage Dmitry Kozlyuk
2022-01-19 21:11     ` [PATCH v2 5/6] eal/linux: allow hugepage file reuse Dmitry Kozlyuk
2022-01-19 21:11       ` [PATCH v2 6/6] eal: extend --huge-unlink for " Dmitry Kozlyuk
2022-01-27 12:07     ` [PATCH v2 0/6] Fast restart with many hugepages Bruce Richardson
2022-02-02 14:12     ` Thomas Monjalon
2022-02-02 21:54     ` David Marchand
2022-02-03 10:26       ` David Marchand
2022-02-03 18:13     ` [PATCH v3 " Dmitry Kozlyuk
2022-02-03 18:13       ` [PATCH v3 1/6] doc: add hugepage mapping details Dmitry Kozlyuk
2022-02-08 15:28         ` Burakov, Anatoly
2022-02-03 18:13       ` [PATCH v3 2/6] app/test: add allocator performance benchmark Dmitry Kozlyuk
2022-02-08 16:20         ` Burakov, Anatoly
2022-02-03 18:13       ` [PATCH v3 3/6] mem: add dirty malloc element support Dmitry Kozlyuk
2022-02-08 16:36         ` Burakov, Anatoly
2022-02-03 18:13       ` [PATCH v3 4/6] eal: refactor --huge-unlink storage Dmitry Kozlyuk
2022-02-08 16:39         ` Burakov, Anatoly
2022-02-03 18:13       ` [PATCH v3 5/6] eal/linux: allow hugepage file reuse Dmitry Kozlyuk
2022-02-08 17:05         ` Burakov, Anatoly
2022-02-03 18:13       ` [PATCH v3 6/6] eal: extend --huge-unlink for " Dmitry Kozlyuk
2022-02-08 17:14         ` Burakov, Anatoly
2022-02-08 20:40       ` [PATCH v3 0/6] Fast restart with many hugepages David Marchand
