Subject: Re: [PATCH v2] mempool: test performance with larger bursts
From: fengchengwen
To: Morten Brørup, dev@dpdk.org
Date: Wed, 24 Jan 2024 10:41:40 +0800
Message-ID: <6f36e290-1cf5-144c-3886-6016907b7b25@huawei.com>
In-Reply-To: <20240122143445.45276-1-mb@smartsharesystems.com>
References: <20240121045249.22465-1-mb@smartsharesystems.com>
 <20240122143445.45276-1-mb@smartsharesystems.com>

Hi Morten,

On 2024/1/22 22:34, Morten Brørup wrote:
> Bursts of up to 64 or 128 packets are not uncommon, so increase the
> maximum tested get and put burst sizes from 32 to 128.
>
> Some applications keep more than 512 objects, so increase the maximum
> number of kept objects from 512 to 8192, still in jumps of factor four.
> This exceeds the typical mempool cache size of 512 objects, so the test
> also exercises the mempool driver.
>
> Signed-off-by: Morten Brørup
>
> ---
>
> v2: Addressed feedback by Chengwen Feng
> * Added get and put burst sizes of 64 packets, which is probably also not
>   uncommon.
> * Fixed list of number of kept objects so list remains in jumps of factor
>   four.
> * Added three derivative test cases, for faster testing.
> ---
>  app/test/test_mempool_perf.c | 107 ++++++++++++++++++++---------------
>  1 file changed, 62 insertions(+), 45 deletions(-)
>
> diff --git a/app/test/test_mempool_perf.c b/app/test/test_mempool_perf.c
> index 96de347f04..a5a7d43608 100644
> --- a/app/test/test_mempool_perf.c
> +++ b/app/test/test_mempool_perf.c
> @@ -1,6 +1,6 @@
>  /* SPDX-License-Identifier: BSD-3-Clause
>   * Copyright(c) 2010-2014 Intel Corporation
> - * Copyright(c) 2022 SmartShare Systems
> + * Copyright(c) 2022-2024 SmartShare Systems
>   */
>
>  #include
> @@ -54,22 +54,24 @@
>   *
>   * - Bulk size (*n_get_bulk*, *n_put_bulk*)
>   *
> - *   - Bulk get from 1 to 32
> - *   - Bulk put from 1 to 32
> - *   - Bulk get and put from 1 to 32, compile time constant
> + *   - Bulk get from 1 to 128
> + *   - Bulk put from 1 to 128
> + *   - Bulk get and put from 1 to 128, compile time constant
>   *
>   * - Number of kept objects (*n_keep*)
>   *
>   *   - 32
>   *   - 128
>   *   - 512
> + *   - 2048
> + *   - 8192
>   */
>
>  #define N 65536
>  #define TIME_S 5
>  #define MEMPOOL_ELT_SIZE 2048
> -#define MAX_KEEP 512
> -#define MEMPOOL_SIZE ((rte_lcore_count()*(MAX_KEEP+RTE_MEMPOOL_CACHE_MAX_SIZE))-1)
> +#define MAX_KEEP 8192
> +#define MEMPOOL_SIZE ((rte_lcore_count()*(MAX_KEEP+RTE_MEMPOOL_CACHE_MAX_SIZE*2))-1)
>
>  /* Number of pointers fitting into one cache line. */
>  #define CACHE_LINE_BURST (RTE_CACHE_LINE_SIZE / sizeof(uintptr_t))
> @@ -204,6 +206,10 @@ per_lcore_mempool_test(void *arg)
>                                  CACHE_LINE_BURST, CACHE_LINE_BURST);
>          else if (n_get_bulk == 32)
>                  ret = test_loop(mp, cache, n_keep, 32, 32);
> +        else if (n_get_bulk == 64)
> +                ret = test_loop(mp, cache, n_keep, 64, 64);
> +        else if (n_get_bulk == 128)
> +                ret = test_loop(mp, cache, n_keep, 128, 128);
>          else
>                  ret = -1;
>
> @@ -289,9 +295,9 @@ launch_cores(struct rte_mempool *mp, unsigned int cores)
>  static int
>  do_one_mempool_test(struct rte_mempool *mp, unsigned int cores)
>  {
> -    unsigned int bulk_tab_get[] = { 1, 4, CACHE_LINE_BURST, 32, 0 };
> -    unsigned int bulk_tab_put[] = { 1, 4, CACHE_LINE_BURST, 32, 0 };
> -    unsigned int keep_tab[] = { 32, 128, 512, 0 };
> +    unsigned int bulk_tab_get[] = { 1, 4, CACHE_LINE_BURST, 32, 64, 128, 0 };
> +    unsigned int bulk_tab_put[] = { 1, 4, CACHE_LINE_BURST, 32, 64, 128, 0 };
> +    unsigned int keep_tab[] = { 32, 128, 512, 2048, 8192, 0 };
>      unsigned *get_bulk_ptr;
>      unsigned *put_bulk_ptr;
>      unsigned *keep_ptr;
> @@ -301,6 +307,9 @@ do_one_mempool_test(struct rte_mempool *mp, unsigned int cores)
>      for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) {
>      for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) {
>
> +        if (*keep_ptr < *get_bulk_ptr || *keep_ptr < *put_bulk_ptr)
> +            continue;
> +
>          use_constant_values = 0;
>          n_get_bulk = *get_bulk_ptr;
>          n_put_bulk = *put_bulk_ptr;
> @@ -323,7 +332,7 @@ do_one_mempool_test(struct rte_mempool *mp, unsigned int cores)
>  }
>
>  static int
> -test_mempool_perf(void)
> +do_all_mempool_perf_tests(unsigned int cores)
>  {
>      struct rte_mempool *mp_cache = NULL;
>      struct rte_mempool *mp_nocache = NULL;
> @@ -376,65 +385,73 @@ test_mempool_perf(void)
>
>      rte_mempool_obj_iter(default_pool, my_obj_init, NULL);
>
> -    /* performance test with 1, 2 and max cores */
>      printf("start performance test (without cache)\n");
> -
> -    if (do_one_mempool_test(mp_nocache, 1) < 0)
> +    if (do_one_mempool_test(mp_nocache, cores) < 0)
>          goto err;
>
> -    if (do_one_mempool_test(mp_nocache, 2) < 0)
> -        goto err;
> -
> -    if (do_one_mempool_test(mp_nocache, rte_lcore_count()) < 0)
> -        goto err;
> -
> -    /* performance test with 1, 2 and max cores */
>      printf("start performance test for %s (without cache)\n",
>             default_pool_ops);
> -
> -    if (do_one_mempool_test(default_pool, 1) < 0)
> +    if (do_one_mempool_test(default_pool, cores) < 0)
>          goto err;
>
> -    if (do_one_mempool_test(default_pool, 2) < 0)
> +    printf("start performance test (with cache)\n");
> +    if (do_one_mempool_test(mp_cache, cores) < 0)
>          goto err;
>
> -    if (do_one_mempool_test(default_pool, rte_lcore_count()) < 0)
> +    printf("start performance test (with user-owned cache)\n");
> +    use_external_cache = 1;

This variable should be set back to zero after the next test, because the
test command may be executed repeatedly. I think the original code already
has this bug; I suggest adding a bugfix first and then rebasing this commit
on top of it (see the sketch at the end of this mail).

> +    if (do_one_mempool_test(mp_nocache, cores) < 0)
>          goto err;
>
> -    /* performance test with 1, 2 and max cores */
> -    printf("start performance test (with cache)\n");
> +    rte_mempool_list_dump(stdout);
>
> -    if (do_one_mempool_test(mp_cache, 1) < 0)
> -        goto err;
> +    ret = 0;
>
> -    if (do_one_mempool_test(mp_cache, 2) < 0)
> -        goto err;
> +err:
> +    rte_mempool_free(mp_cache);
> +    rte_mempool_free(mp_nocache);
> +    rte_mempool_free(default_pool);
> +    return ret;
> +}
>
> -    if (do_one_mempool_test(mp_cache, rte_lcore_count()) < 0)
> -        goto err;
> +static int
> +test_mempool_perf_1core(void)
> +{
> +    return do_all_mempool_perf_tests(1);
> +}
>
> -    /* performance test with 1, 2 and max cores */
> -    printf("start performance test (with user-owned cache)\n");
> -    use_external_cache = 1;
> +static int
> +test_mempool_perf_2cores(void)
> +{
> +    return do_all_mempool_perf_tests(2);
> +}
>
> -    if (do_one_mempool_test(mp_nocache, 1) < 0)
> -        goto err;
> +static int
> +test_mempool_perf_allcores(void)
> +{
> +    return do_all_mempool_perf_tests(rte_lcore_count());
> +}
>
> -    if (do_one_mempool_test(mp_nocache, 2) < 0)
> -        goto err;
> +static int
> +test_mempool_perf(void)
> +{
> +    int ret = -1;
>
> -    if (do_one_mempool_test(mp_nocache, rte_lcore_count()) < 0)
> +    /* performance test with 1, 2 and max cores */
> +    if (do_all_mempool_perf_tests(1) < 0)
> +        goto err;
> +    if (do_all_mempool_perf_tests(2) < 0)
> +        goto err;
> +    if (do_all_mempool_perf_tests(rte_lcore_count()) < 0)
>          goto err;
> -
> -    rte_mempool_list_dump(stdout);
>
>      ret = 0;
>
>  err:
> -    rte_mempool_free(mp_cache);
> -    rte_mempool_free(mp_nocache);
> -    rte_mempool_free(default_pool);
>      return ret;
>  }
>
>  REGISTER_PERF_TEST(mempool_perf_autotest, test_mempool_perf);
> +REGISTER_PERF_TEST(mempool_perf_autotest_1core, test_mempool_perf_1core);
> +REGISTER_PERF_TEST(mempool_perf_autotest_2cores, test_mempool_perf_2cores);
> +REGISTER_PERF_TEST(mempool_perf_autotest_allcores, test_mempool_perf_allcores);

I'm OK with the derivative tests by core count.

With the above bug fixed,
Acked-by: Chengwen Feng

Thanks

>
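For reference, a minimal sketch of the reset suggested above, assuming it is
applied inside do_all_mempool_perf_tests() right after the user-owned cache
test; this is only an illustration of the idea, not an actual follow-up patch:

    /*
     * Hypothetical bugfix sketch (not part of the posted patch):
     * clear the flag once the user-owned cache test has finished,
     * so that re-running the test command does not start with
     * use_external_cache still set from the previous run.
     */
    printf("start performance test (with user-owned cache)\n");
    use_external_cache = 1;
    if (do_one_mempool_test(mp_nocache, cores) < 0)
        goto err;
    use_external_cache = 0;    /* reset for subsequent invocations */

All identifiers here (use_external_cache, do_one_mempool_test, mp_nocache,
cores) are the ones already used in the patch above; only the final reset
line is new.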