From mboxrd@z Thu Jan  1 00:00:00 1970
From: Phil Yang <phil.yang@arm.com>
To: aconole@redhat.com, maicolgabriel@hotmail.com, dev@dpdk.org
Cc: david.marchand@redhat.com, drc@linux.vnet.ibm.com, gavin.hu@arm.com,
 Honnappa.Nagarahalli@arm.com, ruifeng.wang@arm.com, nd@arm.com
Date: Mon, 23 Mar 2020 12:16:17 +0800
Message-Id: <1584936978-11899-1-git-send-email-phil.yang@arm.com>
X-Mailer: git-send-email 2.7.4
Subject: [dpdk-dev] [PATCH 1/2] test/mcslock: move performance test to perf tests

The MCS lock performance test takes more than 10 seconds and leads to
a meson test timeout on some platforms. Move the performance test into
the perf tests.

Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
 MAINTAINERS                  |   1 +
 app/test/Makefile            |   1 +
 app/test/autotest_data.py    |   6 +++
 app/test/meson.build         |   2 +
 app/test/test_mcslock.c      |  88 -------------------------------
 app/test/test_mcslock_perf.c | 121 +++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 131 insertions(+), 88 deletions(-)
 create mode 100644 app/test/test_mcslock_perf.c

diff --git a/MAINTAINERS b/MAINTAINERS
index db235c2..411bdeb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -247,6 +247,7 @@ MCSlock - EXPERIMENTAL
 M: Phil Yang <phil.yang@arm.com>
 F: lib/librte_eal/common/include/generic/rte_mcslock.h
 F: app/test/test_mcslock.c
+F: app/test/test_mcslock_perf.c
 
 Ticketlock
 M: Joyce Kong <joyce.kong@arm.com>
diff --git a/app/test/Makefile b/app/test/Makefile
index 1f080d1..97de3ac 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -65,6 +65,7 @@ SRCS-y += test_barrier.c
 SRCS-y += test_malloc.c
 SRCS-y += test_cycles.c
 SRCS-y += test_mcslock.c
+SRCS-y += test_mcslock_perf.c
 SRCS-y += test_spinlock.c
 SRCS-y += test_ticketlock.c
 SRCS-y += test_memory.c
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 7b1d013..2a4619d 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -784,6 +784,12 @@
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "MCS Lock performance autotest",
+        "Command": "mcslock_perf_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     #
     # Please always make sure that ring_perf is the last test!
     #
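(For context: the autotest_data.py entry just above registers
"mcslock_perf_autotest" with the legacy python test runner. Once the
patch is applied, the same command can also be tried by hand from the
interactive test binary; the path below assumes a meson build directory
named "build" and may differ on your setup:)

  $ ./build/app/test/dpdk-test
  RTE>> mcslock_perf_autotest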
diff --git a/app/test/meson.build b/app/test/meson.build
index 0a2ce71..335a869 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -82,6 +82,7 @@ test_sources = files('commands.c',
 	'test_meter.c',
 	'test_metrics.c',
 	'test_mcslock.c',
+	'test_mcslock_perf.c',
 	'test_mp_secondary.c',
 	'test_per_lcore.c',
 	'test_pmd_perf.c',
@@ -270,6 +271,7 @@ perf_test_names = [
        'rand_perf_autotest',
        'hash_readwrite_perf_autotest',
        'hash_readwrite_lf_perf_autotest',
+       'mcslock_perf_autotest',
 ]
 
 driver_test_names = [
diff --git a/app/test/test_mcslock.c b/app/test/test_mcslock.c
index e9359df..15f9751 100644
--- a/app/test/test_mcslock.c
+++ b/app/test/test_mcslock.c
@@ -32,23 +32,16 @@
  *
  * - The function takes the global lock, display something, then releases
  *   the global lock on each core.
- *
- * - A load test is carried out, with all cores attempting to lock a single
- *   lock multiple times.
  */
 
 RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_me);
 RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_try_me);
-RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_perf_me);
 
 rte_mcslock_t *p_ml;
 rte_mcslock_t *p_ml_try;
-rte_mcslock_t *p_ml_perf;
 
 static unsigned int count;
 
-static rte_atomic32_t synchro;
-
 static int
 test_mcslock_per_core(__attribute__((unused)) void *arg)
 {
@@ -63,85 +56,8 @@ test_mcslock_per_core(__attribute__((unused)) void *arg)
 	return 0;
 }
 
-static uint64_t time_count[RTE_MAX_LCORE] = {0};
-
 #define MAX_LOOP 1000000
 
-static int
-load_loop_fn(void *func_param)
-{
-	uint64_t time_diff = 0, begin;
-	uint64_t hz = rte_get_timer_hz();
-	volatile uint64_t lcount = 0;
-	const int use_lock = *(int *)func_param;
-	const unsigned int lcore = rte_lcore_id();
-
-	/**< Per core me node. */
-	rte_mcslock_t ml_perf_me = RTE_PER_LCORE(_ml_perf_me);
-
-	/* wait synchro */
-	while (rte_atomic32_read(&synchro) == 0)
-		;
-
-	begin = rte_get_timer_cycles();
-	while (lcount < MAX_LOOP) {
-		if (use_lock)
-			rte_mcslock_lock(&p_ml_perf, &ml_perf_me);
-
-		lcount++;
-		if (use_lock)
-			rte_mcslock_unlock(&p_ml_perf, &ml_perf_me);
-	}
-	time_diff = rte_get_timer_cycles() - begin;
-	time_count[lcore] = time_diff * 1000000 / hz;
-	return 0;
-}
-
-static int
-test_mcslock_perf(void)
-{
-	unsigned int i;
-	uint64_t total = 0;
-	int lock = 0;
-	const unsigned int lcore = rte_lcore_id();
-
-	printf("\nTest with no lock on single core...\n");
-	rte_atomic32_set(&synchro, 1);
-	load_loop_fn(&lock);
-	printf("Core [%u] Cost Time = %"PRIu64" us\n",
-			lcore, time_count[lcore]);
-	memset(time_count, 0, sizeof(time_count));
-
-	printf("\nTest with lock on single core...\n");
-	lock = 1;
-	rte_atomic32_set(&synchro, 1);
-	load_loop_fn(&lock);
-	printf("Core [%u] Cost Time = %"PRIu64" us\n",
-			lcore, time_count[lcore]);
-	memset(time_count, 0, sizeof(time_count));
-
-	printf("\nTest with lock on %u cores...\n", (rte_lcore_count()));
-
-	rte_atomic32_set(&synchro, 0);
-	rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER);
-
-	/* start synchro and launch test on master */
-	rte_atomic32_set(&synchro, 1);
-	load_loop_fn(&lock);
-
-	rte_eal_mp_wait_lcore();
-
-	RTE_LCORE_FOREACH(i) {
-		printf("Core [%u] Cost Time = %"PRIu64" us\n",
-				i, time_count[i]);
-		total += time_count[i];
-	}
-
-	printf("Total Cost Time = %"PRIu64" us\n", total);
-
-	return 0;
-}
-
 /*
  * Use rte_mcslock_trylock() to trylock a mcs lock object,
  * If it could not lock the object successfully, it would
@@ -240,10 +156,6 @@ test_mcslock(void)
 		ret = -1;
 	rte_mcslock_unlock(&p_ml, &ml_me);
 
-	/* mcs lock perf test */
-	if (test_mcslock_perf() < 0)
-		return -1;
-
 	return ret;
 }
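(Aside on the API under test: unlike a spinlock, an MCS lock queues
waiters through a per-thread node supplied by the caller, which is why
load_loop_fn() keeps a per-lcore ml_perf_me node and passes it to every
lock and unlock call. A minimal usage sketch, illustrative only and not
part of this patch:)

#include <rte_mcslock.h>

static rte_mcslock_t *demo_lock;	/* NULL means unlocked */

static void
critical_section(void)
{
	/* This thread's queue node; it must stay alive until unlock. */
	rte_mcslock_t me;

	/* Each waiter spins on its own node, not on a shared lock word. */
	rte_mcslock_lock(&demo_lock, &me);
	/* ... protected work ... */
	rte_mcslock_unlock(&demo_lock, &me);
}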
diff --git a/app/test/test_mcslock_perf.c b/app/test/test_mcslock_perf.c
new file mode 100644
index 0000000..6948344
--- /dev/null
+++ b/app/test/test_mcslock_perf.c
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_cycles.h>
+#include <rte_mcslock.h>
+
+#include "test.h"
+
+/*
+ * RTE MCS lock perf test
+ * ======================
+ *
+ * These tests are derived from spin lock perf test cases.
+ *
+ * - A load test is carried out, with all cores attempting to lock a single
+ *   lock multiple times.
+ */
+
+RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_perf_me);
+
+rte_mcslock_t *p_ml_perf;
+
+static rte_atomic32_t synchro;
+static uint64_t time_count[RTE_MAX_LCORE] = {0};
+
+#define MAX_LOOP 1000000
+
+static int
+load_loop_fn(void *func_param)
+{
+	uint64_t time_diff = 0, begin;
+	uint64_t hz = rte_get_timer_hz();
+	volatile uint64_t lcount = 0;
+	const int use_lock = *(int *)func_param;
+	const unsigned int lcore = rte_lcore_id();
+
+	/**< Per core me node. */
+	rte_mcslock_t ml_perf_me = RTE_PER_LCORE(_ml_perf_me);
+
+	/* wait synchro */
+	while (rte_atomic32_read(&synchro) == 0)
+		;
+
+	begin = rte_get_timer_cycles();
+	while (lcount < MAX_LOOP) {
+		if (use_lock)
+			rte_mcslock_lock(&p_ml_perf, &ml_perf_me);
+
+		lcount++;
+		if (use_lock)
+			rte_mcslock_unlock(&p_ml_perf, &ml_perf_me);
+	}
+	time_diff = rte_get_timer_cycles() - begin;
+	time_count[lcore] = time_diff * 1000000 / hz;
+	return 0;
+}
+
+/*
+ * Test rte_eal_get_lcore_state() in addition to mcs locks
+ * as we have "waiting" then "running" lcores.
+ */
+static int
+test_mcslock_perf(void)
+{
+	unsigned int i;
+	uint64_t total = 0;
+	int lock = 0;
+	const unsigned int lcore = rte_lcore_id();
+
+	printf("\nTest with no lock on single core...\n");
+	rte_atomic32_set(&synchro, 1);
+	load_loop_fn(&lock);
+	printf("Core [%u] Cost Time = %"PRIu64" us\n",
+			lcore, time_count[lcore]);
+	memset(time_count, 0, sizeof(time_count));
+
+	printf("\nTest with lock on single core...\n");
+	lock = 1;
+	rte_atomic32_set(&synchro, 1);
+	load_loop_fn(&lock);
+	printf("Core [%u] Cost Time = %"PRIu64" us\n",
+			lcore, time_count[lcore]);
+	memset(time_count, 0, sizeof(time_count));
+
+	printf("\nTest with lock on %u cores...\n", (rte_lcore_count()));
+
+	rte_atomic32_set(&synchro, 0);
+	rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER);
+
+	/* start synchro and launch test on master */
+	rte_atomic32_set(&synchro, 1);
+	load_loop_fn(&lock);
+
+	rte_eal_mp_wait_lcore();
+
+	RTE_LCORE_FOREACH(i) {
+		printf("Core [%u] Cost Time = %"PRIu64" us\n",
+				i, time_count[i]);
+		total += time_count[i];
+	}
+
+	printf("Total Cost Time = %"PRIu64" us\n", total);
+
+	return 0;
+}
+
+REGISTER_TEST_COMMAND(mcslock_perf_autotest, test_mcslock_perf);
-- 
2.7.4
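(With this move, the functional mcslock_autotest stays in the fast test
suite while the timing loop above runs only with the perf suite, which
is run with a longer timeout. Something like the following should
exercise it under meson; the suite label and the build directory name
"build" are assumptions that can vary between DPDK versions:)

  $ meson test -C build --suite DPDK:perf-tests
  $ meson test -C build mcslock_perf_autotest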