* [dpdk-dev] [PATCH 2/2] test/atomic: reduce the number of loops to avoid timeouts
2020-03-23 4:16 [dpdk-dev] [PATCH 1/2] test/mcslock: move performance test to perf tests Phil Yang
@ 2020-03-23 4:16 ` Phil Yang
2020-07-28 3:41 ` [dpdk-dev] [PATCH v2] " Phil Yang
2020-07-23 2:36 ` [dpdk-dev] [PATCH 1/2] test/mcslock: move performance test to perf tests Phil Yang
` (2 subsequent siblings)
3 siblings, 1 reply; 10+ messages in thread
From: Phil Yang @ 2020-03-23 4:16 UTC (permalink / raw)
To: aconole, maicolgabriel, dev
Cc: david.marchand, drc, gavin.hu, Honnappa.Nagarahalli, ruifeng.wang, nd
Reduce the number of loops to 10K to avoid the meson test timeout
warning.
Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
app/test/test_atomic.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/app/test/test_atomic.c b/app/test/test_atomic.c
index de3030d..661fecc 100644
--- a/app/test/test_atomic.c
+++ b/app/test/test_atomic.c
@@ -99,7 +99,7 @@
#define NUM_ATOMIC_TYPES 3
-#define N 1000000
+#define N 10000
static rte_atomic16_t a16;
static rte_atomic32_t a32;
--
2.7.4
^ permalink raw reply [flat|nested] 10+ messages in thread
* [dpdk-dev] [PATCH v2] test/atomic: reduce the number of loops to avoid timeouts
2020-03-23 4:16 ` [dpdk-dev] [PATCH 2/2] test/atomic: reduce the number of loops to avoid timeouts Phil Yang
@ 2020-07-28 3:41 ` Phil Yang
2020-09-17 9:17 ` Juraj Linkeš
2023-06-13 3:37 ` Stephen Hemminger
0 siblings, 2 replies; 10+ messages in thread
From: Phil Yang @ 2020-07-28 3:41 UTC (permalink / raw)
To: david.marchand, dev; +Cc: aconole, Honnappa.Nagarahalli, Ruifeng.Wang, nd
Reduce the number of loops to 10K to avoid the meson test timeout
warning.
For example:
$ sudo meson test -C build --suite DPDK:fast-tests / atomic_autotest -t 50
[...]
1/1 DPDK:fast-tests / atomic_autotest OK 187.35s
Signed-off-by: Phil Yang <phil.yang@arm.com>
---
v2:
Split from the original patchset.
app/test/test_atomic.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/app/test/test_atomic.c b/app/test/test_atomic.c
index 214452e..9b70f44 100644
--- a/app/test/test_atomic.c
+++ b/app/test/test_atomic.c
@@ -99,7 +99,7 @@
#define NUM_ATOMIC_TYPES 3
-#define N 1000000
+#define N 10000
static rte_atomic16_t a16;
static rte_atomic32_t a32;
--
2.7.4
* Re: [dpdk-dev] [PATCH v2] test/atomic: reduce the number of loops to avoid timeouts
2020-07-28 3:41 ` [dpdk-dev] [PATCH v2] " Phil Yang
@ 2020-09-17 9:17 ` Juraj Linkeš
2023-06-13 3:37 ` Stephen Hemminger
1 sibling, 0 replies; 10+ messages in thread
From: Juraj Linkeš @ 2020-09-17 9:17 UTC (permalink / raw)
To: Phil Yang, david.marchand, dev
Cc: aconole, Honnappa.Nagarahalli, Ruifeng.Wang, nd
> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Phil Yang
> Sent: Tuesday, July 28, 2020 5:42 AM
> To: david.marchand@redhat.com; dev@dpdk.org
> Cc: aconole@redhat.com; Honnappa.Nagarahalli@arm.com;
> Ruifeng.Wang@arm.com; nd@arm.com
> Subject: [dpdk-dev] [PATCH v2] test/atomic: reduce the number of loops to
> avoid timeouts
>
> Reduce the number of loops to 10K to avoid the meson test timeout warning.
>
> For example:
> $ sudo meson test -C build --suite DPDK:fast-tests / atomic_autotest -t 50 [...]
> 1/1 DPDK:fast-tests / atomic_autotest OK 187.35s
>
> Signed-off-by: Phil Yang <phil.yang@arm.com>
> ---
> v2:
> Split from the original patchset.
>
> app/test/test_atomic.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/app/test/test_atomic.c b/app/test/test_atomic.c index
> 214452e..9b70f44 100644
> --- a/app/test/test_atomic.c
> +++ b/app/test/test_atomic.c
> @@ -99,7 +99,7 @@
>
> #define NUM_ATOMIC_TYPES 3
>
> -#define N 1000000
> +#define N 10000
>
> static rte_atomic16_t a16;
> static rte_atomic32_t a32;
> --
> 2.7.4
>
Reviewed-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
* Re: [dpdk-dev] [PATCH v2] test/atomic: reduce the number of loops to avoid timeouts
2020-07-28 3:41 ` [dpdk-dev] [PATCH v2] " Phil Yang
2020-09-17 9:17 ` Juraj Linkeš
@ 2023-06-13 3:37 ` Stephen Hemminger
1 sibling, 0 replies; 10+ messages in thread
From: Stephen Hemminger @ 2023-06-13 3:37 UTC (permalink / raw)
To: Phil Yang
Cc: david.marchand, dev, aconole, Honnappa.Nagarahalli, Ruifeng.Wang, nd
On Tue, 28 Jul 2020 11:41:37 +0800
Phil Yang <phil.yang@arm.com> wrote:
> Reduce the number of loops to 10K to avoid the meson test timeout
> warning.
>
> For example:
> $ sudo meson test -C build --suite DPDK:fast-tests / atomic_autotest -t 50
> [...]
> 1/1 DPDK:fast-tests / atomic_autotest OK 187.35s
>
> Signed-off-by: Phil Yang <phil.yang@arm.com>
I don't have an Arm server to test on, but this looks like a serious issue
with Arm performance. FYI, the default 1000000 loops take about 10 seconds on a
slow NUC (AMD x86). Is this a problem with the lack of a real compare-exchange
and the default atomic retry model used in its place?
This patch didn't get applied in 3 years, but it is still worth investigating
if anyone has resources.
* Re: [dpdk-dev] [PATCH 1/2] test/mcslock: move performance test to perf tests
2020-03-23 4:16 [dpdk-dev] [PATCH 1/2] test/mcslock: move performance test to perf tests Phil Yang
2020-03-23 4:16 ` [dpdk-dev] [PATCH 2/2] test/atomic: reduce the number of loops to avoid timeouts Phil Yang
@ 2020-07-23 2:36 ` Phil Yang
2020-07-23 15:14 ` Aaron Conole
2020-07-28 3:24 ` [dpdk-dev] [PATCH v2] " Phil Yang
2023-06-12 21:23 ` [dpdk-dev] [PATCH 1/2] " Stephen Hemminger
3 siblings, 1 reply; 10+ messages in thread
From: Phil Yang @ 2020-07-23 2:36 UTC (permalink / raw)
To: Phil Yang, aconole, maicolgabriel, dev
Cc: david.marchand, drc, Honnappa Nagarahalli, Ruifeng Wang, nd, nd
Hi Aaron,
It seems Travis CI cannot capture this timeout issue, but local testing records these two cases as TIMEOUT under the default timeout configuration.
1. The test results on an x86 server (72 lcores online @ 2.60 GHz) are:
a. Default setting:
$ sudo meson test -C build_test --suite DPDK:fast-tests
3/92 DPDK:fast-tests / atomic_autotest TIMEOUT 10.32s
39/92 DPDK:fast-tests / mcslock_autotest TIMEOUT 10.32s
b. Extend timeout:
$ sudo meson test -C build_test --suite DPDK:fast-tests -t 30
3/92 DPDK:fast-tests / atomic_autotest OK 173.01s
39/92 DPDK:fast-tests / mcslock_autotest OK 12.19s
2. The test results on an aarch64 server (56 lcores online @ 2.0 GHz) are:
a. Default setting:
$ sudo meson test -C build_test --suite DPDK:fast-tests
3/92 DPDK:fast-tests / atomic_autotest TIMEOUT 10.47 s
39/92 DPDK:fast-tests / mcslock_autotest TIMEOUT 10.47 s
b. Extend timeout:
$ sudo meson test -C build_test --suite DPDK:fast-tests -t 60
3/92 DPDK:fast-tests / atomic_autotest OK 431.89 s
39/92 DPDK:fast-tests / mcslock_autotest OK 21.25 s
So I think these two patches can resolve this issue. I'd love to hear your thoughts.
Thanks,
Phil
<snip>
> Subject: [dpdk-dev] [PATCH 1/2] test/mcslock: move performance test to
> perf tests
>
> The MCS lock performance test takes more than 10 seconds and leads
> to meson test timeout on some platforms. Move the performance test
> into perf tests.
>
> Signed-off-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> ---
> MAINTAINERS | 1 +
> app/test/Makefile | 1 +
> app/test/autotest_data.py | 6 +++
> app/test/meson.build | 2 +
> app/test/test_mcslock.c | 88 -------------------------------
> app/test/test_mcslock_perf.c | 121
> +++++++++++++++++++++++++++++++++++++++++++
> 6 files changed, 131 insertions(+), 88 deletions(-)
> create mode 100644 app/test/test_mcslock_perf.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index db235c2..411bdeb 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -247,6 +247,7 @@ MCSlock - EXPERIMENTAL
> M: Phil Yang <phil.yang@arm.com>
> F: lib/librte_eal/common/include/generic/rte_mcslock.h
> F: app/test/test_mcslock.c
> +F: app/test/test_mcslock_perf.c
>
> Ticketlock
> M: Joyce Kong <joyce.kong@arm.com>
> diff --git a/app/test/Makefile b/app/test/Makefile
> index 1f080d1..97de3ac 100644
> --- a/app/test/Makefile
> +++ b/app/test/Makefile
> @@ -65,6 +65,7 @@ SRCS-y += test_barrier.c
> SRCS-y += test_malloc.c
> SRCS-y += test_cycles.c
> SRCS-y += test_mcslock.c
> +SRCS-y += test_mcslock_perf.c
> SRCS-y += test_spinlock.c
> SRCS-y += test_ticketlock.c
> SRCS-y += test_memory.c
> diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
> index 7b1d013..2a4619d 100644
> --- a/app/test/autotest_data.py
> +++ b/app/test/autotest_data.py
> @@ -784,6 +784,12 @@
> "Func": default_autotest,
> "Report": None,
> },
> + {
> + "Name": "MCS Lock performance autotest",
> + "Command": "mcslock_perf_autotest",
> + "Func": default_autotest,
> + "Report": None,
> + },
> #
> # Please always make sure that ring_perf is the last test!
> #
> diff --git a/app/test/meson.build b/app/test/meson.build
> index 0a2ce71..335a869 100644
> --- a/app/test/meson.build
> +++ b/app/test/meson.build
> @@ -82,6 +82,7 @@ test_sources = files('commands.c',
> 'test_meter.c',
> 'test_metrics.c',
> 'test_mcslock.c',
> + 'test_mcslock_perf.c',
> 'test_mp_secondary.c',
> 'test_per_lcore.c',
> 'test_pmd_perf.c',
> @@ -270,6 +271,7 @@ perf_test_names = [
> 'rand_perf_autotest',
> 'hash_readwrite_perf_autotest',
> 'hash_readwrite_lf_perf_autotest',
> + 'mcslock_perf_autotest',
> ]
>
> driver_test_names = [
> diff --git a/app/test/test_mcslock.c b/app/test/test_mcslock.c
> index e9359df..15f9751 100644
> --- a/app/test/test_mcslock.c
> +++ b/app/test/test_mcslock.c
> @@ -32,23 +32,16 @@
> *
> * - The function takes the global lock, display something, then releases
> * the global lock on each core.
> - *
> - * - A load test is carried out, with all cores attempting to lock a single
> - * lock multiple times.
> */
>
> RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_me);
> RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_try_me);
> -RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_perf_me);
>
> rte_mcslock_t *p_ml;
> rte_mcslock_t *p_ml_try;
> -rte_mcslock_t *p_ml_perf;
>
> static unsigned int count;
>
> -static rte_atomic32_t synchro;
> -
> static int
> test_mcslock_per_core(__attribute__((unused)) void *arg)
> {
> @@ -63,85 +56,8 @@ test_mcslock_per_core(__attribute__((unused)) void
> *arg)
> return 0;
> }
>
> -static uint64_t time_count[RTE_MAX_LCORE] = {0};
> -
> #define MAX_LOOP 1000000
>
> -static int
> -load_loop_fn(void *func_param)
> -{
> - uint64_t time_diff = 0, begin;
> - uint64_t hz = rte_get_timer_hz();
> - volatile uint64_t lcount = 0;
> - const int use_lock = *(int *)func_param;
> - const unsigned int lcore = rte_lcore_id();
> -
> - /**< Per core me node. */
> - rte_mcslock_t ml_perf_me = RTE_PER_LCORE(_ml_perf_me);
> -
> - /* wait synchro */
> - while (rte_atomic32_read(&synchro) == 0)
> - ;
> -
> - begin = rte_get_timer_cycles();
> - while (lcount < MAX_LOOP) {
> - if (use_lock)
> - rte_mcslock_lock(&p_ml_perf, &ml_perf_me);
> -
> - lcount++;
> - if (use_lock)
> - rte_mcslock_unlock(&p_ml_perf, &ml_perf_me);
> - }
> - time_diff = rte_get_timer_cycles() - begin;
> - time_count[lcore] = time_diff * 1000000 / hz;
> - return 0;
> -}
> -
> -static int
> -test_mcslock_perf(void)
> -{
> - unsigned int i;
> - uint64_t total = 0;
> - int lock = 0;
> - const unsigned int lcore = rte_lcore_id();
> -
> - printf("\nTest with no lock on single core...\n");
> - rte_atomic32_set(&synchro, 1);
> - load_loop_fn(&lock);
> - printf("Core [%u] Cost Time = %"PRIu64" us\n",
> - lcore, time_count[lcore]);
> - memset(time_count, 0, sizeof(time_count));
> -
> - printf("\nTest with lock on single core...\n");
> - lock = 1;
> - rte_atomic32_set(&synchro, 1);
> - load_loop_fn(&lock);
> - printf("Core [%u] Cost Time = %"PRIu64" us\n",
> - lcore, time_count[lcore]);
> - memset(time_count, 0, sizeof(time_count));
> -
> - printf("\nTest with lock on %u cores...\n", (rte_lcore_count()));
> -
> - rte_atomic32_set(&synchro, 0);
> - rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER);
> -
> - /* start synchro and launch test on master */
> - rte_atomic32_set(&synchro, 1);
> - load_loop_fn(&lock);
> -
> - rte_eal_mp_wait_lcore();
> -
> - RTE_LCORE_FOREACH(i) {
> - printf("Core [%u] Cost Time = %"PRIu64" us\n",
> - i, time_count[i]);
> - total += time_count[i];
> - }
> -
> - printf("Total Cost Time = %"PRIu64" us\n", total);
> -
> - return 0;
> -}
> -
> /*
> * Use rte_mcslock_trylock() to trylock a mcs lock object,
> * If it could not lock the object successfully, it would
> @@ -240,10 +156,6 @@ test_mcslock(void)
> ret = -1;
> rte_mcslock_unlock(&p_ml, &ml_me);
>
> - /* mcs lock perf test */
> - if (test_mcslock_perf() < 0)
> - return -1;
> -
> return ret;
> }
>
> diff --git a/app/test/test_mcslock_perf.c b/app/test/test_mcslock_perf.c
> new file mode 100644
> index 0000000..6948344
> --- /dev/null
> +++ b/app/test/test_mcslock_perf.c
> @@ -0,0 +1,121 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2019 Arm Limited
> + */
> +
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <inttypes.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <sys/queue.h>
> +
> +#include <rte_common.h>
> +#include <rte_memory.h>
> +#include <rte_per_lcore.h>
> +#include <rte_launch.h>
> +#include <rte_eal.h>
> +#include <rte_lcore.h>
> +#include <rte_cycles.h>
> +#include <rte_mcslock.h>
> +#include <rte_atomic.h>
> +
> +#include "test.h"
> +
> +/*
> + * RTE MCS lock perf test
> + * ======================
> + *
> + * These tests are derived from spin lock perf test cases.
> + *
> + * - A load test is carried out, with all cores attempting to lock a single
> + * lock multiple times.
> + */
> +
> +RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_perf_me);
> +rte_mcslock_t *p_ml_perf;
> +
> +static rte_atomic32_t synchro;
> +static uint64_t time_count[RTE_MAX_LCORE] = {0};
> +
> +#define MAX_LOOP 1000000
> +
> +static int
> +load_loop_fn(void *func_param)
> +{
> + uint64_t time_diff = 0, begin;
> + uint64_t hz = rte_get_timer_hz();
> + volatile uint64_t lcount = 0;
> + const int use_lock = *(int *)func_param;
> + const unsigned int lcore = rte_lcore_id();
> +
> + /**< Per core me node. */
> + rte_mcslock_t ml_perf_me = RTE_PER_LCORE(_ml_perf_me);
> +
> + /* wait synchro */
> + while (rte_atomic32_read(&synchro) == 0)
> + ;
> +
> + begin = rte_get_timer_cycles();
> + while (lcount < MAX_LOOP) {
> + if (use_lock)
> + rte_mcslock_lock(&p_ml_perf, &ml_perf_me);
> +
> + lcount++;
> + if (use_lock)
> + rte_mcslock_unlock(&p_ml_perf, &ml_perf_me);
> + }
> + time_diff = rte_get_timer_cycles() - begin;
> + time_count[lcore] = time_diff * 1000000 / hz;
> + return 0;
> +}
> +
> +/*
> + * Test rte_eal_get_lcore_state() in addition to mcs locks
> + * as we have "waiting" then "running" lcores.
> + */
> +static int
> +test_mcslock_perf(void)
> +{
> + unsigned int i;
> + uint64_t total = 0;
> + int lock = 0;
> + const unsigned int lcore = rte_lcore_id();
> +
> + printf("\nTest with no lock on single core...\n");
> + rte_atomic32_set(&synchro, 1);
> + load_loop_fn(&lock);
> + printf("Core [%u] Cost Time = %"PRIu64" us\n",
> + lcore, time_count[lcore]);
> + memset(time_count, 0, sizeof(time_count));
> +
> + printf("\nTest with lock on single core...\n");
> + lock = 1;
> + rte_atomic32_set(&synchro, 1);
> + load_loop_fn(&lock);
> + printf("Core [%u] Cost Time = %"PRIu64" us\n",
> + lcore, time_count[lcore]);
> + memset(time_count, 0, sizeof(time_count));
> +
> + printf("\nTest with lock on %u cores...\n", (rte_lcore_count()));
> +
> + rte_atomic32_set(&synchro, 0);
> + rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER);
> +
> + /* start synchro and launch test on master */
> + rte_atomic32_set(&synchro, 1);
> + load_loop_fn(&lock);
> +
> + rte_eal_mp_wait_lcore();
> +
> + RTE_LCORE_FOREACH(i) {
> + printf("Core [%u] Cost Time = %"PRIu64" us\n",
> + i, time_count[i]);
> + total += time_count[i];
> + }
> +
> + printf("Total Cost Time = %"PRIu64" us\n", total);
> +
> + return 0;
> +}
> +
> +REGISTER_TEST_COMMAND(mcslock_perf_autotest, test_mcslock_perf);
> --
> 2.7.4
* Re: [dpdk-dev] [PATCH 1/2] test/mcslock: move performance test to perf tests
2020-07-23 2:36 ` [dpdk-dev] [PATCH 1/2] test/mcslock: move performance test to perf tests Phil Yang
@ 2020-07-23 15:14 ` Aaron Conole
0 siblings, 0 replies; 10+ messages in thread
From: Aaron Conole @ 2020-07-23 15:14 UTC (permalink / raw)
To: Phil Yang
Cc: maicolgabriel, dev, david.marchand, drc, Honnappa Nagarahalli,
Ruifeng Wang, nd
Phil Yang <Phil.Yang@arm.com> writes:
> Hi Aaron,
>
> It seems Travis CI cannot capture this timeout issue.
That's probably because we use -t 3 to multiply the timeout. We needed
to do that because a few of the tests were on the cusp of failure in the
Travis environment, since it's a bit lower powered. On brawnier
systems they probably aren't an issue.
> But the local test records these two cases as TIMEOUT under the default timeout configuration.
>
> 1. The test results on x86 server (72 lcores online@ 2.60GHz) are:
> a. Default setting:
> $ sudo meson test -C build_test --suite DPDK:fast-tests
> 3/92 DPDK:fast-tests / atomic_autotest TIMEOUT 10.32s
> 39/92 DPDK:fast-tests / mcslock_autotest TIMEOUT 10.32s
>
> b. Extend timeout:
> $ sudo meson test -C build_test --suite DPDK:fast-tests -t 30
> 3/92 DPDK:fast-tests / atomic_autotest OK 173.01s
> 39/92 DPDK:fast-tests / mcslock_autotest OK 12.19s
>
> 2. The test results on aarch64 server (56 lcore online @ 2.0GHz) are:
> a. Default setting:
> $ sudo meson test -C build_test --suite DPDK:fast-tests
> 3/92 DPDK:fast-tests / atomic_autotest TIMEOUT 10.47 s
> 39/92 DPDK:fast-tests / mcslock_autotest TIMEOUT 10.47 s
>
> b. Extend timeout:
> $ sudo meson test -C build_test --suite DPDK:fast-tests -t 60
> 3/92 DPDK:fast-tests / atomic_autotest OK 431.89 s
> 39/92 DPDK:fast-tests / mcslock_autotest OK 21.25 s
In all cases the mcslock_autotest is really not too far
off, but the atomic_autotest looks really long no matter what.
> So I think these two patches can resolve this issue. I'd love to hear your thoughts.
In general, I'm always happy to see perf tests more appropriately put
under the perf area. I don't have much of an opinion on this specific patch,
though.
> Thanks,
> Phil
>
> <snip>
>
>> Subject: [dpdk-dev] [PATCH 1/2] test/mcslock: move performance test to
>> perf tests
>>
>> The MCS lock performance test takes more than 10 seconds and leads
>> to meson test timeout on some platforms. Move the performance test
>> into perf tests.
>>
>> Signed-off-by: Phil Yang <phil.yang@arm.com>
>> Reviewed-by: Gavin Hu <gavin.hu@arm.com>
>> ---
>> MAINTAINERS | 1 +
>> app/test/Makefile | 1 +
>> app/test/autotest_data.py | 6 +++
>> app/test/meson.build | 2 +
>> app/test/test_mcslock.c | 88 -------------------------------
>> app/test/test_mcslock_perf.c | 121
>> +++++++++++++++++++++++++++++++++++++++++++
>> 6 files changed, 131 insertions(+), 88 deletions(-)
>> create mode 100644 app/test/test_mcslock_perf.c
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index db235c2..411bdeb 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -247,6 +247,7 @@ MCSlock - EXPERIMENTAL
>> M: Phil Yang <phil.yang@arm.com>
>> F: lib/librte_eal/common/include/generic/rte_mcslock.h
>> F: app/test/test_mcslock.c
>> +F: app/test/test_mcslock_perf.c
>>
>> Ticketlock
>> M: Joyce Kong <joyce.kong@arm.com>
>> diff --git a/app/test/Makefile b/app/test/Makefile
>> index 1f080d1..97de3ac 100644
>> --- a/app/test/Makefile
>> +++ b/app/test/Makefile
>> @@ -65,6 +65,7 @@ SRCS-y += test_barrier.c
>> SRCS-y += test_malloc.c
>> SRCS-y += test_cycles.c
>> SRCS-y += test_mcslock.c
>> +SRCS-y += test_mcslock_perf.c
>> SRCS-y += test_spinlock.c
>> SRCS-y += test_ticketlock.c
>> SRCS-y += test_memory.c
>> diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
>> index 7b1d013..2a4619d 100644
>> --- a/app/test/autotest_data.py
>> +++ b/app/test/autotest_data.py
>> @@ -784,6 +784,12 @@
>> "Func": default_autotest,
>> "Report": None,
>> },
>> + {
>> + "Name": "MCS Lock performance autotest",
>> + "Command": "mcslock_perf_autotest",
>> + "Func": default_autotest,
>> + "Report": None,
>> + },
>> #
>> # Please always make sure that ring_perf is the last test!
>> #
>> diff --git a/app/test/meson.build b/app/test/meson.build
>> index 0a2ce71..335a869 100644
>> --- a/app/test/meson.build
>> +++ b/app/test/meson.build
>> @@ -82,6 +82,7 @@ test_sources = files('commands.c',
>> 'test_meter.c',
>> 'test_metrics.c',
>> 'test_mcslock.c',
>> + 'test_mcslock_perf.c',
>> 'test_mp_secondary.c',
>> 'test_per_lcore.c',
>> 'test_pmd_perf.c',
>> @@ -270,6 +271,7 @@ perf_test_names = [
>> 'rand_perf_autotest',
>> 'hash_readwrite_perf_autotest',
>> 'hash_readwrite_lf_perf_autotest',
>> + 'mcslock_perf_autotest',
>> ]
>>
>> driver_test_names = [
>> diff --git a/app/test/test_mcslock.c b/app/test/test_mcslock.c
>> index e9359df..15f9751 100644
>> --- a/app/test/test_mcslock.c
>> +++ b/app/test/test_mcslock.c
>> @@ -32,23 +32,16 @@
>> *
>> * - The function takes the global lock, display something, then releases
>> * the global lock on each core.
>> - *
>> - * - A load test is carried out, with all cores attempting to lock a single
>> - * lock multiple times.
>> */
>>
>> RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_me);
>> RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_try_me);
>> -RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_perf_me);
>>
>> rte_mcslock_t *p_ml;
>> rte_mcslock_t *p_ml_try;
>> -rte_mcslock_t *p_ml_perf;
>>
>> static unsigned int count;
>>
>> -static rte_atomic32_t synchro;
>> -
>> static int
>> test_mcslock_per_core(__attribute__((unused)) void *arg)
>> {
>> @@ -63,85 +56,8 @@ test_mcslock_per_core(__attribute__((unused)) void
>> *arg)
>> return 0;
>> }
>>
>> -static uint64_t time_count[RTE_MAX_LCORE] = {0};
>> -
>> #define MAX_LOOP 1000000
>>
>> -static int
>> -load_loop_fn(void *func_param)
>> -{
>> - uint64_t time_diff = 0, begin;
>> - uint64_t hz = rte_get_timer_hz();
>> - volatile uint64_t lcount = 0;
>> - const int use_lock = *(int *)func_param;
>> - const unsigned int lcore = rte_lcore_id();
>> -
>> - /**< Per core me node. */
>> - rte_mcslock_t ml_perf_me = RTE_PER_LCORE(_ml_perf_me);
>> -
>> - /* wait synchro */
>> - while (rte_atomic32_read(&synchro) == 0)
>> - ;
>> -
>> - begin = rte_get_timer_cycles();
>> - while (lcount < MAX_LOOP) {
>> - if (use_lock)
>> - rte_mcslock_lock(&p_ml_perf, &ml_perf_me);
>> -
>> - lcount++;
>> - if (use_lock)
>> - rte_mcslock_unlock(&p_ml_perf, &ml_perf_me);
>> - }
>> - time_diff = rte_get_timer_cycles() - begin;
>> - time_count[lcore] = time_diff * 1000000 / hz;
>> - return 0;
>> -}
>> -
>> -static int
>> -test_mcslock_perf(void)
>> -{
>> - unsigned int i;
>> - uint64_t total = 0;
>> - int lock = 0;
>> - const unsigned int lcore = rte_lcore_id();
>> -
>> - printf("\nTest with no lock on single core...\n");
>> - rte_atomic32_set(&synchro, 1);
>> - load_loop_fn(&lock);
>> - printf("Core [%u] Cost Time = %"PRIu64" us\n",
>> - lcore, time_count[lcore]);
>> - memset(time_count, 0, sizeof(time_count));
>> -
>> - printf("\nTest with lock on single core...\n");
>> - lock = 1;
>> - rte_atomic32_set(&synchro, 1);
>> - load_loop_fn(&lock);
>> - printf("Core [%u] Cost Time = %"PRIu64" us\n",
>> - lcore, time_count[lcore]);
>> - memset(time_count, 0, sizeof(time_count));
>> -
>> - printf("\nTest with lock on %u cores...\n", (rte_lcore_count()));
>> -
>> - rte_atomic32_set(&synchro, 0);
>> - rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER);
>> -
>> - /* start synchro and launch test on master */
>> - rte_atomic32_set(&synchro, 1);
>> - load_loop_fn(&lock);
>> -
>> - rte_eal_mp_wait_lcore();
>> -
>> - RTE_LCORE_FOREACH(i) {
>> - printf("Core [%u] Cost Time = %"PRIu64" us\n",
>> - i, time_count[i]);
>> - total += time_count[i];
>> - }
>> -
>> - printf("Total Cost Time = %"PRIu64" us\n", total);
>> -
>> - return 0;
>> -}
>> -
>> /*
>> * Use rte_mcslock_trylock() to trylock a mcs lock object,
>> * If it could not lock the object successfully, it would
>> @@ -240,10 +156,6 @@ test_mcslock(void)
>> ret = -1;
>> rte_mcslock_unlock(&p_ml, &ml_me);
>>
>> - /* mcs lock perf test */
>> - if (test_mcslock_perf() < 0)
>> - return -1;
>> -
>> return ret;
>> }
>>
>> diff --git a/app/test/test_mcslock_perf.c b/app/test/test_mcslock_perf.c
>> new file mode 100644
>> index 0000000..6948344
>> --- /dev/null
>> +++ b/app/test/test_mcslock_perf.c
>> @@ -0,0 +1,121 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2019 Arm Limited
>> + */
>> +
>> +#include <stdio.h>
>> +#include <stdint.h>
>> +#include <inttypes.h>
>> +#include <string.h>
>> +#include <unistd.h>
>> +#include <sys/queue.h>
>> +
>> +#include <rte_common.h>
>> +#include <rte_memory.h>
>> +#include <rte_per_lcore.h>
>> +#include <rte_launch.h>
>> +#include <rte_eal.h>
>> +#include <rte_lcore.h>
>> +#include <rte_cycles.h>
>> +#include <rte_mcslock.h>
>> +#include <rte_atomic.h>
>> +
>> +#include "test.h"
>> +
>> +/*
>> + * RTE MCS lock perf test
>> + * ======================
>> + *
>> + * These tests are derived from spin lock perf test cases.
>> + *
>> + * - A load test is carried out, with all cores attempting to lock a single
>> + * lock multiple times.
>> + */
>> +
>> +RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_perf_me);
>> +rte_mcslock_t *p_ml_perf;
>> +
>> +static rte_atomic32_t synchro;
>> +static uint64_t time_count[RTE_MAX_LCORE] = {0};
>> +
>> +#define MAX_LOOP 1000000
>> +
>> +static int
>> +load_loop_fn(void *func_param)
>> +{
>> + uint64_t time_diff = 0, begin;
>> + uint64_t hz = rte_get_timer_hz();
>> + volatile uint64_t lcount = 0;
>> + const int use_lock = *(int *)func_param;
>> + const unsigned int lcore = rte_lcore_id();
>> +
>> + /**< Per core me node. */
>> + rte_mcslock_t ml_perf_me = RTE_PER_LCORE(_ml_perf_me);
>> +
>> + /* wait synchro */
>> + while (rte_atomic32_read(&synchro) == 0)
>> + ;
>> +
>> + begin = rte_get_timer_cycles();
>> + while (lcount < MAX_LOOP) {
>> + if (use_lock)
>> + rte_mcslock_lock(&p_ml_perf, &ml_perf_me);
>> +
>> + lcount++;
>> + if (use_lock)
>> + rte_mcslock_unlock(&p_ml_perf, &ml_perf_me);
>> + }
>> + time_diff = rte_get_timer_cycles() - begin;
>> + time_count[lcore] = time_diff * 1000000 / hz;
>> + return 0;
>> +}
>> +
>> +/*
>> + * Test rte_eal_get_lcore_state() in addition to mcs locks
>> + * as we have "waiting" then "running" lcores.
>> + */
>> +static int
>> +test_mcslock_perf(void)
>> +{
>> + unsigned int i;
>> + uint64_t total = 0;
>> + int lock = 0;
>> + const unsigned int lcore = rte_lcore_id();
>> +
>> + printf("\nTest with no lock on single core...\n");
>> + rte_atomic32_set(&synchro, 1);
>> + load_loop_fn(&lock);
>> + printf("Core [%u] Cost Time = %"PRIu64" us\n",
>> + lcore, time_count[lcore]);
>> + memset(time_count, 0, sizeof(time_count));
>> +
>> + printf("\nTest with lock on single core...\n");
>> + lock = 1;
>> + rte_atomic32_set(&synchro, 1);
>> + load_loop_fn(&lock);
>> + printf("Core [%u] Cost Time = %"PRIu64" us\n",
>> + lcore, time_count[lcore]);
>> + memset(time_count, 0, sizeof(time_count));
>> +
>> + printf("\nTest with lock on %u cores...\n", (rte_lcore_count()));
>> +
>> + rte_atomic32_set(&synchro, 0);
>> + rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER);
>> +
>> + /* start synchro and launch test on master */
>> + rte_atomic32_set(&synchro, 1);
>> + load_loop_fn(&lock);
>> +
>> + rte_eal_mp_wait_lcore();
>> +
>> + RTE_LCORE_FOREACH(i) {
>> + printf("Core [%u] Cost Time = %"PRIu64" us\n",
>> + i, time_count[i]);
>> + total += time_count[i];
>> + }
>> +
>> + printf("Total Cost Time = %"PRIu64" us\n", total);
>> +
>> + return 0;
>> +}
>> +
>> +REGISTER_TEST_COMMAND(mcslock_perf_autotest, test_mcslock_perf);
>> --
>> 2.7.4
* [dpdk-dev] [PATCH v2] test/mcslock: move performance test to perf tests
2020-03-23 4:16 [dpdk-dev] [PATCH 1/2] test/mcslock: move performance test to perf tests Phil Yang
2020-03-23 4:16 ` [dpdk-dev] [PATCH 2/2] test/atomic: reduce the number of loops to avoid timeouts Phil Yang
2020-07-23 2:36 ` [dpdk-dev] [PATCH 1/2] test/mcslock: move performance test to perf tests Phil Yang
@ 2020-07-28 3:24 ` Phil Yang
2020-09-17 9:16 ` Juraj Linkeš
2023-06-12 21:23 ` [dpdk-dev] [PATCH 1/2] " Stephen Hemminger
3 siblings, 1 reply; 10+ messages in thread
From: Phil Yang @ 2020-07-28 3:24 UTC (permalink / raw)
To: david.marchand, dev
Cc: aconole, Honnappa.Nagarahalli, Ruifeng.Wang, nd, Thomas Monjalon
Move the MCS lock performance test into perf tests.
Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
---
v2:
1. Rebase code.
2. Split from the original patchset.
MAINTAINERS | 1 +
app/test/Makefile | 1 +
app/test/autotest_data.py | 6 +++
app/test/meson.build | 4 +-
app/test/test_mcslock.c | 88 -------------------------------
app/test/test_mcslock_perf.c | 121 +++++++++++++++++++++++++++++++++++++++++++
6 files changed, 132 insertions(+), 89 deletions(-)
create mode 100644 app/test/test_mcslock_perf.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 171c4c7..eae6034 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -265,6 +265,7 @@ MCSlock - EXPERIMENTAL
M: Phil Yang <phil.yang@arm.com>
F: lib/librte_eal/include/generic/rte_mcslock.h
F: app/test/test_mcslock.c
+F: app/test/test_mcslock_perf.c
Ticketlock
M: Joyce Kong <joyce.kong@arm.com>
diff --git a/app/test/Makefile b/app/test/Makefile
index f406527..e4b3327 100644
--- a/app/test/Makefile
+++ b/app/test/Makefile
@@ -65,6 +65,7 @@ SRCS-y += test_barrier.c
SRCS-y += test_malloc.c
SRCS-y += test_cycles.c
SRCS-y += test_mcslock.c
+SRCS-y += test_mcslock_perf.c
SRCS-y += test_spinlock.c
SRCS-y += test_ticketlock.c
SRCS-y += test_memory.c
diff --git a/app/test/autotest_data.py b/app/test/autotest_data.py
index 4b7da45..c813b93 100644
--- a/app/test/autotest_data.py
+++ b/app/test/autotest_data.py
@@ -796,6 +796,12 @@
"Func": default_autotest,
"Report": None,
},
+ {
+ "Name": "MCS Lock performance autotest",
+ "Command": "mcslock_perf_autotest",
+ "Func": default_autotest,
+ "Report": None,
+ },
#
# Please always make sure that ring_perf is the last test!
#
diff --git a/app/test/meson.build b/app/test/meson.build
index 786a213..59df2fd 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -86,6 +86,7 @@ test_sources = files('commands.c',
'test_meter.c',
'test_metrics.c',
'test_mcslock.c',
+ 'test_mcslock_perf.c',
'test_mp_secondary.c',
'test_per_lcore.c',
'test_pmd_perf.c',
@@ -297,7 +298,8 @@ perf_test_names = [
'hash_readwrite_perf_autotest',
'hash_readwrite_lf_perf_autotest',
'trace_perf_autotest',
- 'ipsec_perf_autotest',
+ 'ipsec_perf_autotest',
+ 'mcslock_perf_autotest',
]
driver_test_names = [
diff --git a/app/test/test_mcslock.c b/app/test/test_mcslock.c
index ddccaaf..a1dc610 100644
--- a/app/test/test_mcslock.c
+++ b/app/test/test_mcslock.c
@@ -32,23 +32,16 @@
*
* - The function takes the global lock, display something, then releases
* the global lock on each core.
- *
- * - A load test is carried out, with all cores attempting to lock a single
- * lock multiple times.
*/
RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_me);
RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_try_me);
-RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_perf_me);
rte_mcslock_t *p_ml;
rte_mcslock_t *p_ml_try;
-rte_mcslock_t *p_ml_perf;
static unsigned int count;
-static rte_atomic32_t synchro;
-
static int
test_mcslock_per_core(__rte_unused void *arg)
{
@@ -63,85 +56,8 @@ test_mcslock_per_core(__rte_unused void *arg)
return 0;
}
-static uint64_t time_count[RTE_MAX_LCORE] = {0};
-
#define MAX_LOOP 1000000
-static int
-load_loop_fn(void *func_param)
-{
- uint64_t time_diff = 0, begin;
- uint64_t hz = rte_get_timer_hz();
- volatile uint64_t lcount = 0;
- const int use_lock = *(int *)func_param;
- const unsigned int lcore = rte_lcore_id();
-
- /**< Per core me node. */
- rte_mcslock_t ml_perf_me = RTE_PER_LCORE(_ml_perf_me);
-
- /* wait synchro */
- while (rte_atomic32_read(&synchro) == 0)
- ;
-
- begin = rte_get_timer_cycles();
- while (lcount < MAX_LOOP) {
- if (use_lock)
- rte_mcslock_lock(&p_ml_perf, &ml_perf_me);
-
- lcount++;
- if (use_lock)
- rte_mcslock_unlock(&p_ml_perf, &ml_perf_me);
- }
- time_diff = rte_get_timer_cycles() - begin;
- time_count[lcore] = time_diff * 1000000 / hz;
- return 0;
-}
-
-static int
-test_mcslock_perf(void)
-{
- unsigned int i;
- uint64_t total = 0;
- int lock = 0;
- const unsigned int lcore = rte_lcore_id();
-
- printf("\nTest with no lock on single core...\n");
- rte_atomic32_set(&synchro, 1);
- load_loop_fn(&lock);
- printf("Core [%u] Cost Time = %"PRIu64" us\n",
- lcore, time_count[lcore]);
- memset(time_count, 0, sizeof(time_count));
-
- printf("\nTest with lock on single core...\n");
- lock = 1;
- rte_atomic32_set(&synchro, 1);
- load_loop_fn(&lock);
- printf("Core [%u] Cost Time = %"PRIu64" us\n",
- lcore, time_count[lcore]);
- memset(time_count, 0, sizeof(time_count));
-
- printf("\nTest with lock on %u cores...\n", (rte_lcore_count()));
-
- rte_atomic32_set(&synchro, 0);
- rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER);
-
- /* start synchro and launch test on master */
- rte_atomic32_set(&synchro, 1);
- load_loop_fn(&lock);
-
- rte_eal_mp_wait_lcore();
-
- RTE_LCORE_FOREACH(i) {
- printf("Core [%u] Cost Time = %"PRIu64" us\n",
- i, time_count[i]);
- total += time_count[i];
- }
-
- printf("Total Cost Time = %"PRIu64" us\n", total);
-
- return 0;
-}
-
/*
* Use rte_mcslock_trylock() to trylock a mcs lock object,
* If it could not lock the object successfully, it would
@@ -240,10 +156,6 @@ test_mcslock(void)
ret = -1;
rte_mcslock_unlock(&p_ml, &ml_me);
- /* mcs lock perf test */
- if (test_mcslock_perf() < 0)
- return -1;
-
return ret;
}
diff --git a/app/test/test_mcslock_perf.c b/app/test/test_mcslock_perf.c
new file mode 100644
index 0000000..6948344
--- /dev/null
+++ b/app/test/test_mcslock_perf.c
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_cycles.h>
+#include <rte_mcslock.h>
+#include <rte_atomic.h>
+
+#include "test.h"
+
+/*
+ * RTE MCS lock perf test
+ * ======================
+ *
+ * These tests are derived from spin lock perf test cases.
+ *
+ * - A load test is carried out, with all cores attempting to lock a single
+ * lock multiple times.
+ */
+
+RTE_DEFINE_PER_LCORE(rte_mcslock_t, _ml_perf_me);
+rte_mcslock_t *p_ml_perf;
+
+static rte_atomic32_t synchro;
+static uint64_t time_count[RTE_MAX_LCORE] = {0};
+
+#define MAX_LOOP 1000000
+
+static int
+load_loop_fn(void *func_param)
+{
+ uint64_t time_diff = 0, begin;
+ uint64_t hz = rte_get_timer_hz();
+ volatile uint64_t lcount = 0;
+ const int use_lock = *(int *)func_param;
+ const unsigned int lcore = rte_lcore_id();
+
+ /**< Per core me node. */
+ rte_mcslock_t ml_perf_me = RTE_PER_LCORE(_ml_perf_me);
+
+ /* wait synchro */
+ while (rte_atomic32_read(&synchro) == 0)
+ ;
+
+ begin = rte_get_timer_cycles();
+ while (lcount < MAX_LOOP) {
+ if (use_lock)
+ rte_mcslock_lock(&p_ml_perf, &ml_perf_me);
+
+ lcount++;
+ if (use_lock)
+ rte_mcslock_unlock(&p_ml_perf, &ml_perf_me);
+ }
+ time_diff = rte_get_timer_cycles() - begin;
+ time_count[lcore] = time_diff * 1000000 / hz;
+ return 0;
+}
+
+/*
+ * Test rte_eal_get_lcore_state() in addition to mcs locks
+ * as we have "waiting" then "running" lcores.
+ */
+static int
+test_mcslock_perf(void)
+{
+ unsigned int i;
+ uint64_t total = 0;
+ int lock = 0;
+ const unsigned int lcore = rte_lcore_id();
+
+ printf("\nTest with no lock on single core...\n");
+ rte_atomic32_set(&synchro, 1);
+ load_loop_fn(&lock);
+ printf("Core [%u] Cost Time = %"PRIu64" us\n",
+ lcore, time_count[lcore]);
+ memset(time_count, 0, sizeof(time_count));
+
+ printf("\nTest with lock on single core...\n");
+ lock = 1;
+ rte_atomic32_set(&synchro, 1);
+ load_loop_fn(&lock);
+ printf("Core [%u] Cost Time = %"PRIu64" us\n",
+ lcore, time_count[lcore]);
+ memset(time_count, 0, sizeof(time_count));
+
+ printf("\nTest with lock on %u cores...\n", (rte_lcore_count()));
+
+ rte_atomic32_set(&synchro, 0);
+ rte_eal_mp_remote_launch(load_loop_fn, &lock, SKIP_MASTER);
+
+ /* start synchro and launch test on master */
+ rte_atomic32_set(&synchro, 1);
+ load_loop_fn(&lock);
+
+ rte_eal_mp_wait_lcore();
+
+ RTE_LCORE_FOREACH(i) {
+ printf("Core [%u] Cost Time = %"PRIu64" us\n",
+ i, time_count[i]);
+ total += time_count[i];
+ }
+
+ printf("Total Cost Time = %"PRIu64" us\n", total);
+
+ return 0;
+}
+
+REGISTER_TEST_COMMAND(mcslock_perf_autotest, test_mcslock_perf);
--
2.7.4
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH v2] test/mcslock: move performance test to perf tests
2020-07-28 3:24 ` [dpdk-dev] [PATCH v2] " Phil Yang
@ 2020-09-17 9:16 ` Juraj Linkeš
0 siblings, 0 replies; 10+ messages in thread
From: Juraj Linkeš @ 2020-09-17 9:16 UTC (permalink / raw)
To: Phil Yang, david.marchand, dev
Cc: aconole, Honnappa.Nagarahalli, Ruifeng.Wang, nd, Thomas Monjalon
> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Phil Yang
> Sent: Tuesday, July 28, 2020 5:24 AM
> To: david.marchand@redhat.com; dev@dpdk.org
> Cc: aconole@redhat.com; Honnappa.Nagarahalli@arm.com;
> Ruifeng.Wang@arm.com; nd@arm.com; Thomas Monjalon
> <thomas@monjalon.net>
> Subject: [dpdk-dev] [PATCH v2] test/mcslock: move performance test to perf
> tests
>
> Move the MCS lock performance test into perf tests.
>
> Signed-off-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> ---
> v2:
> 1. Rebase code.
> 2. Split from the original patchset.
>
> [patch quoted in full above; identical diff trimmed]
>
Reviewed-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [dpdk-dev] [PATCH 1/2] test/mcslock: move performance test to perf tests
2020-03-23 4:16 [dpdk-dev] [PATCH 1/2] test/mcslock: move performance test to perf tests Phil Yang
` (2 preceding siblings ...)
2020-07-28 3:24 ` [dpdk-dev] [PATCH v2] " Phil Yang
@ 2023-06-12 21:23 ` Stephen Hemminger
3 siblings, 0 replies; 10+ messages in thread
From: Stephen Hemminger @ 2023-06-12 21:23 UTC (permalink / raw)
To: Phil Yang
Cc: aconole, maicolgabriel, dev, david.marchand, drc, gavin.hu,
Honnappa.Nagarahalli, ruifeng.wang, nd
On Mon, 23 Mar 2020 12:16:17 +0800
Phil Yang <phil.yang@arm.com> wrote:
> The MCS lock performance test takes more than 10 seconds and leads
> to meson test timeout on some platforms. Move the performance test
> into perf tests.
>
> Signed-off-by: Phil Yang <phil.yang@arm.com>
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> ---
No, fix the test.
There is no reason that mcslock should be in the perf section
but not all the other lock types.
^ permalink raw reply [flat|nested] 10+ messages in thread