* [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
@ 2020-10-29 15:36 Dharmik Thakkar
  2020-10-29 15:36 ` [dpdk-dev] [PATCH 2/4] test/lpm: return error on failure " Dharmik Thakkar
                   ` (5 more replies)
  0 siblings, 6 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-10-29 15:36 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Honnappa Nagarahalli,
	Ruifeng Wang, Gavin Hu
  Cc: dev, nd, Dharmik Thakkar, stable
Fix incorrect calculations for LPM adds, LPM deletes,
and average cycles in RCU QSBR perf tests
Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 43 ++++++++++++++--------------------------
 1 file changed, 15 insertions(+), 28 deletions(-)
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index c5a238b9d1e8..0a2d76a983c3 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv;
 static volatile uint8_t writer_done;
 static volatile uint32_t thr_id;
 static uint64_t gwrite_cycles;
-static uint64_t gwrites;
 /* LPM APIs are not thread safe, use mutex to provide thread safety */
 static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
 
@@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries;
 #define NUM_ROUTE_ENTRIES num_route_entries
 #define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries
 
+#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES)
+
 enum {
 	IP_CLASS_A,
 	IP_CLASS_B,
@@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	uint8_t core_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
 
-	RTE_SET_USED(arg);
 	/* 2 writer threads are used */
 	if (core_id % 2 == 0) {
 		si = 0;
@@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	total_cycles = rte_rdtsc_precise() - begin;
 
 	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
-	__atomic_fetch_add(&gwrites,
-			2 * NUM_LDEPTH_ROUTE_ENTRIES * RCU_ITERATIONS,
-			__ATOMIC_RELAXED);
 
 	return 0;
 }
@@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void)
 
 	writer_done = 0;
 	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
 
 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
@@ -548,13 +544,10 @@ test_lpm_rcu_perf_multi_writer(void)
 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 			goto error;
 
-	printf("Total LPM Adds: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
+		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / TOTAL_WRITES
 		);
 
 	/* Wait and check return value from reader threads */
@@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void)
 
 	writer_done = 0;
 	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
 	/* Launch reader threads */
@@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void)
 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 			goto error;
 
-	printf("Total LPM Adds: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
-		);
+		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+		/ TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
@@ -711,11 +700,10 @@ test_lpm_rcu_perf(void)
 	}
 	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
+		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
@@ -771,11 +759,10 @@ test_lpm_rcu_perf(void)
 	}
 	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
+		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH 2/4] test/lpm: return error on failure in rcu qsbr perf
  2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
@ 2020-10-29 15:36 ` Dharmik Thakkar
  2020-10-29 15:36 ` [dpdk-dev] [PATCH 3/4] test/lpm: remove error checking " Dharmik Thakkar
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-10-29 15:36 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Gavin Hu, Ruifeng Wang,
	Honnappa Nagarahalli
  Cc: dev, nd, Dharmik Thakkar, stable
Return error if Add/Delete fail in multiwriter perf test
Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 0a2d76a983c3..251ea12345ae 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -453,6 +453,8 @@ test_lpm_rcu_qsbr_writer(void *arg)
 					next_hop_add) != 0) {
 				printf("Failed to add iteration %d, route# %d\n",
 					i, j);
+				pthread_mutex_unlock(&lpm_mutex);
+				return -1;
 			}
 			pthread_mutex_unlock(&lpm_mutex);
 		}
@@ -464,6 +466,8 @@ test_lpm_rcu_qsbr_writer(void *arg)
 				large_ldepth_route_table[j].depth) != 0) {
 				printf("Failed to delete iteration %d, route# %d\n",
 					i, j);
+				pthread_mutex_unlock(&lpm_mutex);
+				return -1;
 			}
 			pthread_mutex_unlock(&lpm_mutex);
 		}
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH 3/4] test/lpm: remove error checking in rcu qsbr perf
  2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
  2020-10-29 15:36 ` [dpdk-dev] [PATCH 2/4] test/lpm: return error on failure " Dharmik Thakkar
@ 2020-10-29 15:36 ` Dharmik Thakkar
  2020-10-29 15:36 ` [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-10-29 15:36 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Gavin Hu, Ruifeng Wang,
	Honnappa Nagarahalli
  Cc: dev, nd, Dharmik Thakkar, stable
Remove redundant error checking for reader threads
since they never return error.
Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 251ea12345ae..4f15db4f85ee 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -554,11 +554,10 @@ test_lpm_rcu_perf_multi_writer(void)
 		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / TOTAL_WRITES
 		);
 
-	/* Wait and check return value from reader threads */
 	writer_done = 1;
+	/* Wait until all readers have exited */
 	for (i = 2; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 	rte_free(rv);
@@ -603,10 +602,9 @@ test_lpm_rcu_perf_multi_writer(void)
 		/ TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 2; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 
@@ -710,10 +708,9 @@ test_lpm_rcu_perf(void)
 		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		if (rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 	rte_free(rv);
@@ -769,11 +766,9 @@ test_lpm_rcu_perf(void)
 		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			printf("Warning: lcore %u not finished.\n",
-				enabled_core_ids[i]);
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
  2020-10-29 15:36 ` [dpdk-dev] [PATCH 2/4] test/lpm: return error on failure " Dharmik Thakkar
  2020-10-29 15:36 ` [dpdk-dev] [PATCH 3/4] test/lpm: remove error checking " Dharmik Thakkar
@ 2020-10-29 15:36 ` Dharmik Thakkar
  2020-11-02 17:17   ` Medvedkin, Vladimir
  2020-11-02 10:08 ` [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation " David Marchand
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-10-29 15:36 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin; +Cc: dev, nd, Dharmik Thakkar
Avoid code duplication by combining single and multi threaded tests
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 338 +++++++++------------------------------
 1 file changed, 73 insertions(+), 265 deletions(-)
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 4f15db4f85ee..08312023b661 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -430,11 +430,16 @@ test_lpm_rcu_qsbr_writer(void *arg)
 {
 	unsigned int i, j, si, ei;
 	uint64_t begin, total_cycles;
-	uint8_t core_id = (uint8_t)((uintptr_t)arg);
+	uint8_t writer_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
 
-	/* 2 writer threads are used */
-	if (core_id % 2 == 0) {
+	/* Single writer (writer_id = 1) */
+	if (writer_id == 1) {
+		si = 0;
+		ei = NUM_LDEPTH_ROUTE_ENTRIES;
+	}
+	/* 2 Writers (writer_id = 2/3)*/
+	else if (writer_id == 2) {
 		si = 0;
 		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
 	} else {
@@ -482,16 +487,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
 
 /*
  * Functional test:
- * 2 writers, rest are readers
+ * 1/2 writers, rest are readers
  */
 static int
-test_lpm_rcu_perf_multi_writer(void)
+test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
 {
 	struct rte_lpm_config config;
 	size_t sz;
-	unsigned int i;
+	unsigned int i, j;
 	uint16_t core_id;
 	struct rte_lpm_rcu_config rcu_cfg = {0};
+	int (*reader_f)(void *arg) = NULL;
 
 	if (rte_lcore_count() < 3) {
 		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
@@ -504,273 +510,76 @@ test_lpm_rcu_perf_multi_writer(void)
 		num_cores++;
 	}
 
-	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / TOTAL_WRITES
-		);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
-
-	/* Test without RCU integration */
-	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
-		/ TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-
-	return 0;
-
-error:
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	rte_eal_mp_wait_lcore();
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-
-	return -1;
-}
-
-/*
- * Functional test:
- * Single writer, rest are readers
- */
-static int
-test_lpm_rcu_perf(void)
-{
-	struct rte_lpm_config config;
-	uint64_t begin, total_cycles;
-	size_t sz;
-	unsigned int i, j;
-	uint16_t core_id;
-	uint32_t next_hop_add = 0xAA;
-	struct rte_lpm_rcu_config rcu_cfg = {0};
-
-	if (rte_lcore_count() < 2) {
-		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
-		return TEST_SKIPPED;
-	}
-
-	num_cores = 0;
-	RTE_LCORE_FOREACH_WORKER(core_id) {
-		enabled_core_ids[num_cores] = core_id;
-		num_cores++;
-	}
-
-	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
+	for (j = 1; j < 3; j++) {
+		if (use_rcu)
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration enabled\n", j, num_cores - j);
+		else
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration disabled\n", j, num_cores - j);
+
+		/* Create LPM table */
+		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.flags = 0;
+		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
+		TEST_LPM_ASSERT(lpm != NULL);
+
+		/* Init RCU variable */
+		if (use_rcu) {
+			sz = rte_rcu_qsbr_get_memsize(num_cores);
+			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
+							RTE_CACHE_LINE_SIZE);
+			rte_rcu_qsbr_init(rv, num_cores);
+
+			rcu_cfg.v = rv;
+			/* Assign the RCU variable to LPM */
+			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
+				printf("RCU variable assignment failed\n");
 				goto error;
 			}
 
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
-	}
-	total_cycles = rte_rdtsc_precise() - begin;
+			reader_f = test_lpm_rcu_qsbr_reader;
+		} else
+			reader_f = test_lpm_reader;
 
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
+		writer_done = 0;
+		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
 
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]);
+		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
+		/* Launch reader threads */
+		for (i = j; i < num_cores; i++)
+			rte_eal_remote_launch(reader_f, NULL,
+						enabled_core_ids[i]);
 
-	/* Test without RCU integration */
-	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
-		num_cores);
+		/* Launch writer threads */
+		for (i = 0; i < j; i++)
+			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
+						(void *)(uintptr_t)(i + j),
+						enabled_core_ids[i]);
 
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
+		/* Wait for writer threads */
+		for (i = 0; i < j; i++)
+			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 				goto error;
-			}
 
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
+		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
+		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
+			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+			/ TOTAL_WRITES);
+
+		writer_done = 1;
+		/* Wait until all readers have exited */
+		for (i = j; i < num_cores; i++)
+			rte_eal_wait_lcore(enabled_core_ids[i]);
+
+		rte_lpm_free(lpm);
+		rte_free(rv);
+		lpm = NULL;
+		rv = NULL;
 	}
-	total_cycles = rte_rdtsc_precise() - begin;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
 
 	return 0;
 
@@ -946,9 +755,8 @@ test_lpm_perf(void)
 	rte_lpm_delete_all(lpm);
 	rte_lpm_free(lpm);
 
-	test_lpm_rcu_perf();
-
-	test_lpm_rcu_perf_multi_writer();
+	test_lpm_rcu_perf_multi_writer(0);
+	test_lpm_rcu_perf_multi_writer(1);
 
 	return 0;
 }
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
                   ` (2 preceding siblings ...)
  2020-10-29 15:36 ` [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
@ 2020-11-02 10:08 ` David Marchand
  2020-11-02 15:11 ` Bruce Richardson
  2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  5 siblings, 0 replies; 52+ messages in thread
From: David Marchand @ 2020-11-02 10:08 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin
  Cc: Honnappa Nagarahalli, Ruifeng Wang, Gavin Hu, dev, nd,
	Dharmik Thakkar, dpdk stable
On Thu, Oct 29, 2020 at 4:37 PM Dharmik Thakkar <dharmik.thakkar@arm.com> wrote:
>
> Fix incorrect calculations for LPM adds, LPM deletes,
> and average cycles in RCU QSBR perf tests
>
> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
> Cc: honnappa.nagarahalli@arm.com
> Cc: stable@dpdk.org
>
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Bruce, Vladimir, reviews for this series please?
Thanks.
-- 
David Marchand
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
                   ` (3 preceding siblings ...)
  2020-11-02 10:08 ` [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation " David Marchand
@ 2020-11-02 15:11 ` Bruce Richardson
  2020-11-02 16:58   ` Dharmik Thakkar
  2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  5 siblings, 1 reply; 52+ messages in thread
From: Bruce Richardson @ 2020-11-02 15:11 UTC (permalink / raw)
  To: Dharmik Thakkar
  Cc: Vladimir Medvedkin, Honnappa Nagarahalli, Ruifeng Wang, Gavin Hu,
	dev, nd, stable
On Thu, Oct 29, 2020 at 10:36:31AM -0500, Dharmik Thakkar wrote:
> Fix incorrect calculations for LPM adds, LPM deletes,
> and average cycles in RCU QSBR perf tests
> 
To help review this patch, could you provide some more details in the
commit log as to what exactly was wrong with the calculation and how this
patch fixes things?
> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
> Cc: honnappa.nagarahalli@arm.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-11-02 15:11 ` Bruce Richardson
@ 2020-11-02 16:58   ` Dharmik Thakkar
  2020-11-02 17:21     ` Medvedkin, Vladimir
  2020-11-02 17:33     ` Bruce Richardson
  0 siblings, 2 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-02 16:58 UTC (permalink / raw)
  To: Bruce Richardson
  Cc: Vladimir Medvedkin, Honnappa Nagarahalli, Ruifeng Wang, Gavin Hu,
	dev, nd, dpdk stable
> On Nov 2, 2020, at 9:11 AM, Bruce Richardson <bruce.richardson@intel.com> wrote:
> 
> On Thu, Oct 29, 2020 at 10:36:31AM -0500, Dharmik Thakkar wrote:
>> Fix incorrect calculations for LPM adds, LPM deletes,
>> and average cycles in RCU QSBR perf tests
>> 
> 
> To help review this patch, could you provide some more details in the
> commit log as to what exactly was wrong with the calculation and how this
> patch fixes things?
> 
I will update the commit message in the next version. Adding it here as well:
Since the RCU QSBR tests run for ‘RCU_ITERATIONS’ and not ‘ITERATIONS’,
‘ITERATIONS’ is replaced with ‘RCU_ITERATIONS’ when calculating adds, deletes, and cycles.
Also, in the multi-writer perf test, each writer only writes half of NUM_LDEPTH_ROUTE_ENTRIES.
For 2 writers, the total adds (or deletes) should therefore be (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of
(2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES).
Since the total adds/deletes is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) for both the single and multi writer tests,
this expression has been replaced with a macro, ‘TOTAL_WRITES’. Furthermore, ‘gwrites’ has been removed since it is always a fixed value
equal to TOTAL_WRITES.
>> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
>> Cc: honnappa.nagarahalli@arm.com
>> Cc: stable@dpdk.org
>> 
>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-10-29 15:36 ` [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
@ 2020-11-02 17:17   ` Medvedkin, Vladimir
  2020-11-02 22:11     ` Dharmik Thakkar
  0 siblings, 1 reply; 52+ messages in thread
From: Medvedkin, Vladimir @ 2020-11-02 17:17 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson; +Cc: dev, nd
Hi Dharmik,
Thanks for the patches, see comments inlined
On 29/10/2020 15:36, Dharmik Thakkar wrote:
> Avoid code duplication by combining single and multi threaded tests
> 
> Signed-off-by: Dharmik Thakkar<dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>   app/test/test_lpm_perf.c | 338 +++++++++------------------------------
>   1 file changed, 73 insertions(+), 265 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
> index 4f15db4f85ee..08312023b661 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -430,11 +430,16 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   {
>   	unsigned int i, j, si, ei;
>   	uint64_t begin, total_cycles;
> -	uint8_t core_id = (uint8_t)((uintptr_t)arg);
> +	uint8_t writer_id = (uint8_t)((uintptr_t)arg);
>   	uint32_t next_hop_add = 0xAA;
>   
> -	/* 2 writer threads are used */
> -	if (core_id % 2 == 0) {
> +	/* Single writer (writer_id = 1) */
> +	if (writer_id == 1) {
It would probably be better to use an enum here instead of 1/2/3.
> +		si = 0;
> +		ei = NUM_LDEPTH_ROUTE_ENTRIES;
> +	}
> +	/* 2 Writers (writer_id = 2/3)*/
> +	else if (writer_id == 2) {
>   		si = 0;
>   		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>   	} else {
> @@ -482,16 +487,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   
>   /*
>    * Functional test:
> - * 2 writers, rest are readers
> + * 1/2 writers, rest are readers
>    */
>   static int
> -test_lpm_rcu_perf_multi_writer(void)
> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>   {
>   	struct rte_lpm_config config;
>   	size_t sz;
> -	unsigned int i;
> +	unsigned int i, j;
>   	uint16_t core_id;
>   	struct rte_lpm_rcu_config rcu_cfg = {0};
> +	int (*reader_f)(void *arg) = NULL;
>   
>   	if (rte_lcore_count() < 3) {
>   		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
> @@ -504,273 +510,76 @@ test_lpm_rcu_perf_multi_writer(void)
>   		num_cores++;
>   	}
>   
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / TOTAL_WRITES
> -		);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> -
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -
> -	return 0;
> -
> -error:
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	rte_eal_mp_wait_lcore();
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -
> -	return -1;
> -}
> -
> -/*
> - * Functional test:
> - * Single writer, rest are readers
> - */
> -static int
> -test_lpm_rcu_perf(void)
> -{
> -	struct rte_lpm_config config;
> -	uint64_t begin, total_cycles;
> -	size_t sz;
> -	unsigned int i, j;
> -	uint16_t core_id;
> -	uint32_t next_hop_add = 0xAA;
> -	struct rte_lpm_rcu_config rcu_cfg = {0};
> -
> -	if (rte_lcore_count() < 2) {
> -		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
> -		return TEST_SKIPPED;
> -	}
> -
> -	num_cores = 0;
> -	RTE_LCORE_FOREACH_WORKER(core_id) {
> -		enabled_core_ids[num_cores] = core_id;
> -		num_cores++;
> -	}
> -
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route# %d\n",
> -					i, j);
> +	for (j = 1; j < 3; j++) {
> +		if (use_rcu)
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration enabled\n", j, num_cores - j);
> +		else
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration disabled\n", j, num_cores - j);
> +
> +		/* Create LPM table */
> +		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.flags = 0;
> +		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> +		TEST_LPM_ASSERT(lpm != NULL);
> +
> +		/* Init RCU variable */
> +		if (use_rcu) {
> +			sz = rte_rcu_qsbr_get_memsize(num_cores);
> +			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> +							RTE_CACHE_LINE_SIZE);
> +			rte_rcu_qsbr_init(rv, num_cores);
> +
> +			rcu_cfg.v = rv;
> +			/* Assign the RCU variable to LPM */
> +			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> +				printf("RCU variable assignment failed\n");
>   				goto error;
>   			}
>   
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route# %d\n",
> -					i, j);
> -				goto error;
> -			}
> -	}
> -	total_cycles = rte_rdtsc_precise() - begin;
> +			reader_f = test_lpm_rcu_qsbr_reader;
> +		} else
> +			reader_f = test_lpm_reader;
>   
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> +		writer_done = 0;
> +		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>   
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]);
> +		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>   
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> +		/* Launch reader threads */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_remote_launch(reader_f, NULL,
> +						enabled_core_ids[i]);
>   
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
> -		num_cores);
> +		/* Launch writer threads */
> +		for (i = 0; i < j; i++)
> +			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
So now even a single writer will acquire a lock for every _add/_delete
operation. I don't think that is necessary.
> +						(void *)(uintptr_t)(i + j),
> +						enabled_core_ids[i]);
>   
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route# %d\n",
> -					i, j);
> +		/* Wait for writer threads */
> +		for (i = 0; i < j; i++)
> +			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>   				goto error;
> -			}
>   
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route# %d\n",
> -					i, j);
> -				goto error;
> -			}
> +		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> +		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> +			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> +			/ TOTAL_WRITES);
> +
> +		writer_done = 1;
> +		/* Wait until all readers have exited */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_wait_lcore(enabled_core_ids[i]);
> +
> +		rte_lpm_free(lpm);
> +		rte_free(rv);
> +		lpm = NULL;
> +		rv = NULL;
>   	}
> -	total_cycles = rte_rdtsc_precise() - begin;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
>   
>   	return 0;
>   
> @@ -946,9 +755,8 @@ test_lpm_perf(void)
>   	rte_lpm_delete_all(lpm);
>   	rte_lpm_free(lpm);
>   
> -	test_lpm_rcu_perf();
> -
> -	test_lpm_rcu_perf_multi_writer();
> +	test_lpm_rcu_perf_multi_writer(0);
> +	test_lpm_rcu_perf_multi_writer(1);
>   
>   	return 0;
>   }
> 
-- 
Regards,
Vladimir
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-11-02 16:58   ` Dharmik Thakkar
@ 2020-11-02 17:21     ` Medvedkin, Vladimir
  2020-11-02 17:33     ` Bruce Richardson
  1 sibling, 0 replies; 52+ messages in thread
From: Medvedkin, Vladimir @ 2020-11-02 17:21 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson
  Cc: Honnappa Nagarahalli, Ruifeng Wang, Gavin Hu, dev, nd, dpdk stable
Hi Dharmik,
On 02/11/2020 16:58, Dharmik Thakkar wrote:
> 
>> On Nov 2, 2020, at 9:11 AM, Bruce Richardson <bruce.richardson@intel.com> wrote:
>>
>> On Thu, Oct 29, 2020 at 10:36:31AM -0500, Dharmik Thakkar wrote:
>>> Fix incorrect calculations for LPM adds, LPM deletes,
>>> and average cycles in RCU QSBR perf tests
>>>
>>
>> To help review this patch, could you provide some more details in the
>> commit log as to what exactly was wrong with the calculation and how this
>> patch fixes things?
>>
> 
> I will update the commit message in the next version. Adding it here as well:
> 
> Since, rcu qsbr tests run for ‘RCU_ITERATIONS’ and not ‘ITERATIONS’,
> replace ‘ITERATIONS’ with ‘RCU_ITERATIONS’ for calculating adds, deletes, and cycles.
> 
> Also, for multi-writer perf test, each writer only writes half of NUM_LDEPTH_ROUTE_ENTRIES.
> For 2 writers, total adds (or deletes) should be (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of
> (2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES).
> 
> Since, for both the single and multi writer tests, total adds/deletes is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES),
> this has been replaced with a macro ’TOTAL_WRITES’ and furthermore, ‘g_writes’ has been removed since it is always a fixed value
> equal to TOTAL_WRITES.
> 
Thanks for the clarification. I left a few comments regarding the 4th
patch. The first 3 patches LGTM; just put more details in the commit message.
>>> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
>>> Cc: honnappa.nagarahalli@arm.com
>>> Cc: stable@dpdk.org
>>>
>>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> 
-- 
Regards,
Vladimir
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-11-02 16:58   ` Dharmik Thakkar
  2020-11-02 17:21     ` Medvedkin, Vladimir
@ 2020-11-02 17:33     ` Bruce Richardson
  1 sibling, 0 replies; 52+ messages in thread
From: Bruce Richardson @ 2020-11-02 17:33 UTC (permalink / raw)
  To: Dharmik Thakkar
  Cc: Vladimir Medvedkin, Honnappa Nagarahalli, Ruifeng Wang, Gavin Hu,
	dev, nd, dpdk stable
On Mon, Nov 02, 2020 at 04:58:43PM +0000, Dharmik Thakkar wrote:
> 
> > On Nov 2, 2020, at 9:11 AM, Bruce Richardson <bruce.richardson@intel.com> wrote:
> > 
> > On Thu, Oct 29, 2020 at 10:36:31AM -0500, Dharmik Thakkar wrote:
> >> Fix incorrect calculations for LPM adds, LPM deletes,
> >> and average cycles in RCU QSBR perf tests
> >> 
> > 
> > To help review this patch, could you provide some more details in the
> > commit log as to what exactly was wrong with the calculation and how this
> > patch fixes things?
> > 
> 
> I will update the commit message in the next version. Adding it here as well:
> 
> Since, rcu qsbr tests run for ‘RCU_ITERATIONS’ and not ‘ITERATIONS’,
> replace ‘ITERATIONS’ with ‘RCU_ITERATIONS’ for calculating adds, deletes, and cycles.
> 
> Also, for multi-writer perf test, each writer only writes half of NUM_LDEPTH_ROUTE_ENTRIES.
> For 2 writers, total adds (or deletes) should be (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of
> (2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES).
> 
> Since, for both the single and multi writer tests, total adds/deletes is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES),
> this has been replaced with a macro ’TOTAL_WRITES’ and furthermore, ‘g_writes’ has been removed since it is always a fixed value 
> equal to TOTAL_WRITES.
> 
Thanks for the clear explanation.
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-02 17:17   ` Medvedkin, Vladimir
@ 2020-11-02 22:11     ` Dharmik Thakkar
  0 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-02 22:11 UTC (permalink / raw)
  To: Medvedkin, Vladimir; +Cc: Bruce Richardson, dev, nd
> On Nov 2, 2020, at 11:17 AM, Medvedkin, Vladimir <vladimir.medvedkin@intel.com> wrote:
> 
> Hi Dharmik,
> 
> Thanks for the patches, see comments inlined
> 
> 
> On 29/10/2020 15:36, Dharmik Thakkar wrote:
>> Avoid code duplication by combining single and multi threaded tests
>> Signed-off-by: Dharmik Thakkar<dharmik.thakkar@arm.com>
>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>> ---
>>  app/test/test_lpm_perf.c | 338 +++++++++------------------------------
>>  1 file changed, 73 insertions(+), 265 deletions(-)
>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
>> index 4f15db4f85ee..08312023b661 100644
>> --- a/app/test/test_lpm_perf.c
>> +++ b/app/test/test_lpm_perf.c
>> @@ -430,11 +430,16 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>  {
>>  	unsigned int i, j, si, ei;
>>  	uint64_t begin, total_cycles;
>> -	uint8_t core_id = (uint8_t)((uintptr_t)arg);
>> +	uint8_t writer_id = (uint8_t)((uintptr_t)arg);
>>  	uint32_t next_hop_add = 0xAA;
>>  -	/* 2 writer threads are used */
>> -	if (core_id % 2 == 0) {
>> +	/* Single writer (writer_id = 1) */
>> +	if (writer_id == 1) {
> 
> Probably it would be better to use enum here instead of 1/2/3?
> 
Yes, I will update the patch.
>> +		si = 0;
>> +		ei = NUM_LDEPTH_ROUTE_ENTRIES;
>> +	}
>> +	/* 2 Writers (writer_id = 2/3)*/
>> +	else if (writer_id == 2) {
>>  		si = 0;
>>  		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>>  	} else {
>> @@ -482,16 +487,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>    /*
>>   * Functional test:
>> - * 2 writers, rest are readers
>> + * 1/2 writers, rest are readers
>>   */
>>  static int
>> -test_lpm_rcu_perf_multi_writer(void)
>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>>  {
>>  	struct rte_lpm_config config;
>>  	size_t sz;
>> -	unsigned int i;
>> +	unsigned int i, j;
>>  	uint16_t core_id;
>>  	struct rte_lpm_rcu_config rcu_cfg = {0};
>> +	int (*reader_f)(void *arg) = NULL;
>>    	if (rte_lcore_count() < 3) {
>>  		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
>> @@ -504,273 +510,76 @@ test_lpm_rcu_perf_multi_writer(void)
>>  		num_cores++;
>>  	}
>>  -	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
>> -		num_cores - 2);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	/* Init RCU variable */
>> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
>> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> -						RTE_CACHE_LINE_SIZE);
>> -	rte_rcu_qsbr_init(rv, num_cores);
>> -
>> -	rcu_cfg.v = rv;
>> -	/* Assign the RCU variable to LPM */
>> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> -		printf("RCU variable assignment failed\n");
>> -		goto error;
>> -	}
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>> -
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Launch writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> -					(void *)(uintptr_t)i,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Wait for writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			goto error;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) / TOTAL_WRITES
>> -		);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -	lpm = NULL;
>> -	rv = NULL;
>> -
>> -	/* Test without RCU integration */
>> -	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
>> -		num_cores - 2);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Launch writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> -					(void *)(uintptr_t)i,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Wait for writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			goto error;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> -		/ TOTAL_WRITES);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> -
>> -	return 0;
>> -
>> -error:
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	rte_eal_mp_wait_lcore();
>> -
>> -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -
>> -	return -1;
>> -}
>> -
>> -/*
>> - * Functional test:
>> - * Single writer, rest are readers
>> - */
>> -static int
>> -test_lpm_rcu_perf(void)
>> -{
>> -	struct rte_lpm_config config;
>> -	uint64_t begin, total_cycles;
>> -	size_t sz;
>> -	unsigned int i, j;
>> -	uint16_t core_id;
>> -	uint32_t next_hop_add = 0xAA;
>> -	struct rte_lpm_rcu_config rcu_cfg = {0};
>> -
>> -	if (rte_lcore_count() < 2) {
>> -		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
>> -		return TEST_SKIPPED;
>> -	}
>> -
>> -	num_cores = 0;
>> -	RTE_LCORE_FOREACH_WORKER(core_id) {
>> -		enabled_core_ids[num_cores] = core_id;
>> -		num_cores++;
>> -	}
>> -
>> -	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
>> -		num_cores);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	/* Init RCU variable */
>> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
>> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> -						RTE_CACHE_LINE_SIZE);
>> -	rte_rcu_qsbr_init(rv, num_cores);
>> -
>> -	rcu_cfg.v = rv;
>> -	/* Assign the RCU variable to LPM */
>> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> -		printf("RCU variable assignment failed\n");
>> -		goto error;
>> -	}
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Measure add/delete. */
>> -	begin = rte_rdtsc_precise();
>> -	for (i = 0; i < RCU_ITERATIONS; i++) {
>> -		/* Add all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> -					large_ldepth_route_table[j].depth,
>> -					next_hop_add) != 0) {
>> -				printf("Failed to add iteration %d, route# %d\n",
>> -					i, j);
>> +	for (j = 1; j < 3; j++) {
>> +		if (use_rcu)
>> +			printf("\nPerf test: %d writer(s), %d reader(s),"
>> +			       " RCU integration enabled\n", j, num_cores - j);
>> +		else
>> +			printf("\nPerf test: %d writer(s), %d reader(s),"
>> +			       " RCU integration disabled\n", j, num_cores - j);
>> +
>> +		/* Create LPM table */
>> +		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> +		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> +		config.flags = 0;
>> +		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> +		TEST_LPM_ASSERT(lpm != NULL);
>> +
>> +		/* Init RCU variable */
>> +		if (use_rcu) {
>> +			sz = rte_rcu_qsbr_get_memsize(num_cores);
>> +			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> +							RTE_CACHE_LINE_SIZE);
>> +			rte_rcu_qsbr_init(rv, num_cores);
>> +
>> +			rcu_cfg.v = rv;
>> +			/* Assign the RCU variable to LPM */
>> +			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> +				printf("RCU variable assignment failed\n");
>>  				goto error;
>>  			}
>>  -		/* Delete all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>> -				large_ldepth_route_table[j].depth) != 0) {
>> -				printf("Failed to delete iteration %d, route# %d\n",
>> -					i, j);
>> -				goto error;
>> -			}
>> -	}
>> -	total_cycles = rte_rdtsc_precise() - begin;
>> +			reader_f = test_lpm_rcu_qsbr_reader;
>> +		} else
>> +			reader_f = test_lpm_reader;
>>  -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %g cycles\n",
>> -		(double)total_cycles / TOTAL_WRITES);
>> +		writer_done = 0;
>> +		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>  -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 0; i < num_cores; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]);
>> +		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>  -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -	lpm = NULL;
>> -	rv = NULL;
>> +		/* Launch reader threads */
>> +		for (i = j; i < num_cores; i++)
>> +			rte_eal_remote_launch(reader_f, NULL,
>> +						enabled_core_ids[i]);
>>  -	/* Test without RCU integration */
>> -	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
>> -		num_cores);
>> +		/* Launch writer threads */
>> +		for (i = 0; i < j; i++)
>> +			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> 
> So now even single writer will acquire a lock for every _add/_delete operation. I don't think it is necessary.
Yes, agreed, it is not necessary. I wanted to avoid an additional if () statement, but I can add it in the new version.
> 
>> +						(void *)(uintptr_t)(i + j),
>> +						enabled_core_ids[i]);
>>  -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Measure add/delete. */
>> -	begin = rte_rdtsc_precise();
>> -	for (i = 0; i < RCU_ITERATIONS; i++) {
>> -		/* Add all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> -					large_ldepth_route_table[j].depth,
>> -					next_hop_add) != 0) {
>> -				printf("Failed to add iteration %d, route# %d\n",
>> -					i, j);
>> +		/* Wait for writer threads */
>> +		for (i = 0; i < j; i++)
>> +			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>>  				goto error;
>> -			}
>>  -		/* Delete all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>> -				large_ldepth_route_table[j].depth) != 0) {
>> -				printf("Failed to delete iteration %d, route# %d\n",
>> -					i, j);
>> -				goto error;
>> -			}
>> +		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> +		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> +		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> +			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> +			/ TOTAL_WRITES);
>> +
>> +		writer_done = 1;
>> +		/* Wait until all readers have exited */
>> +		for (i = j; i < num_cores; i++)
>> +			rte_eal_wait_lcore(enabled_core_ids[i]);
>> +
>> +		rte_lpm_free(lpm);
>> +		rte_free(rv);
>> +		lpm = NULL;
>> +		rv = NULL;
>>  	}
>> -	total_cycles = rte_rdtsc_precise() - begin;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %g cycles\n",
>> -		(double)total_cycles / TOTAL_WRITES);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>>    	return 0;
>>  @@ -946,9 +755,8 @@ test_lpm_perf(void)
>>  	rte_lpm_delete_all(lpm);
>>  	rte_lpm_free(lpm);
>>  -	test_lpm_rcu_perf();
>> -
>> -	test_lpm_rcu_perf_multi_writer();
>> +	test_lpm_rcu_perf_multi_writer(0);
>> +	test_lpm_rcu_perf_multi_writer(1);
>>    	return 0;
>>  }
> 
> -- 
> Regards,
> Vladimir
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test
  2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
                   ` (4 preceding siblings ...)
  2020-11-02 15:11 ` Bruce Richardson
@ 2020-11-02 23:51 ` Dharmik Thakkar
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
                     ` (4 more replies)
  5 siblings, 5 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-02 23:51 UTC (permalink / raw)
  Cc: dev, nd, Dharmik Thakkar
Fix LPM adds, LPM deletes, and cycle calculation.
Return error if LPM add/delete fails in multi-writer test.
Remove redundant error checking for readers.
Combine single and multi threaded test cases to avoid code duplication.
---
v2:
 - Add more details about the fix to the commit message
 - Replace hard coded values with an enum
 - Remove lock acquire/release for single writer
Dharmik Thakkar (4):
  test/lpm: fix cycle calculation in rcu qsbr perf
  test/lpm: return error on failure in rcu qsbr perf
  test/lpm: remove error checking in rcu qsbr perf
  test/lpm: avoid code duplication in rcu qsbr perf
 app/test/test_lpm_perf.c | 384 ++++++++++-----------------------------
 1 file changed, 95 insertions(+), 289 deletions(-)
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
@ 2020-11-02 23:52   ` Dharmik Thakkar
  2020-11-03  1:30     ` Honnappa Nagarahalli
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure " Dharmik Thakkar
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-02 23:52 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Honnappa Nagarahalli,
	Ruifeng Wang, Gavin Hu
  Cc: dev, nd, Dharmik Thakkar, stable
Fix incorrect calculations for LPM adds, LPM deletes,
and average cycles in RCU QSBR perf tests
Since the RCU QSBR tests run for 'RCU_ITERATIONS' and not
'ITERATIONS', replace 'ITERATIONS' with 'RCU_ITERATIONS'
when calculating adds, deletes, and cycles.
Also, for multi-writer perf test, each writer only writes
half of NUM_LDEPTH_ROUTE_ENTRIES.
For 2 writers, total adds (or deletes) should be
(RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of
(2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES).
Since the total number of adds/deletes is equal to
(RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) for both the single and
multi-writer tests, it has been replaced with a macro 'TOTAL_WRITES'.
Furthermore, 'gwrites' has been removed since it is always a fixed value
equal to TOTAL_WRITES.
Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 45 ++++++++++++++--------------------------
 1 file changed, 16 insertions(+), 29 deletions(-)
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index c5a238b9d1e8..45164b23214b 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv;
 static volatile uint8_t writer_done;
 static volatile uint32_t thr_id;
 static uint64_t gwrite_cycles;
-static uint64_t gwrites;
 /* LPM APIs are not thread safe, use mutex to provide thread safety */
 static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
 
@@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries;
 #define NUM_ROUTE_ENTRIES num_route_entries
 #define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries
 
+#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES)
+
 enum {
 	IP_CLASS_A,
 	IP_CLASS_B,
@@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	uint8_t core_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
 
-	RTE_SET_USED(arg);
 	/* 2 writer threads are used */
 	if (core_id % 2 == 0) {
 		si = 0;
@@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	total_cycles = rte_rdtsc_precise() - begin;
 
 	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
-	__atomic_fetch_add(&gwrites,
-			2 * NUM_LDEPTH_ROUTE_ENTRIES * RCU_ITERATIONS,
-			__ATOMIC_RELAXED);
 
 	return 0;
 }
@@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void)
 
 	writer_done = 0;
 	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
 
 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
@@ -548,14 +544,11 @@ test_lpm_rcu_perf_multi_writer(void)
 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 			goto error;
 
-	printf("Total LPM Adds: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
-		);
+		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+		/ TOTAL_WRITES);
 
 	/* Wait and check return value from reader threads */
 	writer_done = 1;
@@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void)
 
 	writer_done = 0;
 	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
 	/* Launch reader threads */
@@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void)
 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 			goto error;
 
-	printf("Total LPM Adds: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
-		);
+		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+		/ TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
@@ -711,11 +700,10 @@ test_lpm_rcu_perf(void)
 	}
 	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
+		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
@@ -771,11 +759,10 @@ test_lpm_rcu_perf(void)
 	}
 	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
+		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure in rcu qsbr perf
  2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
@ 2020-11-02 23:52   ` Dharmik Thakkar
  2020-11-03  1:28     ` Honnappa Nagarahalli
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking " Dharmik Thakkar
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-02 23:52 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Honnappa Nagarahalli,
	Gavin Hu, Ruifeng Wang
  Cc: dev, nd, Dharmik Thakkar, stable
Return an error if an LPM add/delete fails in the multi-writer perf test.
Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 45164b23214b..55084816ab91 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -453,6 +453,8 @@ test_lpm_rcu_qsbr_writer(void *arg)
 					next_hop_add) != 0) {
 				printf("Failed to add iteration %d, route# %d\n",
 					i, j);
+				pthread_mutex_unlock(&lpm_mutex);
+				return -1;
 			}
 			pthread_mutex_unlock(&lpm_mutex);
 		}
@@ -464,6 +466,8 @@ test_lpm_rcu_qsbr_writer(void *arg)
 				large_ldepth_route_table[j].depth) != 0) {
 				printf("Failed to delete iteration %d, route# %d\n",
 					i, j);
+				pthread_mutex_unlock(&lpm_mutex);
+				return -1;
 			}
 			pthread_mutex_unlock(&lpm_mutex);
 		}
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking in rcu qsbr perf
  2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure " Dharmik Thakkar
@ 2020-11-02 23:52   ` Dharmik Thakkar
  2020-11-03  1:21     ` Honnappa Nagarahalli
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
  2020-11-03  5:12   ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  4 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-02 23:52 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Ruifeng Wang,
	Honnappa Nagarahalli, Gavin Hu
  Cc: dev, nd, Dharmik Thakkar, stable
Remove redundant error checking for reader threads
since they never return error.
Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 55084816ab91..224c92fa3d65 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -554,11 +554,10 @@ test_lpm_rcu_perf_multi_writer(void)
 		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
 		/ TOTAL_WRITES);
 
-	/* Wait and check return value from reader threads */
 	writer_done = 1;
+	/* Wait until all readers have exited */
 	for (i = 2; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 	rte_free(rv);
@@ -603,10 +602,9 @@ test_lpm_rcu_perf_multi_writer(void)
 		/ TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 2; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 
@@ -710,10 +708,9 @@ test_lpm_rcu_perf(void)
 		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		if (rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 	rte_free(rv);
@@ -769,11 +766,9 @@ test_lpm_rcu_perf(void)
 		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			printf("Warning: lcore %u not finished.\n",
-				enabled_core_ids[i]);
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
                     ` (2 preceding siblings ...)
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking " Dharmik Thakkar
@ 2020-11-02 23:52   ` Dharmik Thakkar
  2020-11-03  4:21     ` Honnappa Nagarahalli
  2020-11-03  5:12   ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  4 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-02 23:52 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin; +Cc: dev, nd, Dharmik Thakkar
Avoid code duplication by combining the single-writer and multi-writer tests.
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 362 ++++++++++-----------------------------
 1 file changed, 91 insertions(+), 271 deletions(-)
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 224c92fa3d65..229c835c23f7 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -67,6 +67,12 @@ enum {
 	IP_CLASS_C
 };
 
+enum {
+	SINGLE_WRITER = 1,
+	MULTI_WRITER_1,
+	MULTI_WRITER_2
+};
+
 /* struct route_rule_count defines the total number of rules in following a/b/c
  * each item in a[]/b[]/c[] is the number of common IP address class A/B/C, not
  * including the ones for private local network.
@@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg)
 {
 	unsigned int i, j, si, ei;
 	uint64_t begin, total_cycles;
-	uint8_t core_id = (uint8_t)((uintptr_t)arg);
+	uint8_t writer_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
 
-	/* 2 writer threads are used */
-	if (core_id % 2 == 0) {
+	/* Single writer (writer_id = 1) */
+	if (writer_id == SINGLE_WRITER) {
+		si = 0;
+		ei = NUM_LDEPTH_ROUTE_ENTRIES;
+	}
+	/* 2 Writers (writer_id = 2/3)*/
+	else if (writer_id == MULTI_WRITER_1) {
 		si = 0;
 		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
 	} else {
@@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	for (i = 0; i < RCU_ITERATIONS; i++) {
 		/* Add all the entries */
 		for (j = si; j < ei; j++) {
-			pthread_mutex_lock(&lpm_mutex);
+			if (writer_id != SINGLE_WRITER)
+				pthread_mutex_lock(&lpm_mutex);
 			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
 					large_ldepth_route_table[j].depth,
 					next_hop_add) != 0) {
 				printf("Failed to add iteration %d, route# %d\n",
 					i, j);
-				pthread_mutex_unlock(&lpm_mutex);
+				if (writer_id != SINGLE_WRITER)
+					pthread_mutex_unlock(&lpm_mutex);
 				return -1;
 			}
-			pthread_mutex_unlock(&lpm_mutex);
+			if (writer_id != SINGLE_WRITER)
+				pthread_mutex_unlock(&lpm_mutex);
 		}
 
 		/* Delete all the entries */
 		for (j = si; j < ei; j++) {
-			pthread_mutex_lock(&lpm_mutex);
+			if (writer_id != SINGLE_WRITER)
+				pthread_mutex_lock(&lpm_mutex);
 			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
 				large_ldepth_route_table[j].depth) != 0) {
 				printf("Failed to delete iteration %d, route# %d\n",
 					i, j);
-				pthread_mutex_unlock(&lpm_mutex);
+				if (writer_id != SINGLE_WRITER)
+					pthread_mutex_unlock(&lpm_mutex);
 				return -1;
 			}
-			pthread_mutex_unlock(&lpm_mutex);
+			if (writer_id != SINGLE_WRITER)
+				pthread_mutex_unlock(&lpm_mutex);
 		}
 	}
 
@@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
 
 /*
  * Functional test:
- * 2 writers, rest are readers
+ * 1/2 writers, rest are readers
  */
 static int
-test_lpm_rcu_perf_multi_writer(void)
+test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
 {
 	struct rte_lpm_config config;
 	size_t sz;
-	unsigned int i;
+	unsigned int i, j;
 	uint16_t core_id;
 	struct rte_lpm_rcu_config rcu_cfg = {0};
+	int (*reader_f)(void *arg) = NULL;
 
 	if (rte_lcore_count() < 3) {
 		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
@@ -504,273 +522,76 @@ test_lpm_rcu_perf_multi_writer(void)
 		num_cores++;
 	}
 
-	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
-		/ TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
-
-	/* Test without RCU integration */
-	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
-		/ TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-
-	return 0;
-
-error:
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	rte_eal_mp_wait_lcore();
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-
-	return -1;
-}
-
-/*
- * Functional test:
- * Single writer, rest are readers
- */
-static int
-test_lpm_rcu_perf(void)
-{
-	struct rte_lpm_config config;
-	uint64_t begin, total_cycles;
-	size_t sz;
-	unsigned int i, j;
-	uint16_t core_id;
-	uint32_t next_hop_add = 0xAA;
-	struct rte_lpm_rcu_config rcu_cfg = {0};
-
-	if (rte_lcore_count() < 2) {
-		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
-		return TEST_SKIPPED;
-	}
-
-	num_cores = 0;
-	RTE_LCORE_FOREACH_WORKER(core_id) {
-		enabled_core_ids[num_cores] = core_id;
-		num_cores++;
-	}
-
-	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
+	for (j = 1; j < 3; j++) {
+		if (use_rcu)
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration enabled\n", j, num_cores - j);
+		else
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration disabled\n", j, num_cores - j);
+
+		/* Create LPM table */
+		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.flags = 0;
+		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
+		TEST_LPM_ASSERT(lpm != NULL);
+
+		/* Init RCU variable */
+		if (use_rcu) {
+			sz = rte_rcu_qsbr_get_memsize(num_cores);
+			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
+							RTE_CACHE_LINE_SIZE);
+			rte_rcu_qsbr_init(rv, num_cores);
+
+			rcu_cfg.v = rv;
+			/* Assign the RCU variable to LPM */
+			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
+				printf("RCU variable assignment failed\n");
 				goto error;
 			}
 
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
-	}
-	total_cycles = rte_rdtsc_precise() - begin;
+			reader_f = test_lpm_rcu_qsbr_reader;
+		} else
+			reader_f = test_lpm_reader;
 
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
+		writer_done = 0;
+		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
 
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
-
-	/* Test without RCU integration */
-	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
+		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
+		/* Launch reader threads */
+		for (i = j; i < num_cores; i++)
+			rte_eal_remote_launch(reader_f, NULL,
+						enabled_core_ids[i]);
 
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
+		/* Launch writer threads */
+		for (i = 0; i < j; i++)
+			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
+						(void *)(uintptr_t)(i + j),
+						enabled_core_ids[i]);
 
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
+		/* Wait for writer threads */
+		for (i = 0; i < j; i++)
+			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 				goto error;
-			}
 
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
+		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
+		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
+			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+			/ TOTAL_WRITES);
+
+		writer_done = 1;
+		/* Wait until all readers have exited */
+		for (i = j; i < num_cores; i++)
+			rte_eal_wait_lcore(enabled_core_ids[i]);
+
+		rte_lpm_free(lpm);
+		rte_free(rv);
+		lpm = NULL;
+		rv = NULL;
 	}
-	total_cycles = rte_rdtsc_precise() - begin;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
 
 	return 0;
 
@@ -946,9 +767,8 @@ test_lpm_perf(void)
 	rte_lpm_delete_all(lpm);
 	rte_lpm_free(lpm);
 
-	test_lpm_rcu_perf();
-
-	test_lpm_rcu_perf_multi_writer();
+	test_lpm_rcu_perf_multi_writer(0);
+	test_lpm_rcu_perf_multi_writer(1);
 
 	return 0;
 }
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking in rcu qsbr perf
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking " Dharmik Thakkar
@ 2020-11-03  1:21     ` Honnappa Nagarahalli
  2020-11-03  4:56       ` Dharmik Thakkar
  0 siblings, 1 reply; 52+ messages in thread
From: Honnappa Nagarahalli @ 2020-11-03  1:21 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin,
	Ruifeng Wang, Gavin Hu
  Cc: dev, nd, Dharmik Thakkar, stable, Honnappa Nagarahalli, nd
<snip>
> 
> Remove redundant error checking for reader threads since they never return
> error.
> 
> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
> Cc: honnappa.nagarahalli@arm.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  app/test/test_lpm_perf.c | 21 ++++++++-------------
>  1 file changed, 8 insertions(+), 13 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
> 55084816ab91..224c92fa3d65 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -554,11 +554,10 @@ test_lpm_rcu_perf_multi_writer(void)
>  		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>  		/ TOTAL_WRITES);
> 
> -	/* Wait and check return value from reader threads */
>  	writer_done = 1;
> +	/* Wait until all readers have exited */
>  	for (i = 2; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
> 
>  	rte_lpm_free(lpm);
>  	rte_free(rv);
> @@ -603,10 +602,9 @@ test_lpm_rcu_perf_multi_writer(void)
>  		/ TOTAL_WRITES);
> 
>  	writer_done = 1;
> -	/* Wait and check return value from reader threads */
> +	/* Wait until all readers have exited */
>  	for (i = 2; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
> 
>  	rte_lpm_free(lpm);
> 
> @@ -710,10 +708,9 @@ test_lpm_rcu_perf(void)
>  		(double)total_cycles / TOTAL_WRITES);
> 
>  	writer_done = 1;
> -	/* Wait and check return value from reader threads */
> +	/* Wait until all readers have exited */
>  	for (i = 0; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> +		if (rte_eal_wait_lcore(enabled_core_ids[i]);
                             ^^ Do we need the 'if' statement?
> 
>  	rte_lpm_free(lpm);
>  	rte_free(rv);
> @@ -769,11 +766,9 @@ test_lpm_rcu_perf(void)
>  		(double)total_cycles / TOTAL_WRITES);
> 
>  	writer_done = 1;
> -	/* Wait and check return value from reader threads */
> +	/* Wait until all readers have exited */
>  	for (i = 0; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			printf("Warning: lcore %u not finished.\n",
> -				enabled_core_ids[i]);
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
> 
>  	rte_lpm_free(lpm);
> 
> --
> 2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure in rcu qsbr perf
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure " Dharmik Thakkar
@ 2020-11-03  1:28     ` Honnappa Nagarahalli
  2020-11-03  4:42       ` Dharmik Thakkar
  0 siblings, 1 reply; 52+ messages in thread
From: Honnappa Nagarahalli @ 2020-11-03  1:28 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin, Gavin Hu,
	Ruifeng Wang
  Cc: dev, nd, Dharmik Thakkar, stable, Honnappa Nagarahalli, nd
<snip>
> 
> Return error if Add/Delete fail in multiwriter perf test
> 
> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
> Cc: honnappa.nagarahalli@arm.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  app/test/test_lpm_perf.c | 4 ++++
>  1 file changed, 4 insertions(+)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
> 45164b23214b..55084816ab91 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -453,6 +453,8 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  					next_hop_add) != 0) {
>  				printf("Failed to add iteration %d, route#
> %d\n",
>  					i, j);
> +				pthread_mutex_unlock(&lpm_mutex);
> +				return -1;
It would be good to use the "goto error" pattern that the other functions in this file use.
>  			}
>  			pthread_mutex_unlock(&lpm_mutex);
>  		}
> @@ -464,6 +466,8 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  				large_ldepth_route_table[j].depth) != 0) {
>  				printf("Failed to delete iteration %d, route#
> %d\n",
>  					i, j);
> +				pthread_mutex_unlock(&lpm_mutex);
> +				return -1;
>  			}
>  			pthread_mutex_unlock(&lpm_mutex);
>  		}
> --
> 2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
@ 2020-11-03  1:30     ` Honnappa Nagarahalli
  0 siblings, 0 replies; 52+ messages in thread
From: Honnappa Nagarahalli @ 2020-11-03  1:30 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin,
	Ruifeng Wang, Gavin Hu
  Cc: dev, nd, Dharmik Thakkar, stable, Honnappa Nagarahalli, nd
<snip>
> 
> Fix incorrect calculations for LPM adds, LPM deletes, and average cycles in
> RCU QSBR perf tests
> 
> Since the RCU QSBR tests run for 'RCU_ITERATIONS' and not 'ITERATIONS', replace
> 'ITERATIONS' with 'RCU_ITERATIONS'
> when calculating adds, deletes, and cycles.
> 
> Also, for multi-writer perf test, each writer only writes half of
> NUM_LDEPTH_ROUTE_ENTRIES.
> For 2 writers, total adds (or deletes) should be (RCU_ITERATIONS *
> NUM_LDEPTH_ROUTE_ENTRIES) instead of
> (2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES).
> 
> Since the total adds/deletes for both the single- and multi-writer tests is equal to
> (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES), this has been replaced
> with a macro 'TOTAL_WRITES'. Furthermore, 'gwrites' has been removed
> since it is always a fixed value equal to TOTAL_WRITES.
> 
> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
> Cc: honnappa.nagarahalli@arm.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Looks good
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
>  app/test/test_lpm_perf.c | 45 ++++++++++++++--------------------------
>  1 file changed, 16 insertions(+), 29 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
> c5a238b9d1e8..45164b23214b 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv;  static volatile uint8_t
> writer_done;  static volatile uint32_t thr_id;  static uint64_t gwrite_cycles; -
> static uint64_t gwrites;
>  /* LPM APIs are not thread safe, use mutex to provide thread safety */
> static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
> 
> @@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries;  #define
> NUM_ROUTE_ENTRIES num_route_entries  #define
> NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries
> 
> +#define TOTAL_WRITES (RCU_ITERATIONS *
> NUM_LDEPTH_ROUTE_ENTRIES)
> +
>  enum {
>  	IP_CLASS_A,
>  	IP_CLASS_B,
> @@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  	uint8_t core_id = (uint8_t)((uintptr_t)arg);
>  	uint32_t next_hop_add = 0xAA;
> 
> -	RTE_SET_USED(arg);
>  	/* 2 writer threads are used */
>  	if (core_id % 2 == 0) {
>  		si = 0;
> @@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  	total_cycles = rte_rdtsc_precise() - begin;
> 
>  	__atomic_fetch_add(&gwrite_cycles, total_cycles,
> __ATOMIC_RELAXED);
> -	__atomic_fetch_add(&gwrites,
> -			2 * NUM_LDEPTH_ROUTE_ENTRIES *
> RCU_ITERATIONS,
> -			__ATOMIC_RELAXED);
> 
>  	return 0;
>  }
> @@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void)
> 
>  	writer_done = 0;
>  	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
> 
>  	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> 
> @@ -548,14 +544,11 @@ test_lpm_rcu_perf_multi_writer(void)
>  		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>  			goto error;
> 
> -	printf("Total LPM Adds: %d\n",
> -		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> -	printf("Total LPM Deletes: %d\n",
> -		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> +	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>  	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
> -			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
> -		);
> +		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> +		/ TOTAL_WRITES);
> 
>  	/* Wait and check return value from reader threads */
>  	writer_done = 1;
> @@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void)
> 
>  	writer_done = 0;
>  	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
>  	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> 
>  	/* Launch reader threads */
> @@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void)
>  		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>  			goto error;
> 
> -	printf("Total LPM Adds: %d\n",
> -		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> -	printf("Total LPM Deletes: %d\n",
> -		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> +	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>  	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
> -			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
> -		);
> +		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> +		/ TOTAL_WRITES);
> 
>  	writer_done = 1;
>  	/* Wait and check return value from reader threads */ @@ -711,11
> +700,10 @@ test_lpm_rcu_perf(void)
>  	}
>  	total_cycles = rte_rdtsc_precise() - begin;
> 
> -	printf("Total LPM Adds: %d\n", ITERATIONS *
> NUM_LDEPTH_ROUTE_ENTRIES);
> -	printf("Total LPM Deletes: %d\n",
> -		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> +	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>  	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES *
> ITERATIONS));
> +		(double)total_cycles / TOTAL_WRITES);
> 
>  	writer_done = 1;
>  	/* Wait and check return value from reader threads */ @@ -771,11
> +759,10 @@ test_lpm_rcu_perf(void)
>  	}
>  	total_cycles = rte_rdtsc_precise() - begin;
> 
> -	printf("Total LPM Adds: %d\n", ITERATIONS *
> NUM_LDEPTH_ROUTE_ENTRIES);
> -	printf("Total LPM Deletes: %d\n",
> -		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> +	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>  	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES *
> ITERATIONS));
> +		(double)total_cycles / TOTAL_WRITES);
> 
>  	writer_done = 1;
>  	/* Wait and check return value from reader threads */
> --
> 2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
@ 2020-11-03  4:21     ` Honnappa Nagarahalli
  2020-11-03  4:33       ` Dharmik Thakkar
  0 siblings, 1 reply; 52+ messages in thread
From: Honnappa Nagarahalli @ 2020-11-03  4:21 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin
  Cc: dev, nd, Dharmik Thakkar, Honnappa Nagarahalli, nd
<snip>
> 
> Avoid code duplication by combining single and multi threaded tests
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>  app/test/test_lpm_perf.c | 362 ++++++++++-----------------------------
>  1 file changed, 91 insertions(+), 271 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
> 224c92fa3d65..229c835c23f7 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -67,6 +67,12 @@ enum {
>  	IP_CLASS_C
>  };
> 
> +enum {
> +	SINGLE_WRITER = 1,
> +	MULTI_WRITER_1,
> +	MULTI_WRITER_2
> +};
Do we need this? Can we use the number of cores instead?
> +
>  /* struct route_rule_count defines the total number of rules in following
> a/b/c
>   * each item in a[]/b[]/c[] is the number of common IP address class A/B/C,
> not
>   * including the ones for private local network.
> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg)  {
>  	unsigned int i, j, si, ei;
>  	uint64_t begin, total_cycles;
> -	uint8_t core_id = (uint8_t)((uintptr_t)arg);
> +	uint8_t writer_id = (uint8_t)((uintptr_t)arg);
>  	uint32_t next_hop_add = 0xAA;
> 
> -	/* 2 writer threads are used */
> -	if (core_id % 2 == 0) {
> +	/* Single writer (writer_id = 1) */
> +	if (writer_id == SINGLE_WRITER) {
> +		si = 0;
> +		ei = NUM_LDEPTH_ROUTE_ENTRIES;
> +	}
> +	/* 2 Writers (writer_id = 2/3)*/
> +	else if (writer_id == MULTI_WRITER_1) {
>  		si = 0;
>  		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>  	} else {
> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  	for (i = 0; i < RCU_ITERATIONS; i++) {
>  		/* Add all the entries */
>  		for (j = si; j < ei; j++) {
> -			pthread_mutex_lock(&lpm_mutex);
> +			if (writer_id != SINGLE_WRITER)
> +				pthread_mutex_lock(&lpm_mutex);
>  			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>  					large_ldepth_route_table[j].depth,
>  					next_hop_add) != 0) {
>  				printf("Failed to add iteration %d, route#
> %d\n",
>  					i, j);
> -				pthread_mutex_unlock(&lpm_mutex);
> +				if (writer_id != SINGLE_WRITER)
> +
> 	pthread_mutex_unlock(&lpm_mutex);
>  				return -1;
>  			}
> -			pthread_mutex_unlock(&lpm_mutex);
> +			if (writer_id != SINGLE_WRITER)
> +				pthread_mutex_unlock(&lpm_mutex);
>  		}
> 
>  		/* Delete all the entries */
>  		for (j = si; j < ei; j++) {
> -			pthread_mutex_lock(&lpm_mutex);
> +			if (writer_id != SINGLE_WRITER)
> +				pthread_mutex_lock(&lpm_mutex);
>  			if (rte_lpm_delete(lpm,
> large_ldepth_route_table[j].ip,
>  				large_ldepth_route_table[j].depth) != 0) {
>  				printf("Failed to delete iteration %d, route#
> %d\n",
>  					i, j);
> -				pthread_mutex_unlock(&lpm_mutex);
> +				if (writer_id != SINGLE_WRITER)
> +
> 	pthread_mutex_unlock(&lpm_mutex);
>  				return -1;
>  			}
> -			pthread_mutex_unlock(&lpm_mutex);
> +			if (writer_id != SINGLE_WRITER)
> +				pthread_mutex_unlock(&lpm_mutex);
>  		}
>  	}
> 
> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
> 
>  /*
>   * Functional test:
> - * 2 writers, rest are readers
> + * 1/2 writers, rest are readers
>   */
>  static int
> -test_lpm_rcu_perf_multi_writer(void)
> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>  {
>  	struct rte_lpm_config config;
>  	size_t sz;
> -	unsigned int i;
> +	unsigned int i, j;
>  	uint16_t core_id;
>  	struct rte_lpm_rcu_config rcu_cfg = {0};
> +	int (*reader_f)(void *arg) = NULL;
> 
>  	if (rte_lcore_count() < 3) {
>  		printf("Not enough cores for lpm_rcu_perf_autotest,
> expecting at least 3\n"); @@ -504,273 +522,76 @@
> test_lpm_rcu_perf_multi_writer(void)
>  		num_cores++;
>  	}
> 
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration
> enabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> -
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration
> disabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -
> -	return 0;
> -
> -error:
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	rte_eal_mp_wait_lcore();
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -
> -	return -1;
> -}
> -
> -/*
> - * Functional test:
> - * Single writer, rest are readers
> - */
> -static int
> -test_lpm_rcu_perf(void)
> -{
> -	struct rte_lpm_config config;
> -	uint64_t begin, total_cycles;
> -	size_t sz;
> -	unsigned int i, j;
> -	uint16_t core_id;
> -	uint32_t next_hop_add = 0xAA;
> -	struct rte_lpm_rcu_config rcu_cfg = {0};
> -
> -	if (rte_lcore_count() < 2) {
> -		printf("Not enough cores for lpm_rcu_perf_autotest,
> expecting at least 2\n");
> -		return TEST_SKIPPED;
> -	}
> -
> -	num_cores = 0;
> -	RTE_LCORE_FOREACH_WORKER(core_id) {
> -		enabled_core_ids[num_cores] = core_id;
> -		num_cores++;
> -	}
> -
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route#
> %d\n",
> -					i, j);
> +	for (j = 1; j < 3; j++) {
> +		if (use_rcu)
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration enabled\n", j, num_cores - j);
> +		else
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration disabled\n", j, num_cores - j);
> +
> +		/* Create LPM table */
> +		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.flags = 0;
> +		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> +		TEST_LPM_ASSERT(lpm != NULL);
> +
> +		/* Init RCU variable */
> +		if (use_rcu) {
> +			sz = rte_rcu_qsbr_get_memsize(num_cores);
> +			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> +
> 	RTE_CACHE_LINE_SIZE);
> +			rte_rcu_qsbr_init(rv, num_cores);
> +
> +			rcu_cfg.v = rv;
> +			/* Assign the RCU variable to LPM */
> +			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> +				printf("RCU variable assignment failed\n");
>  				goto error;
>  			}
> 
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm,
> large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route#
> %d\n",
> -					i, j);
> -				goto error;
> -			}
> -	}
> -	total_cycles = rte_rdtsc_precise() - begin;
> +			reader_f = test_lpm_rcu_qsbr_reader;
> +		} else
> +			reader_f = test_lpm_reader;
> 
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> +		writer_done = 0;
> +		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> 
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> -
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration
> disabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> +		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> 
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> +		/* Launch reader threads */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_remote_launch(reader_f, NULL,
> +						enabled_core_ids[i]);
> 
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> +		/* Launch writer threads */
> +		for (i = 0; i < j; i++)
> +			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> +						(void *)(uintptr_t)(i + j),
This can be just 'j'?
> +						enabled_core_ids[i]);
> 
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route#
> %d\n",
> -					i, j);
> +		/* Wait for writer threads */
> +		for (i = 0; i < j; i++)
> +			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>  				goto error;
> -			}
> 
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm,
> large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route#
> %d\n",
> -					i, j);
> -				goto error;
> -			}
> +		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> +		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> +			__atomic_load_n(&gwrite_cycles,
> __ATOMIC_RELAXED)
> +			/ TOTAL_WRITES);
> +
> +		writer_done = 1;
> +		/* Wait until all readers have exited */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_wait_lcore(enabled_core_ids[i]);
> +
> +		rte_lpm_free(lpm);
> +		rte_free(rv);
> +		lpm = NULL;
> +		rv = NULL;
>  	}
> -	total_cycles = rte_rdtsc_precise() - begin;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> 
>  	return 0;
> 
> @@ -946,9 +767,8 @@ test_lpm_perf(void)
>  	rte_lpm_delete_all(lpm);
>  	rte_lpm_free(lpm);
> 
> -	test_lpm_rcu_perf();
> -
> -	test_lpm_rcu_perf_multi_writer();
> +	test_lpm_rcu_perf_multi_writer(0);
> +	test_lpm_rcu_perf_multi_writer(1);
> 
>  	return 0;
>  }
> --
> 2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-03  4:21     ` Honnappa Nagarahalli
@ 2020-11-03  4:33       ` Dharmik Thakkar
  2020-11-03  5:32         ` Honnappa Nagarahalli
  0 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03  4:33 UTC (permalink / raw)
  To: Honnappa Nagarahalli; +Cc: Bruce Richardson, Vladimir Medvedkin, dev, nd
> On Nov 2, 2020, at 10:21 PM, Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> 
> <snip>
>> 
>> Avoid code duplication by combining single and multi threaded tests
>> 
>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>> ---
>> app/test/test_lpm_perf.c | 362 ++++++++++-----------------------------
>> 1 file changed, 91 insertions(+), 271 deletions(-)
>> 
>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
>> 224c92fa3d65..229c835c23f7 100644
>> --- a/app/test/test_lpm_perf.c
>> +++ b/app/test/test_lpm_perf.c
>> @@ -67,6 +67,12 @@ enum {
>> 	IP_CLASS_C
>> };
>> 
>> +enum {
>> +	SINGLE_WRITER = 1,
>> +	MULTI_WRITER_1,
>> +	MULTI_WRITER_2
>> +};
> Do we need this? Can we use the number of cores instead?
> 
There are 3 combinations of writes (adds/deletes):
1. Write all the entries - in case of a single writer
2. Write half of the entries - in case of multiple writers
3. Write remaining half of the entries - in case of multiple writers
So, I think this is required.
>> +
>> /* struct route_rule_count defines the total number of rules in following
>> a/b/c
>>  * each item in a[]/b[]/c[] is the number of common IP address class A/B/C,
>> not
>>  * including the ones for private local network.
>> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg)  {
>> 	unsigned int i, j, si, ei;
>> 	uint64_t begin, total_cycles;
>> -	uint8_t core_id = (uint8_t)((uintptr_t)arg);
>> +	uint8_t writer_id = (uint8_t)((uintptr_t)arg);
>> 	uint32_t next_hop_add = 0xAA;
>> 
>> -	/* 2 writer threads are used */
>> -	if (core_id % 2 == 0) {
>> +	/* Single writer (writer_id = 1) */
>> +	if (writer_id == SINGLE_WRITER) {
>> +		si = 0;
>> +		ei = NUM_LDEPTH_ROUTE_ENTRIES;
>> +	}
>> +	/* 2 Writers (writer_id = 2/3)*/
>> +	else if (writer_id == MULTI_WRITER_1) {
>> 		si = 0;
>> 		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>> 	} else {
>> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg)
>> 	for (i = 0; i < RCU_ITERATIONS; i++) {
>> 		/* Add all the entries */
>> 		for (j = si; j < ei; j++) {
>> -			pthread_mutex_lock(&lpm_mutex);
>> +			if (writer_id != SINGLE_WRITER)
>> +				pthread_mutex_lock(&lpm_mutex);
>> 			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> 					large_ldepth_route_table[j].depth,
>> 					next_hop_add) != 0) {
>> 				printf("Failed to add iteration %d, route#
>> %d\n",
>> 					i, j);
>> -				pthread_mutex_unlock(&lpm_mutex);
>> +				if (writer_id != SINGLE_WRITER)
>> +
>> 	pthread_mutex_unlock(&lpm_mutex);
>> 				return -1;
>> 			}
>> -			pthread_mutex_unlock(&lpm_mutex);
>> +			if (writer_id != SINGLE_WRITER)
>> +				pthread_mutex_unlock(&lpm_mutex);
>> 		}
>> 
>> 		/* Delete all the entries */
>> 		for (j = si; j < ei; j++) {
>> -			pthread_mutex_lock(&lpm_mutex);
>> +			if (writer_id != SINGLE_WRITER)
>> +				pthread_mutex_lock(&lpm_mutex);
>> 			if (rte_lpm_delete(lpm,
>> large_ldepth_route_table[j].ip,
>> 				large_ldepth_route_table[j].depth) != 0) {
>> 				printf("Failed to delete iteration %d, route#
>> %d\n",
>> 					i, j);
>> -				pthread_mutex_unlock(&lpm_mutex);
>> +				if (writer_id != SINGLE_WRITER)
>> +
>> 	pthread_mutex_unlock(&lpm_mutex);
>> 				return -1;
>> 			}
>> -			pthread_mutex_unlock(&lpm_mutex);
>> +			if (writer_id != SINGLE_WRITER)
>> +				pthread_mutex_unlock(&lpm_mutex);
>> 		}
>> 	}
>> 
>> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
>> 
>> /*
>>  * Functional test:
>> - * 2 writers, rest are readers
>> + * 1/2 writers, rest are readers
>>  */
>> static int
>> -test_lpm_rcu_perf_multi_writer(void)
>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>> {
>> 	struct rte_lpm_config config;
>> 	size_t sz;
>> -	unsigned int i;
>> +	unsigned int i, j;
>> 	uint16_t core_id;
>> 	struct rte_lpm_rcu_config rcu_cfg = {0};
>> +	int (*reader_f)(void *arg) = NULL;
>> 
>> 	if (rte_lcore_count() < 3) {
>> 		printf("Not enough cores for lpm_rcu_perf_autotest,
>> expecting at least 3\n"); @@ -504,273 +522,76 @@
>> test_lpm_rcu_perf_multi_writer(void)
>> 		num_cores++;
>> 	}
>> 
>> -	printf("\nPerf test: 2 writers, %d readers, RCU integration
>> enabled\n",
>> -		num_cores - 2);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	/* Init RCU variable */
>> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
>> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> -						RTE_CACHE_LINE_SIZE);
>> -	rte_rcu_qsbr_init(rv, num_cores);
>> -
>> -	rcu_cfg.v = rv;
>> -	/* Assign the RCU variable to LPM */
>> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> -		printf("RCU variable assignment failed\n");
>> -		goto error;
>> -	}
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>> -
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Launch writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> -					(void *)(uintptr_t)i,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Wait for writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			goto error;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> -		/ TOTAL_WRITES);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -	lpm = NULL;
>> -	rv = NULL;
>> -
>> -	/* Test without RCU integration */
>> -	printf("\nPerf test: 2 writers, %d readers, RCU integration
>> disabled\n",
>> -		num_cores - 2);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Launch writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> -					(void *)(uintptr_t)i,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Wait for writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			goto error;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> -		/ TOTAL_WRITES);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> -
>> -	return 0;
>> -
>> -error:
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	rte_eal_mp_wait_lcore();
>> -
>> -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -
>> -	return -1;
>> -}
>> -
>> -/*
>> - * Functional test:
>> - * Single writer, rest are readers
>> - */
>> -static int
>> -test_lpm_rcu_perf(void)
>> -{
>> -	struct rte_lpm_config config;
>> -	uint64_t begin, total_cycles;
>> -	size_t sz;
>> -	unsigned int i, j;
>> -	uint16_t core_id;
>> -	uint32_t next_hop_add = 0xAA;
>> -	struct rte_lpm_rcu_config rcu_cfg = {0};
>> -
>> -	if (rte_lcore_count() < 2) {
>> -		printf("Not enough cores for lpm_rcu_perf_autotest,
>> expecting at least 2\n");
>> -		return TEST_SKIPPED;
>> -	}
>> -
>> -	num_cores = 0;
>> -	RTE_LCORE_FOREACH_WORKER(core_id) {
>> -		enabled_core_ids[num_cores] = core_id;
>> -		num_cores++;
>> -	}
>> -
>> -	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
>> -		num_cores);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	/* Init RCU variable */
>> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
>> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> -						RTE_CACHE_LINE_SIZE);
>> -	rte_rcu_qsbr_init(rv, num_cores);
>> -
>> -	rcu_cfg.v = rv;
>> -	/* Assign the RCU variable to LPM */
>> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> -		printf("RCU variable assignment failed\n");
>> -		goto error;
>> -	}
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Measure add/delete. */
>> -	begin = rte_rdtsc_precise();
>> -	for (i = 0; i < RCU_ITERATIONS; i++) {
>> -		/* Add all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> -					large_ldepth_route_table[j].depth,
>> -					next_hop_add) != 0) {
>> -				printf("Failed to add iteration %d, route#
>> %d\n",
>> -					i, j);
>> +	for (j = 1; j < 3; j++) {
>> +		if (use_rcu)
>> +			printf("\nPerf test: %d writer(s), %d reader(s),"
>> +			       " RCU integration enabled\n", j, num_cores - j);
>> +		else
>> +			printf("\nPerf test: %d writer(s), %d reader(s),"
>> +			       " RCU integration disabled\n", j, num_cores - j);
>> +
>> +		/* Create LPM table */
>> +		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> +		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> +		config.flags = 0;
>> +		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> +		TEST_LPM_ASSERT(lpm != NULL);
>> +
>> +		/* Init RCU variable */
>> +		if (use_rcu) {
>> +			sz = rte_rcu_qsbr_get_memsize(num_cores);
>> +			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> +
>> 	RTE_CACHE_LINE_SIZE);
>> +			rte_rcu_qsbr_init(rv, num_cores);
>> +
>> +			rcu_cfg.v = rv;
>> +			/* Assign the RCU variable to LPM */
>> +			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> +				printf("RCU variable assignment failed\n");
>> 				goto error;
>> 			}
>> 
>> -		/* Delete all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_delete(lpm,
>> large_ldepth_route_table[j].ip,
>> -				large_ldepth_route_table[j].depth) != 0) {
>> -				printf("Failed to delete iteration %d, route#
>> %d\n",
>> -					i, j);
>> -				goto error;
>> -			}
>> -	}
>> -	total_cycles = rte_rdtsc_precise() - begin;
>> +			reader_f = test_lpm_rcu_qsbr_reader;
>> +		} else
>> +			reader_f = test_lpm_reader;
>> 
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %g cycles\n",
>> -		(double)total_cycles / TOTAL_WRITES);
>> +		writer_done = 0;
>> +		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>> 
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 0; i < num_cores; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -	lpm = NULL;
>> -	rv = NULL;
>> -
>> -	/* Test without RCU integration */
>> -	printf("\nPerf test: 1 writer, %d readers, RCU integration
>> disabled\n",
>> -		num_cores);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> +		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> 
>> -	writer_done = 0;
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> +		/* Launch reader threads */
>> +		for (i = j; i < num_cores; i++)
>> +			rte_eal_remote_launch(reader_f, NULL,
>> +						enabled_core_ids[i]);
>> 
>> -	/* Launch reader threads */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_reader, NULL,
>> -					enabled_core_ids[i]);
>> +		/* Launch writer threads */
>> +		for (i = 0; i < j; i++)
>> +			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> +						(void *)(uintptr_t)(i + j),
> This can be just 'j'?
> 
>> +						enabled_core_ids[i]);
>> 
>> -	/* Measure add/delete. */
>> -	begin = rte_rdtsc_precise();
>> -	for (i = 0; i < RCU_ITERATIONS; i++) {
>> -		/* Add all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> -					large_ldepth_route_table[j].depth,
>> -					next_hop_add) != 0) {
>> -				printf("Failed to add iteration %d, route#
>> %d\n",
>> -					i, j);
>> +		/* Wait for writer threads */
>> +		for (i = 0; i < j; i++)
>> +			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> 				goto error;
>> -			}
>> 
>> -		/* Delete all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_delete(lpm,
>> large_ldepth_route_table[j].ip,
>> -				large_ldepth_route_table[j].depth) != 0) {
>> -				printf("Failed to delete iteration %d, route#
>> %d\n",
>> -					i, j);
>> -				goto error;
>> -			}
>> +		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> +		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> +		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> +			__atomic_load_n(&gwrite_cycles,
>> __ATOMIC_RELAXED)
>> +			/ TOTAL_WRITES);
>> +
>> +		writer_done = 1;
>> +		/* Wait until all readers have exited */
>> +		for (i = j; i < num_cores; i++)
>> +			rte_eal_wait_lcore(enabled_core_ids[i]);
>> +
>> +		rte_lpm_free(lpm);
>> +		rte_free(rv);
>> +		lpm = NULL;
>> +		rv = NULL;
>> 	}
>> -	total_cycles = rte_rdtsc_precise() - begin;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %g cycles\n",
>> -		(double)total_cycles / TOTAL_WRITES);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> 
>> 	return 0;
>> 
>> @@ -946,9 +767,8 @@ test_lpm_perf(void)
>> 	rte_lpm_delete_all(lpm);
>> 	rte_lpm_free(lpm);
>> 
>> -	test_lpm_rcu_perf();
>> -
>> -	test_lpm_rcu_perf_multi_writer();
>> +	test_lpm_rcu_perf_multi_writer(0);
>> +	test_lpm_rcu_perf_multi_writer(1);
>> 
>> 	return 0;
>> }
>> --
>> 2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure in rcu qsbr perf
  2020-11-03  1:28     ` Honnappa Nagarahalli
@ 2020-11-03  4:42       ` Dharmik Thakkar
  0 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03  4:42 UTC (permalink / raw)
  To: Honnappa Nagarahalli
  Cc: Bruce Richardson, Vladimir Medvedkin, Gavin Hu, Ruifeng Wang,
	dev, nd, stable
> On Nov 2, 2020, at 7:28 PM, Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> 
> <snip>
> 
>> 
>> Return error if Add/Delete fail in multiwriter perf test
>> 
>> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
>> Cc: honnappa.nagarahalli@arm.com
>> Cc: stable@dpdk.org
>> 
>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>> ---
>> app/test/test_lpm_perf.c | 4 ++++
>> 1 file changed, 4 insertions(+)
>> 
>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
>> 45164b23214b..55084816ab91 100644
>> --- a/app/test/test_lpm_perf.c
>> +++ b/app/test/test_lpm_perf.c
>> @@ -453,6 +453,8 @@ test_lpm_rcu_qsbr_writer(void *arg)
>> 					next_hop_add) != 0) {
>> 				printf("Failed to add iteration %d, route#
>> %d\n",
>> 					i, j);
>> +				pthread_mutex_unlock(&lpm_mutex);
>> +				return -1;
> Would be good to use the "goto error" method used in this file in other functions.
Yes, will update in the next version.
> 
>> 			}
>> 			pthread_mutex_unlock(&lpm_mutex);
>> 		}
>> @@ -464,6 +466,8 @@ test_lpm_rcu_qsbr_writer(void *arg)
>> 				large_ldepth_route_table[j].depth) != 0) {
>> 				printf("Failed to delete iteration %d, route#
>> %d\n",
>> 					i, j);
>> +				pthread_mutex_unlock(&lpm_mutex);
>> +				return -1;
>> 			}
>> 			pthread_mutex_unlock(&lpm_mutex);
>> 		}
>> --
>> 2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking in rcu qsbr perf
  2020-11-03  1:21     ` Honnappa Nagarahalli
@ 2020-11-03  4:56       ` Dharmik Thakkar
  0 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03  4:56 UTC (permalink / raw)
  To: Honnappa Nagarahalli
  Cc: Bruce Richardson, Vladimir Medvedkin, Ruifeng Wang, Gavin Hu,
	dev, nd, stable
> On Nov 2, 2020, at 7:21 PM, Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> 
> <snip>
> 
>> 
>> Remove redundant error checking for reader threads since they never return
>> error.
>> 
>> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
>> Cc: honnappa.nagarahalli@arm.com
>> Cc: stable@dpdk.org
>> 
>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>> ---
>> app/test/test_lpm_perf.c | 21 ++++++++-------------
>> 1 file changed, 8 insertions(+), 13 deletions(-)
>> 
>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
>> 55084816ab91..224c92fa3d65 100644
>> --- a/app/test/test_lpm_perf.c
>> +++ b/app/test/test_lpm_perf.c
>> @@ -554,11 +554,10 @@ test_lpm_rcu_perf_multi_writer(void)
>> 		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> 		/ TOTAL_WRITES);
>> 
>> -	/* Wait and check return value from reader threads */
>> 	writer_done = 1;
>> +	/* Wait until all readers have exited */
>> 	for (i = 2; i < num_cores; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			goto error;
>> +		rte_eal_wait_lcore(enabled_core_ids[i]);
>> 
>> 	rte_lpm_free(lpm);
>> 	rte_free(rv);
>> @@ -603,10 +602,9 @@ test_lpm_rcu_perf_multi_writer(void)
>> 		/ TOTAL_WRITES);
>> 
>> 	writer_done = 1;
>> -	/* Wait and check return value from reader threads */
>> +	/* Wait until all readers have exited */
>> 	for (i = 2; i < num_cores; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			goto error;
>> +		rte_eal_wait_lcore(enabled_core_ids[i]);
>> 
>> 	rte_lpm_free(lpm);
>> 
>> @@ -710,10 +708,9 @@ test_lpm_rcu_perf(void)
>> 		(double)total_cycles / TOTAL_WRITES);
>> 
>> 	writer_done = 1;
>> -	/* Wait and check return value from reader threads */
>> +	/* Wait until all readers have exited */
>> 	for (i = 0; i < num_cores; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			goto error;
>> +		if (rte_eal_wait_lcore(enabled_core_ids[i]);
>                             ^^ Do we need the 'if' statement?
No, will remove in the next version.
>> 
>> 	rte_lpm_free(lpm);
>> 	rte_free(rv);
>> @@ -769,11 +766,9 @@ test_lpm_rcu_perf(void)
>> 		(double)total_cycles / TOTAL_WRITES);
>> 
>> 	writer_done = 1;
>> -	/* Wait and check return value from reader threads */
>> +	/* Wait until all readers have exited */
>> 	for (i = 0; i < num_cores; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			printf("Warning: lcore %u not finished.\n",
>> -				enabled_core_ids[i]);
>> +		rte_eal_wait_lcore(enabled_core_ids[i]);
>> 
>> 	rte_lpm_free(lpm);
>> 
>> --
>> 2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test
  2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
                     ` (3 preceding siblings ...)
  2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
@ 2020-11-03  5:12   ` Dharmik Thakkar
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
                       ` (4 more replies)
  4 siblings, 5 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03  5:12 UTC (permalink / raw)
  Cc: dev, nd, Dharmik Thakkar
Fix LPM adds, LPM deletes, and cycle calculation.
Return error if LPM add/delete fails in multi-writer test.
Remove redundant error checking for readers.
Combine single and multi threaded test cases to avoid code duplication.
---
v3:
 - Add 'goto error'
 - Remove unnecessary if statement
v2:
 - Add more details about the fix to the commit message
 - Replace hard coded values with an enum
 - Remove lock acquire/release for single writer
Dharmik Thakkar (4):
  test/lpm: fix cycle calculation in rcu qsbr perf
  test/lpm: return error on failure in rcu qsbr perf
  test/lpm: remove error checking in rcu qsbr perf
  test/lpm: avoid code duplication in rcu qsbr perf
 app/test/test_lpm_perf.c | 381 ++++++++++-----------------------------
 1 file changed, 94 insertions(+), 287 deletions(-)
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v3 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-11-03  5:12   ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
@ 2020-11-03  5:12     ` Dharmik Thakkar
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 2/4] test/lpm: return error on failure " Dharmik Thakkar
                       ` (3 subsequent siblings)
  4 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03  5:12 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Gavin Hu,
	Honnappa Nagarahalli, Ruifeng Wang
  Cc: dev, nd, Dharmik Thakkar, stable
Fix incorrect calculations for LPM adds, LPM deletes,
and average cycles in RCU QSBR perf tests
Since the RCU QSBR tests run for 'RCU_ITERATIONS' and not
'ITERATIONS', replace 'ITERATIONS' with 'RCU_ITERATIONS'
when calculating adds, deletes, and cycles.
Also, for multi-writer perf test, each writer only writes
half of NUM_LDEPTH_ROUTE_ENTRIES.
For 2 writers, total adds (or deletes) should be
(RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of
(2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES).
Since the total number of adds/deletes is equal to
(RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) for both the single-writer
and multi-writer tests, it has been replaced with the macro
'TOTAL_WRITES'. Furthermore, 'gwrites' has been removed, since it is
always a fixed value equal to TOTAL_WRITES.
Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 app/test/test_lpm_perf.c | 45 ++++++++++++++--------------------------
 1 file changed, 16 insertions(+), 29 deletions(-)
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index c5a238b9d1e8..45164b23214b 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv;
 static volatile uint8_t writer_done;
 static volatile uint32_t thr_id;
 static uint64_t gwrite_cycles;
-static uint64_t gwrites;
 /* LPM APIs are not thread safe, use mutex to provide thread safety */
 static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
 
@@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries;
 #define NUM_ROUTE_ENTRIES num_route_entries
 #define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries
 
+#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES)
+
 enum {
 	IP_CLASS_A,
 	IP_CLASS_B,
@@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	uint8_t core_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
 
-	RTE_SET_USED(arg);
 	/* 2 writer threads are used */
 	if (core_id % 2 == 0) {
 		si = 0;
@@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	total_cycles = rte_rdtsc_precise() - begin;
 
 	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
-	__atomic_fetch_add(&gwrites,
-			2 * NUM_LDEPTH_ROUTE_ENTRIES * RCU_ITERATIONS,
-			__ATOMIC_RELAXED);
 
 	return 0;
 }
@@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void)
 
 	writer_done = 0;
 	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
 
 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
@@ -548,14 +544,11 @@ test_lpm_rcu_perf_multi_writer(void)
 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 			goto error;
 
-	printf("Total LPM Adds: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
-		);
+		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+		/ TOTAL_WRITES);
 
 	/* Wait and check return value from reader threads */
 	writer_done = 1;
@@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void)
 
 	writer_done = 0;
 	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
 	/* Launch reader threads */
@@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void)
 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 			goto error;
 
-	printf("Total LPM Adds: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
-		);
+		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+		/ TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
@@ -711,11 +700,10 @@ test_lpm_rcu_perf(void)
 	}
 	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
+		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
@@ -771,11 +759,10 @@ test_lpm_rcu_perf(void)
 	}
 	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
+		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v3 2/4] test/lpm: return error on failure in rcu qsbr perf
  2020-11-03  5:12   ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
@ 2020-11-03  5:12     ` Dharmik Thakkar
  2020-11-03  5:21       ` Honnappa Nagarahalli
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 3/4] test/lpm: remove error checking " Dharmik Thakkar
                       ` (2 subsequent siblings)
  4 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03  5:12 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Ruifeng Wang, Gavin Hu,
	Honnappa Nagarahalli
  Cc: dev, nd, Dharmik Thakkar, stable
Return an error if an LPM add/delete fails in the multi-writer perf test.
Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 6 ++++++
 1 file changed, 6 insertions(+)
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 45164b23214b..fc4c9b60cbbc 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -453,6 +453,7 @@ test_lpm_rcu_qsbr_writer(void *arg)
 					next_hop_add) != 0) {
 				printf("Failed to add iteration %d, route# %d\n",
 					i, j);
+				goto error;
 			}
 			pthread_mutex_unlock(&lpm_mutex);
 		}
@@ -464,6 +465,7 @@ test_lpm_rcu_qsbr_writer(void *arg)
 				large_ldepth_route_table[j].depth) != 0) {
 				printf("Failed to delete iteration %d, route# %d\n",
 					i, j);
+				goto error;
 			}
 			pthread_mutex_unlock(&lpm_mutex);
 		}
@@ -474,6 +476,10 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
 
 	return 0;
+
+error:
+	pthread_mutex_unlock(&lpm_mutex);
+	return -1;
 }
 
 /*
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v3 3/4] test/lpm: remove error checking in rcu qsbr perf
  2020-11-03  5:12   ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 2/4] test/lpm: return error on failure " Dharmik Thakkar
@ 2020-11-03  5:12     ` Dharmik Thakkar
  2020-11-03  5:22       ` Honnappa Nagarahalli
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
  2020-11-03 22:23     ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  4 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03  5:12 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Gavin Hu,
	Honnappa Nagarahalli, Ruifeng Wang
  Cc: dev, nd, Dharmik Thakkar, stable
Remove redundant error checking for reader threads,
since they never return an error.
Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index fc4c9b60cbbc..fa6ebc4f7547 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -556,11 +556,10 @@ test_lpm_rcu_perf_multi_writer(void)
 		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
 		/ TOTAL_WRITES);
 
-	/* Wait and check return value from reader threads */
 	writer_done = 1;
+	/* Wait until all readers have exited */
 	for (i = 2; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 	rte_free(rv);
@@ -605,10 +604,9 @@ test_lpm_rcu_perf_multi_writer(void)
 		/ TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 2; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 
@@ -712,10 +710,9 @@ test_lpm_rcu_perf(void)
 		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 	rte_free(rv);
@@ -771,11 +768,9 @@ test_lpm_rcu_perf(void)
 		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			printf("Warning: lcore %u not finished.\n",
-				enabled_core_ids[i]);
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v3 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-03  5:12   ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
                       ` (2 preceding siblings ...)
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 3/4] test/lpm: remove error checking " Dharmik Thakkar
@ 2020-11-03  5:12     ` Dharmik Thakkar
  2020-11-03 22:23     ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  4 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03  5:12 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin; +Cc: dev, nd, Dharmik Thakkar
Avoid code duplication by combining the single- and multi-threaded tests.
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 359 ++++++++++-----------------------------
 1 file changed, 89 insertions(+), 270 deletions(-)
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index fa6ebc4f7547..147801634210 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -67,6 +67,12 @@ enum {
 	IP_CLASS_C
 };
 
+enum {
+	SINGLE_WRITER = 1,
+	MULTI_WRITER_1,
+	MULTI_WRITER_2
+};
+
 /* struct route_rule_count defines the total number of rules in following a/b/c
  * each item in a[]/b[]/c[] is the number of common IP address class A/B/C, not
  * including the ones for private local network.
@@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg)
 {
 	unsigned int i, j, si, ei;
 	uint64_t begin, total_cycles;
-	uint8_t core_id = (uint8_t)((uintptr_t)arg);
+	uint8_t writer_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
 
-	/* 2 writer threads are used */
-	if (core_id % 2 == 0) {
+	/* Single writer (writer_id = 1) */
+	if (writer_id == SINGLE_WRITER) {
+		si = 0;
+		ei = NUM_LDEPTH_ROUTE_ENTRIES;
+	}
+	/* 2 Writers (writer_id = 2/3)*/
+	else if (writer_id == MULTI_WRITER_1) {
 		si = 0;
 		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
 	} else {
@@ -447,7 +458,8 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	for (i = 0; i < RCU_ITERATIONS; i++) {
 		/* Add all the entries */
 		for (j = si; j < ei; j++) {
-			pthread_mutex_lock(&lpm_mutex);
+			if (writer_id != SINGLE_WRITER)
+				pthread_mutex_lock(&lpm_mutex);
 			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
 					large_ldepth_route_table[j].depth,
 					next_hop_add) != 0) {
@@ -455,19 +467,22 @@ test_lpm_rcu_qsbr_writer(void *arg)
 					i, j);
 				goto error;
 			}
-			pthread_mutex_unlock(&lpm_mutex);
+			if (writer_id != SINGLE_WRITER)
+				pthread_mutex_unlock(&lpm_mutex);
 		}
 
 		/* Delete all the entries */
 		for (j = si; j < ei; j++) {
-			pthread_mutex_lock(&lpm_mutex);
+			if (writer_id != SINGLE_WRITER)
+				pthread_mutex_lock(&lpm_mutex);
 			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
 				large_ldepth_route_table[j].depth) != 0) {
 				printf("Failed to delete iteration %d, route# %d\n",
 					i, j);
 				goto error;
 			}
-			pthread_mutex_unlock(&lpm_mutex);
+			if (writer_id != SINGLE_WRITER)
+				pthread_mutex_unlock(&lpm_mutex);
 		}
 	}
 
@@ -478,22 +493,24 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	return 0;
 
 error:
-	pthread_mutex_unlock(&lpm_mutex);
+	if (writer_id != SINGLE_WRITER)
+		pthread_mutex_unlock(&lpm_mutex);
 	return -1;
 }
 
 /*
  * Functional test:
- * 2 writers, rest are readers
+ * 1/2 writers, rest are readers
  */
 static int
-test_lpm_rcu_perf_multi_writer(void)
+test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
 {
 	struct rte_lpm_config config;
 	size_t sz;
-	unsigned int i;
+	unsigned int i, j;
 	uint16_t core_id;
 	struct rte_lpm_rcu_config rcu_cfg = {0};
+	int (*reader_f)(void *arg) = NULL;
 
 	if (rte_lcore_count() < 3) {
 		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
@@ -506,273 +523,76 @@ test_lpm_rcu_perf_multi_writer(void)
 		num_cores++;
 	}
 
-	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
-		/ TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
-
-	/* Test without RCU integration */
-	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
-		/ TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-
-	return 0;
-
-error:
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	rte_eal_mp_wait_lcore();
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-
-	return -1;
-}
-
-/*
- * Functional test:
- * Single writer, rest are readers
- */
-static int
-test_lpm_rcu_perf(void)
-{
-	struct rte_lpm_config config;
-	uint64_t begin, total_cycles;
-	size_t sz;
-	unsigned int i, j;
-	uint16_t core_id;
-	uint32_t next_hop_add = 0xAA;
-	struct rte_lpm_rcu_config rcu_cfg = {0};
-
-	if (rte_lcore_count() < 2) {
-		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
-		return TEST_SKIPPED;
-	}
-
-	num_cores = 0;
-	RTE_LCORE_FOREACH_WORKER(core_id) {
-		enabled_core_ids[num_cores] = core_id;
-		num_cores++;
-	}
-
-	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
+	for (j = 1; j < 3; j++) {
+		if (use_rcu)
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration enabled\n", j, num_cores - j);
+		else
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration disabled\n", j, num_cores - j);
+
+		/* Create LPM table */
+		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.flags = 0;
+		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
+		TEST_LPM_ASSERT(lpm != NULL);
+
+		/* Init RCU variable */
+		if (use_rcu) {
+			sz = rte_rcu_qsbr_get_memsize(num_cores);
+			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
+							RTE_CACHE_LINE_SIZE);
+			rte_rcu_qsbr_init(rv, num_cores);
+
+			rcu_cfg.v = rv;
+			/* Assign the RCU variable to LPM */
+			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
+				printf("RCU variable assignment failed\n");
 				goto error;
 			}
 
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
-	}
-	total_cycles = rte_rdtsc_precise() - begin;
+			reader_f = test_lpm_rcu_qsbr_reader;
+		} else
+			reader_f = test_lpm_reader;
 
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
+		writer_done = 0;
+		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
 
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
-
-	/* Test without RCU integration */
-	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
+		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
+		/* Launch reader threads */
+		for (i = j; i < num_cores; i++)
+			rte_eal_remote_launch(reader_f, NULL,
+						enabled_core_ids[i]);
 
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
+		/* Launch writer threads */
+		for (i = 0; i < j; i++)
+			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
+						(void *)(uintptr_t)(i + j),
+						enabled_core_ids[i]);
 
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
+		/* Wait for writer threads */
+		for (i = 0; i < j; i++)
+			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 				goto error;
-			}
 
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
+		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
+		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
+			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+			/ TOTAL_WRITES);
+
+		writer_done = 1;
+		/* Wait until all readers have exited */
+		for (i = j; i < num_cores; i++)
+			rte_eal_wait_lcore(enabled_core_ids[i]);
+
+		rte_lpm_free(lpm);
+		rte_free(rv);
+		lpm = NULL;
+		rv = NULL;
 	}
-	total_cycles = rte_rdtsc_precise() - begin;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
 
 	return 0;
 
@@ -948,9 +768,8 @@ test_lpm_perf(void)
 	rte_lpm_delete_all(lpm);
 	rte_lpm_free(lpm);
 
-	test_lpm_rcu_perf();
-
-	test_lpm_rcu_perf_multi_writer();
+	test_lpm_rcu_perf_multi_writer(0);
+	test_lpm_rcu_perf_multi_writer(1);
 
 	return 0;
 }
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v3 2/4] test/lpm: return error on failure in rcu qsbr perf
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 2/4] test/lpm: return error on failure " Dharmik Thakkar
@ 2020-11-03  5:21       ` Honnappa Nagarahalli
  0 siblings, 0 replies; 52+ messages in thread
From: Honnappa Nagarahalli @ 2020-11-03  5:21 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin,
	Ruifeng Wang, Gavin Hu
  Cc: dev, nd, Dharmik Thakkar, stable, Honnappa Nagarahalli, nd
<snip>
> 
> Return error if Add/Delete fail in multiwriter perf test
> 
> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
> Cc: honnappa.nagarahalli@arm.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Looks good
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
>  app/test/test_lpm_perf.c | 6 ++++++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
> 45164b23214b..fc4c9b60cbbc 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -453,6 +453,7 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  					next_hop_add) != 0) {
>  				printf("Failed to add iteration %d, route#
> %d\n",
>  					i, j);
> +				goto error;
>  			}
>  			pthread_mutex_unlock(&lpm_mutex);
>  		}
> @@ -464,6 +465,7 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  				large_ldepth_route_table[j].depth) != 0) {
>  				printf("Failed to delete iteration %d, route#
> %d\n",
>  					i, j);
> +				goto error;
>  			}
>  			pthread_mutex_unlock(&lpm_mutex);
>  		}
> @@ -474,6 +476,10 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  	__atomic_fetch_add(&gwrite_cycles, total_cycles,
> __ATOMIC_RELAXED);
> 
>  	return 0;
> +
> +error:
> +	pthread_mutex_unlock(&lpm_mutex);
> +	return -1;
>  }
> 
>  /*
> --
> 2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v3 3/4] test/lpm: remove error checking in rcu qsbr perf
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 3/4] test/lpm: remove error checking " Dharmik Thakkar
@ 2020-11-03  5:22       ` Honnappa Nagarahalli
  0 siblings, 0 replies; 52+ messages in thread
From: Honnappa Nagarahalli @ 2020-11-03  5:22 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin, Gavin Hu,
	Ruifeng Wang
  Cc: dev, nd, Dharmik Thakkar, stable, Honnappa Nagarahalli, nd
> -----Original Message-----
> From: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Sent: Monday, November 2, 2020 11:12 PM
> To: Bruce Richardson <bruce.richardson@intel.com>; Vladimir Medvedkin
> <vladimir.medvedkin@intel.com>; Gavin Hu <Gavin.Hu@arm.com>;
> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>; Dharmik Thakkar
> <Dharmik.Thakkar@arm.com>; stable@dpdk.org
> Subject: [PATCH v3 3/4] test/lpm: remove error checking in rcu qsbr perf
> 
> Remove redundant error checking for reader threads since they never return
> error.
> 
> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
> Cc: honnappa.nagarahalli@arm.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Looks good
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
>  app/test/test_lpm_perf.c | 21 ++++++++-------------
>  1 file changed, 8 insertions(+), 13 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
> fc4c9b60cbbc..fa6ebc4f7547 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -556,11 +556,10 @@ test_lpm_rcu_perf_multi_writer(void)
>  		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>  		/ TOTAL_WRITES);
> 
> -	/* Wait and check return value from reader threads */
>  	writer_done = 1;
> +	/* Wait until all readers have exited */
>  	for (i = 2; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
> 
>  	rte_lpm_free(lpm);
>  	rte_free(rv);
> @@ -605,10 +604,9 @@ test_lpm_rcu_perf_multi_writer(void)
>  		/ TOTAL_WRITES);
> 
>  	writer_done = 1;
> -	/* Wait and check return value from reader threads */
> +	/* Wait until all readers have exited */
>  	for (i = 2; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
> 
>  	rte_lpm_free(lpm);
> 
> @@ -712,10 +710,9 @@ test_lpm_rcu_perf(void)
>  		(double)total_cycles / TOTAL_WRITES);
> 
>  	writer_done = 1;
> -	/* Wait and check return value from reader threads */
> +	/* Wait until all readers have exited */
>  	for (i = 0; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
> 
>  	rte_lpm_free(lpm);
>  	rte_free(rv);
> @@ -771,11 +768,9 @@ test_lpm_rcu_perf(void)
>  		(double)total_cycles / TOTAL_WRITES);
> 
>  	writer_done = 1;
> -	/* Wait and check return value from reader threads */
> +	/* Wait until all readers have exited */
>  	for (i = 0; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			printf("Warning: lcore %u not finished.\n",
> -				enabled_core_ids[i]);
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
> 
>  	rte_lpm_free(lpm);
> 
> --
> 2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-03  4:33       ` Dharmik Thakkar
@ 2020-11-03  5:32         ` Honnappa Nagarahalli
  2020-11-03 14:03           ` Dharmik Thakkar
  0 siblings, 1 reply; 52+ messages in thread
From: Honnappa Nagarahalli @ 2020-11-03  5:32 UTC (permalink / raw)
  To: Dharmik Thakkar
  Cc: Bruce Richardson, Vladimir Medvedkin, dev, nd, Honnappa Nagarahalli, nd
<snip>
> >>
> >> Avoid code duplication by combining single and multi threaded tests
> >>
> >> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> >> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> >> ---
> >> app/test/test_lpm_perf.c | 362
> >> ++++++++++-----------------------------
> >> 1 file changed, 91 insertions(+), 271 deletions(-)
> >>
> >> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
> >> index
> >> 224c92fa3d65..229c835c23f7 100644
> >> --- a/app/test/test_lpm_perf.c
> >> +++ b/app/test/test_lpm_perf.c
> >> @@ -67,6 +67,12 @@ enum {
> >> IP_CLASS_C
> >> };
> >>
> >> +enum {
> >> +SINGLE_WRITER = 1,
> >> +MULTI_WRITER_1,
> >> +MULTI_WRITER_2
> >> +};
> > Do we need this? Can we use the number of cores instead?
> >
> 
> There are 3 combinations of writes (adds/deletes):
> 1. Write all the entries - in case of a single writer
> 2. Write half of the entries - in case of multiple writers
> 3. Write the remaining half of the entries - in case of multiple writers
> 
> So, I think this is required.
IMO, this is not scalable. Essentially, we need 2 parameters to divide the routes among the writer threads: 1) the total number of writers and 2) the core ID in the linear space.
Creating a structure with these 2 parameters and passing it to the writer thread would be better and more scalable.
> 
> >> +
> >> /* struct route_rule_count defines the total number of rules in
> >> following a/b/c
> >>  * each item in a[]/b[]/c[] is the number of common IP address class
> >> A/B/C, not
> >>  * including the ones for private local network.
> >> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg)  {
> unsigned
> >> int i, j, si, ei; uint64_t begin, total_cycles; -uint8_t core_id =
> >> (uint8_t)((uintptr_t)arg);
> >> +uint8_t writer_id = (uint8_t)((uintptr_t)arg);
> >> uint32_t next_hop_add = 0xAA;
> >>
> >> -/* 2 writer threads are used */
> >> -if (core_id % 2 == 0) {
> >> +/* Single writer (writer_id = 1) */
> >> +if (writer_id == SINGLE_WRITER) {
> >> +si = 0;
> >> +ei = NUM_LDEPTH_ROUTE_ENTRIES;
> >> +}
> >> +/* 2 Writers (writer_id = 2/3)*/
> >> +else if (writer_id == MULTI_WRITER_1) {
> >> si = 0;
> >> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> >> } else {
> >> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) for (i = 0;
> >> i < RCU_ITERATIONS; i++) {
> >> /* Add all the entries */
> >> for (j = si; j < ei; j++) {
> >> -pthread_mutex_lock(&lpm_mutex);
> >> +if (writer_id != SINGLE_WRITER)
> >> +pthread_mutex_lock(&lpm_mutex);
> >> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> >> large_ldepth_route_table[j].depth,
> >> next_hop_add) != 0) {
> >> printf("Failed to add iteration %d, route# %d\n", i, j);
> >> -pthread_mutex_unlock(&lpm_mutex);
> >> +if (writer_id != SINGLE_WRITER)
> >> +
> >> pthread_mutex_unlock(&lpm_mutex);
> >> return -1;
> >> }
> >> -pthread_mutex_unlock(&lpm_mutex);
> >> +if (writer_id != SINGLE_WRITER)
> >> +pthread_mutex_unlock(&lpm_mutex);
> >> }
> >>
> >> /* Delete all the entries */
> >> for (j = si; j < ei; j++) {
> >> -pthread_mutex_lock(&lpm_mutex);
> >> +if (writer_id != SINGLE_WRITER)
> >> +pthread_mutex_lock(&lpm_mutex);
> >> if (rte_lpm_delete(lpm,
> >> large_ldepth_route_table[j].ip,
> >> large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete
> >> iteration %d, route# %d\n", i, j); -pthread_mutex_unlock(&lpm_mutex);
> >> +if (writer_id != SINGLE_WRITER)
> >> +
> >> pthread_mutex_unlock(&lpm_mutex);
> >> return -1;
> >> }
> >> -pthread_mutex_unlock(&lpm_mutex);
> >> +if (writer_id != SINGLE_WRITER)
> >> +pthread_mutex_unlock(&lpm_mutex);
> >> }
> >> }
> >>
> >> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
> >>
> >> /*
> >>  * Functional test:
> >> - * 2 writers, rest are readers
> >> + * 1/2 writers, rest are readers
> >>  */
> >> static int
> >> -test_lpm_rcu_perf_multi_writer(void)
> >> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
> >> {
> >> struct rte_lpm_config config;
> >> size_t sz;
> >> -unsigned int i;
> >> +unsigned int i, j;
> >> uint16_t core_id;
> >> struct rte_lpm_rcu_config rcu_cfg = {0};
> >> +int (*reader_f)(void *arg) = NULL;
> >>
> >> if (rte_lcore_count() < 3) {
> >> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
> >> least 3\n"); @@ -504,273 +522,76 @@
> >> test_lpm_rcu_perf_multi_writer(void)
> >> num_cores++;
> >> }
> >>
> >> -printf("\nPerf test: 2 writers, %d readers, RCU integration
> >> enabled\n", -num_cores - 2);
> >> -
> >> -/* Create LPM table */
> >> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> config.number_tbl8s =
> >> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >> -TEST_LPM_ASSERT(lpm != NULL);
> >> -
> >> -/* Init RCU variable */
> >> -sz = rte_rcu_qsbr_get_memsize(num_cores);
> >> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> >> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
> >> -
> >> -rcu_cfg.v = rv;
> >> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm,
> >> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto
> >> error; -}
> >> -
> >> -writer_done = 0;
> >> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> >> -
> >> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >> -
> >> -/* Launch reader threads */
> >> -for (i = 2; i < num_cores; i++)
> >> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> >> -enabled_core_ids[i]);
> >> -
> >> -/* Launch writer threads */
> >> -for (i = 0; i < 2; i++)
> >> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> >> -(void *)(uintptr_t)i,
> >> -enabled_core_ids[i]);
> >> -
> >> -/* Wait for writer threads */
> >> -for (i = 0; i < 2; i++)
> >> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
> >> -
> >> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
> >> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
> >> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
> >> -
> >> -writer_done = 1;
> >> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores;
> >> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
> >> -
> >> -rte_lpm_free(lpm);
> >> -rte_free(rv);
> >> -lpm = NULL;
> >> -rv = NULL;
> >> -
> >> -/* Test without RCU integration */
> >> -printf("\nPerf test: 2 writers, %d readers, RCU integration
> >> disabled\n", -num_cores - 2);
> >> -
> >> -/* Create LPM table */
> >> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> config.number_tbl8s =
> >> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >> -TEST_LPM_ASSERT(lpm != NULL);
> >> -
> >> -writer_done = 0;
> >> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> >> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >> -
> >> -/* Launch reader threads */
> >> -for (i = 2; i < num_cores; i++)
> >> -rte_eal_remote_launch(test_lpm_reader, NULL, -enabled_core_ids[i]);
> >> -
> >> -/* Launch writer threads */
> >> -for (i = 0; i < 2; i++)
> >> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> >> -(void *)(uintptr_t)i,
> >> -enabled_core_ids[i]);
> >> -
> >> -/* Wait for writer threads */
> >> -for (i = 0; i < 2; i++)
> >> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
> >> -
> >> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
> >> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
> >> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
> >> -
> >> -writer_done = 1;
> >> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores;
> >> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
> >> -
> >> -rte_lpm_free(lpm);
> >> -
> >> -return 0;
> >> -
> >> -error:
> >> -writer_done = 1;
> >> -/* Wait until all readers have exited */ -rte_eal_mp_wait_lcore();
> >> -
> >> -rte_lpm_free(lpm);
> >> -rte_free(rv);
> >> -
> >> -return -1;
> >> -}
> >> -
> >> -/*
> >> - * Functional test:
> >> - * Single writer, rest are readers
> >> - */
> >> -static int
> >> -test_lpm_rcu_perf(void)
> >> -{
> >> -struct rte_lpm_config config;
> >> -uint64_t begin, total_cycles;
> >> -size_t sz;
> >> -unsigned int i, j;
> >> -uint16_t core_id;
> >> -uint32_t next_hop_add = 0xAA;
> >> -struct rte_lpm_rcu_config rcu_cfg = {0};
> >> -
> >> -if (rte_lcore_count() < 2) {
> >> -printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
> >> least 2\n"); -return TEST_SKIPPED; -}
> >> -
> >> -num_cores = 0;
> >> -RTE_LCORE_FOREACH_WORKER(core_id) {
> >> -enabled_core_ids[num_cores] = core_id; -num_cores++; -}
> >> -
> >> -printf("\nPerf test: 1 writer, %d readers, RCU integration
> >> enabled\n", -num_cores);
> >> -
> >> -/* Create LPM table */
> >> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> config.number_tbl8s =
> >> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >> -TEST_LPM_ASSERT(lpm != NULL);
> >> -
> >> -/* Init RCU variable */
> >> -sz = rte_rcu_qsbr_get_memsize(num_cores);
> >> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> >> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
> >> -
> >> -rcu_cfg.v = rv;
> >> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm,
> >> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto
> >> error; -}
> >> -
> >> -writer_done = 0;
> >> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >> -
> >> -/* Launch reader threads */
> >> -for (i = 0; i < num_cores; i++)
> >> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> >> -enabled_core_ids[i]);
> >> -
> >> -/* Measure add/delete. */
> >> -begin = rte_rdtsc_precise();
> >> -for (i = 0; i < RCU_ITERATIONS; i++) {
> >> -/* Add all the entries */
> >> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if (rte_lpm_add(lpm,
> >> large_ldepth_route_table[j].ip, -large_ldepth_route_table[j].depth,
> >> -next_hop_add) != 0) {
> >> -printf("Failed to add iteration %d, route# %d\n", -i, j);
> >> +for (j = 1; j < 3; j++) {
> >> +if (use_rcu)
> >> +printf("\nPerf test: %d writer(s), %d reader(s),"
> >> +       " RCU integration enabled\n", j, num_cores - j); else
> >> +printf("\nPerf test: %d writer(s), %d reader(s),"
> >> +       " RCU integration disabled\n", j, num_cores - j);
> >> +
> >> +/* Create LPM table */
> >> +config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> config.number_tbl8s =
> >> +NUM_LDEPTH_ROUTE_ENTRIES; config.flags = 0; lpm =
> >> +rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >> +TEST_LPM_ASSERT(lpm != NULL);
> >> +
> >> +/* Init RCU variable */
> >> +if (use_rcu) {
> >> +sz = rte_rcu_qsbr_get_memsize(num_cores);
> >> +rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> >> +
> >> RTE_CACHE_LINE_SIZE);
> >> +rte_rcu_qsbr_init(rv, num_cores);
> >> +
> >> +rcu_cfg.v = rv;
> >> +/* Assign the RCU variable to LPM */ if (rte_lpm_rcu_qsbr_add(lpm,
> >> +&rcu_cfg) != 0) { printf("RCU variable assignment failed\n");
> >> goto error;
> >> }
> >>
> >> -/* Delete all the entries */
> >> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
> >> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> >> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to delete
> >> iteration %d, route# %d\n", -i, j); -goto error; -} -} -total_cycles
> >> = rte_rdtsc_precise() - begin;
> >> +reader_f = test_lpm_rcu_qsbr_reader; } else reader_f =
> >> +test_lpm_reader;
> >>
> >> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g
> >> cycles\n", -(double)total_cycles / TOTAL_WRITES);
> >> +writer_done = 0;
> >> +__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> >>
> >> -writer_done = 1;
> >> -/* Wait until all readers have exited */ -for (i = 0; i < num_cores;
> >> i++) -if (rte_eal_wait_lcore(enabled_core_ids[i]);
> >> -
> >> -rte_lpm_free(lpm);
> >> -rte_free(rv);
> >> -lpm = NULL;
> >> -rv = NULL;
> >> -
> >> -/* Test without RCU integration */
> >> -printf("\nPerf test: 1 writer, %d readers, RCU integration
> >> disabled\n", -num_cores);
> >> -
> >> -/* Create LPM table */
> >> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> config.number_tbl8s =
> >> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >> -TEST_LPM_ASSERT(lpm != NULL);
> >> +__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >>
> >> -writer_done = 0;
> >> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >> +/* Launch reader threads */
> >> +for (i = j; i < num_cores; i++)
> >> +rte_eal_remote_launch(reader_f, NULL,
> >> +enabled_core_ids[i]);
> >>
> >> -/* Launch reader threads */
> >> -for (i = 0; i < num_cores; i++)
> >> -rte_eal_remote_launch(test_lpm_reader, NULL,
> >> -enabled_core_ids[i]);
> >> +/* Launch writer threads */
> >> +for (i = 0; i < j; i++)
> >> +rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> >> +(void *)(uintptr_t)(i + j),
> > This can be just 'j'?
> >
> >> +enabled_core_ids[i]);
> >>
> >> -/* Measure add/delete. */
> >> -begin = rte_rdtsc_precise();
> >> -for (i = 0; i < RCU_ITERATIONS; i++) {
> >> -/* Add all the entries */
> >> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> >> -if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> >> -large_ldepth_route_table[j].depth,
> >> -next_hop_add) != 0) {
> >> -printf("Failed to add iteration %d, route#
> >> %d\n",
> >> -i, j);
> >> +/* Wait for writer threads */
> >> +for (i = 0; i < j; i++)
> >> +if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> >> goto error;
> >> -}
> >>
> >> -/* Delete all the entries */
> >> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> >> -if (rte_lpm_delete(lpm,
> >> large_ldepth_route_table[j].ip,
> >> -large_ldepth_route_table[j].depth) != 0) {
> >> -printf("Failed to delete iteration %d, route#
> >> %d\n",
> >> -i, j);
> >> -goto error;
> >> -}
> >> +printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> >> +printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> >> +printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> >> +__atomic_load_n(&gwrite_cycles,
> >> __ATOMIC_RELAXED)
> >> +/ TOTAL_WRITES);
> >> +
> >> +writer_done = 1;
> >> +/* Wait until all readers have exited */
> >> +for (i = j; i < num_cores; i++)
> >> +rte_eal_wait_lcore(enabled_core_ids[i]);
> >> +
> >> +rte_lpm_free(lpm);
> >> +rte_free(rv);
> >> +lpm = NULL;
> >> +rv = NULL;
> >> }
> >> -total_cycles = rte_rdtsc_precise() - begin;
> >> -
> >> -printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> >> -printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> >> -printf("Average LPM Add/Del: %g cycles\n",
> >> -(double)total_cycles / TOTAL_WRITES);
> >> -
> >> -writer_done = 1;
> >> -/* Wait until all readers have exited */
> >> -for (i = 0; i < num_cores; i++)
> >> -rte_eal_wait_lcore(enabled_core_ids[i]);
> >> -
> >> -rte_lpm_free(lpm);
> >>
> >> return 0;
> >>
> >> @@ -946,9 +767,8 @@ test_lpm_perf(void)
> >> rte_lpm_delete_all(lpm);
> >> rte_lpm_free(lpm);
> >>
> >> -test_lpm_rcu_perf();
> >> -
> >> -test_lpm_rcu_perf_multi_writer();
> >> +test_lpm_rcu_perf_multi_writer(0);
> >> +test_lpm_rcu_perf_multi_writer(1);
> >>
> >> return 0;
> >> }
> >> --
> >> 2.17.1
> 
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-03  5:32         ` Honnappa Nagarahalli
@ 2020-11-03 14:03           ` Dharmik Thakkar
  2020-11-03 14:51             ` Honnappa Nagarahalli
  2020-11-03 18:01             ` Medvedkin, Vladimir
  0 siblings, 2 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03 14:03 UTC (permalink / raw)
  To: Honnappa Nagarahalli; +Cc: Bruce Richardson, Vladimir Medvedkin, dev, nd
> On Nov 2, 2020, at 11:32 PM, Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> 
> <snip>
> 
>>>> 
>>>> Avoid code duplication by combining single and multi threaded tests
>>>> 
>>>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>>>> ---
>>>> app/test/test_lpm_perf.c | 362
>>>> ++++++++++-----------------------------
>>>> 1 file changed, 91 insertions(+), 271 deletions(-)
>>>> 
>>>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
>>>> index
>>>> 224c92fa3d65..229c835c23f7 100644
>>>> --- a/app/test/test_lpm_perf.c
>>>> +++ b/app/test/test_lpm_perf.c
>>>> @@ -67,6 +67,12 @@ enum {
>>>> IP_CLASS_C
>>>> };
>>>> 
>>>> +enum {
>>>> +SINGLE_WRITER = 1,
>>>> +MULTI_WRITER_1,
>>>> +MULTI_WRITER_2
>>>> +};
>>> Do we need this? Can we use the number of cores instead?
>>> 
>> 
>> There are 3 combinations of writes (adds/deletes):
>> 1. Write all the entries - in case of a single writer
>> 2. Write half of the entries - in case of multiple writers
>> 3. Write the remaining half of the entries - in case of multiple writers
>> 
>> So, I think this is required.
> IMO, this is not scalable. Essentially, we need 2 parameters to divide the routes among the writer threads: 1) the total number of writers and 2) the core ID in the linear space.
> Creating a structure with these 2 parameters and passing it to the writer thread would be better and more scalable.
Yes, agreed, this is only applicable to 2 writers. Currently, the multi-writer test is limited to a maximum of 2 writers.
To support more writers, we need something like this (which I believe is in line with your suggestion):
1. Calculate what each writer will write: single_insert = TOTAL_WRITES / num_writers
2. Pass core ID in linear space as an argument to the writer function: pos_core
3. Calculate si and ei in the writer function: si = pos_core * single_insert; ei = si + single_insert
I can update the patch to enable more than 2 writers.
Do you also suggest we expand the scope of the test to cover more than 2 writers?
This will increase the running time of the test (which is already significant even with 2 writers).
> 
>> 
>>>> +
>>>> /* struct route_rule_count defines the total number of rules in
>>>> following a/b/c
>>>> * each item in a[]/b[]/c[] is the number of common IP address class
>>>> A/B/C, not
>>>> * including the ones for private local network.
>>>> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg)  {
>> unsigned
>>>> int i, j, si, ei; uint64_t begin, total_cycles; -uint8_t core_id =
>>>> (uint8_t)((uintptr_t)arg);
>>>> +uint8_t writer_id = (uint8_t)((uintptr_t)arg);
>>>> uint32_t next_hop_add = 0xAA;
>>>> 
>>>> -/* 2 writer threads are used */
>>>> -if (core_id % 2 == 0) {
>>>> +/* Single writer (writer_id = 1) */
>>>> +if (writer_id == SINGLE_WRITER) {
>>>> +si = 0;
>>>> +ei = NUM_LDEPTH_ROUTE_ENTRIES;
>>>> +}
>>>> +/* 2 Writers (writer_id = 2/3)*/
>>>> +else if (writer_id == MULTI_WRITER_1) {
>>>> si = 0;
>>>> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>>>> } else {
>>>> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) for (i = 0;
>>>> i < RCU_ITERATIONS; i++) {
>>>> /* Add all the entries */
>>>> for (j = si; j < ei; j++) {
>>>> -pthread_mutex_lock(&lpm_mutex);
>>>> +if (writer_id != SINGLE_WRITER)
>>>> +pthread_mutex_lock(&lpm_mutex);
>>>> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>>>> large_ldepth_route_table[j].depth,
>>>> next_hop_add) != 0) {
>>>> printf("Failed to add iteration %d, route# %d\n", i, j);
>>>> -pthread_mutex_unlock(&lpm_mutex);
>>>> +if (writer_id != SINGLE_WRITER)
>>>> +
>>>> pthread_mutex_unlock(&lpm_mutex);
>>>> return -1;
>>>> }
>>>> -pthread_mutex_unlock(&lpm_mutex);
>>>> +if (writer_id != SINGLE_WRITER)
>>>> +pthread_mutex_unlock(&lpm_mutex);
>>>> }
>>>> 
>>>> /* Delete all the entries */
>>>> for (j = si; j < ei; j++) {
>>>> -pthread_mutex_lock(&lpm_mutex);
>>>> +if (writer_id != SINGLE_WRITER)
>>>> +pthread_mutex_lock(&lpm_mutex);
>>>> if (rte_lpm_delete(lpm,
>>>> large_ldepth_route_table[j].ip,
>>>> large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete
>>>> iteration %d, route# %d\n", i, j); -pthread_mutex_unlock(&lpm_mutex);
>>>> +if (writer_id != SINGLE_WRITER)
>>>> +
>>>> pthread_mutex_unlock(&lpm_mutex);
>>>> return -1;
>>>> }
>>>> -pthread_mutex_unlock(&lpm_mutex);
>>>> +if (writer_id != SINGLE_WRITER)
>>>> +pthread_mutex_unlock(&lpm_mutex);
>>>> }
>>>> }
>>>> 
>>>> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>>> 
>>>> /*
>>>> * Functional test:
>>>> - * 2 writers, rest are readers
>>>> + * 1/2 writers, rest are readers
>>>> */
>>>> static int
>>>> -test_lpm_rcu_perf_multi_writer(void)
>>>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>>>> {
>>>> struct rte_lpm_config config;
>>>> size_t sz;
>>>> -unsigned int i;
>>>> +unsigned int i, j;
>>>> uint16_t core_id;
>>>> struct rte_lpm_rcu_config rcu_cfg = {0};
>>>> +int (*reader_f)(void *arg) = NULL;
>>>> 
>>>> if (rte_lcore_count() < 3) {
>>>> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
>>>> least 3\n"); @@ -504,273 +522,76 @@
>>>> test_lpm_rcu_perf_multi_writer(void)
>>>> num_cores++;
>>>> }
>>>> 
>>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration
>>>> enabled\n", -num_cores - 2);
>>>> -
>>>> -/* Create LPM table */
>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>> config.number_tbl8s =
>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>> -
>>>> -/* Init RCU variable */
>>>> -sz = rte_rcu_qsbr_get_memsize(num_cores);
>>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
>>>> -
>>>> -rcu_cfg.v = rv;
>>>> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm,
>>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto
>>>> error; -}
>>>> -
>>>> -writer_done = 0;
>>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>>> -
>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>> -
>>>> -/* Launch reader threads */
>>>> -for (i = 2; i < num_cores; i++)
>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>>>> -enabled_core_ids[i]);
>>>> -
>>>> -/* Launch writer threads */
>>>> -for (i = 0; i < 2; i++)
>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>>>> -(void *)(uintptr_t)i,
>>>> -enabled_core_ids[i]);
>>>> -
>>>> -/* Wait for writer threads */
>>>> -for (i = 0; i < 2; i++)
>>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
>>>> -
>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
>>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
>>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
>>>> -
>>>> -writer_done = 1;
>>>> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores;
>>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
>>>> -
>>>> -rte_lpm_free(lpm);
>>>> -rte_free(rv);
>>>> -lpm = NULL;
>>>> -rv = NULL;
>>>> -
>>>> -/* Test without RCU integration */
>>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration
>>>> disabled\n", -num_cores - 2);
>>>> -
>>>> -/* Create LPM table */
>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>> config.number_tbl8s =
>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>> -
>>>> -writer_done = 0;
>>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>> -
>>>> -/* Launch reader threads */
>>>> -for (i = 2; i < num_cores; i++)
>>>> -rte_eal_remote_launch(test_lpm_reader, NULL, -enabled_core_ids[i]);
>>>> -
>>>> -/* Launch writer threads */
>>>> -for (i = 0; i < 2; i++)
>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>>>> -(void *)(uintptr_t)i,
>>>> -enabled_core_ids[i]);
>>>> -
>>>> -/* Wait for writer threads */
>>>> -for (i = 0; i < 2; i++)
>>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
>>>> -
>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
>>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
>>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
>>>> -
>>>> -writer_done = 1;
>>>> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores;
>>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
>>>> -
>>>> -rte_lpm_free(lpm);
>>>> -
>>>> -return 0;
>>>> -
>>>> -error:
>>>> -writer_done = 1;
>>>> -/* Wait until all readers have exited */ -rte_eal_mp_wait_lcore();
>>>> -
>>>> -rte_lpm_free(lpm);
>>>> -rte_free(rv);
>>>> -
>>>> -return -1;
>>>> -}
>>>> -
>>>> -/*
>>>> - * Functional test:
>>>> - * Single writer, rest are readers
>>>> - */
>>>> -static int
>>>> -test_lpm_rcu_perf(void)
>>>> -{
>>>> -struct rte_lpm_config config;
>>>> -uint64_t begin, total_cycles;
>>>> -size_t sz;
>>>> -unsigned int i, j;
>>>> -uint16_t core_id;
>>>> -uint32_t next_hop_add = 0xAA;
>>>> -struct rte_lpm_rcu_config rcu_cfg = {0};
>>>> -
>>>> -if (rte_lcore_count() < 2) {
>>>> -printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
>>>> least 2\n"); -return TEST_SKIPPED; -}
>>>> -
>>>> -num_cores = 0;
>>>> -RTE_LCORE_FOREACH_WORKER(core_id) {
>>>> -enabled_core_ids[num_cores] = core_id; -num_cores++; -}
>>>> -
>>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration
>>>> enabled\n", -num_cores);
>>>> -
>>>> -/* Create LPM table */
>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>> config.number_tbl8s =
>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>> -
>>>> -/* Init RCU variable */
>>>> -sz = rte_rcu_qsbr_get_memsize(num_cores);
>>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
>>>> -
>>>> -rcu_cfg.v = rv;
>>>> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm,
>>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto
>>>> error; -}
>>>> -
>>>> -writer_done = 0;
>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>> -
>>>> -/* Launch reader threads */
>>>> -for (i = 0; i < num_cores; i++)
>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>>>> -enabled_core_ids[i]);
>>>> -
>>>> -/* Measure add/delete. */
>>>> -begin = rte_rdtsc_precise();
>>>> -for (i = 0; i < RCU_ITERATIONS; i++) {
>>>> -/* Add all the entries */
>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if (rte_lpm_add(lpm,
>>>> large_ldepth_route_table[j].ip, -large_ldepth_route_table[j].depth,
>>>> -next_hop_add) != 0) {
>>>> -printf("Failed to add iteration %d, route# %d\n", -i, j);
>>>> +for (j = 1; j < 3; j++) {
>>>> +if (use_rcu)
>>>> +printf("\nPerf test: %d writer(s), %d reader(s),"
>>>> +       " RCU integration enabled\n", j, num_cores - j); else
>>>> +printf("\nPerf test: %d writer(s), %d reader(s),"
>>>> +       " RCU integration disabled\n", j, num_cores - j);
>>>> +
>>>> +/* Create LPM table */
>>>> +config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> config.number_tbl8s =
>>>> +NUM_LDEPTH_ROUTE_ENTRIES; config.flags = 0; lpm =
>>>> +rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>> +TEST_LPM_ASSERT(lpm != NULL);
>>>> +
>>>> +/* Init RCU variable */
>>>> +if (use_rcu) {
>>>> +sz = rte_rcu_qsbr_get_memsize(num_cores);
>>>> +rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>>>> +
>>>> RTE_CACHE_LINE_SIZE);
>>>> +rte_rcu_qsbr_init(rv, num_cores);
>>>> +
>>>> +rcu_cfg.v = rv;
>>>> +/* Assign the RCU variable to LPM */ if (rte_lpm_rcu_qsbr_add(lpm,
>>>> +&rcu_cfg) != 0) { printf("RCU variable assignment failed\n");
>>>> goto error;
>>>> }
>>>> 
>>>> -/* Delete all the entries */
>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
>>>> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>>>> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to delete
>>>> iteration %d, route# %d\n", -i, j); -goto error; -} -} -total_cycles
>>>> = rte_rdtsc_precise() - begin;
>>>> +reader_f = test_lpm_rcu_qsbr_reader; } else reader_f =
>>>> +test_lpm_reader;
>>>> 
>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g
>>>> cycles\n", -(double)total_cycles / TOTAL_WRITES);
>>>> +writer_done = 0;
>>>> +__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>>> 
>>>> -writer_done = 1;
>>>> -/* Wait until all readers have exited */ -for (i = 0; i < num_cores;
>>>> i++) -if (rte_eal_wait_lcore(enabled_core_ids[i]);
>>>> -
>>>> -rte_lpm_free(lpm);
>>>> -rte_free(rv);
>>>> -lpm = NULL;
>>>> -rv = NULL;
>>>> -
>>>> -/* Test without RCU integration */
>>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration
>>>> disabled\n", -num_cores);
>>>> -
>>>> -/* Create LPM table */
>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>> config.number_tbl8s =
>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>> +__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>> 
>>>> -writer_done = 0;
>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>> +/* Launch reader threads */
>>>> +for (i = j; i < num_cores; i++)
>>>> +rte_eal_remote_launch(reader_f, NULL,
>>>> +enabled_core_ids[i]);
>>>> 
>>>> -/* Launch reader threads */
>>>> -for (i = 0; i < num_cores; i++)
>>>> -rte_eal_remote_launch(test_lpm_reader, NULL,
>>>> -enabled_core_ids[i]);
>>>> +/* Launch writer threads */
>>>> +for (i = 0; i < j; i++)
>>>> +rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>>>> +(void *)(uintptr_t)(i + j),
>>> This can be just 'j'?
>>> 
>>>> +enabled_core_ids[i]);
>>>> 
>>>> -/* Measure add/delete. */
>>>> -begin = rte_rdtsc_precise();
>>>> -for (i = 0; i < RCU_ITERATIONS; i++) {
>>>> -/* Add all the entries */
>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>>>> -if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>>>> -large_ldepth_route_table[j].depth,
>>>> -next_hop_add) != 0) {
>>>> -printf("Failed to add iteration %d, route#
>>>> %d\n",
>>>> -i, j);
>>>> +/* Wait for writer threads */
>>>> +for (i = 0; i < j; i++)
>>>> +if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>>>> goto error;
>>>> -}
>>>> 
>>>> -/* Delete all the entries */
>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>>>> -if (rte_lpm_delete(lpm,
>>>> large_ldepth_route_table[j].ip,
>>>> -large_ldepth_route_table[j].depth) != 0) {
>>>> -printf("Failed to delete iteration %d, route#
>>>> %d\n",
>>>> -i, j);
>>>> -goto error;
>>>> -}
>>>> +printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>>>> +printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>>>> +printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>>>> +__atomic_load_n(&gwrite_cycles,
>>>> __ATOMIC_RELAXED)
>>>> +/ TOTAL_WRITES);
>>>> +
>>>> +writer_done = 1;
>>>> +/* Wait until all readers have exited */
>>>> +for (i = j; i < num_cores; i++)
>>>> +rte_eal_wait_lcore(enabled_core_ids[i]);
>>>> +
>>>> +rte_lpm_free(lpm);
>>>> +rte_free(rv);
>>>> +lpm = NULL;
>>>> +rv = NULL;
>>>> }
>>>> -total_cycles = rte_rdtsc_precise() - begin;
>>>> -
>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>>>> -printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>>>> -printf("Average LPM Add/Del: %g cycles\n",
>>>> -(double)total_cycles / TOTAL_WRITES);
>>>> -
>>>> -writer_done = 1;
>>>> -/* Wait until all readers have exited */
>>>> -for (i = 0; i < num_cores; i++)
>>>> -rte_eal_wait_lcore(enabled_core_ids[i]);
>>>> -
>>>> -rte_lpm_free(lpm);
>>>> 
>>>> return 0;
>>>> 
>>>> @@ -946,9 +767,8 @@ test_lpm_perf(void)
>>>> rte_lpm_delete_all(lpm);
>>>> rte_lpm_free(lpm);
>>>> 
>>>> -test_lpm_rcu_perf();
>>>> -
>>>> -test_lpm_rcu_perf_multi_writer();
>>>> +test_lpm_rcu_perf_multi_writer(0);
>>>> +test_lpm_rcu_perf_multi_writer(1);
>>>> 
>>>> return 0;
>>>> }
>>>> --
>>>> 2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-03 14:03           ` Dharmik Thakkar
@ 2020-11-03 14:51             ` Honnappa Nagarahalli
  2020-11-03 18:01             ` Medvedkin, Vladimir
  1 sibling, 0 replies; 52+ messages in thread
From: Honnappa Nagarahalli @ 2020-11-03 14:51 UTC (permalink / raw)
  To: Dharmik Thakkar
  Cc: Bruce Richardson, Vladimir Medvedkin, dev, nd, Honnappa Nagarahalli, nd
<snip>
> >>>>
> >>>> Avoid code duplication by combining single and multi threaded tests
> >>>>
> >>>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> >>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> >>>> ---
> >>>> app/test/test_lpm_perf.c | 362
> >>>> ++++++++++-----------------------------
> >>>> 1 file changed, 91 insertions(+), 271 deletions(-)
> >>>>
> >>>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
> >>>> index
> >>>> 224c92fa3d65..229c835c23f7 100644
> >>>> --- a/app/test/test_lpm_perf.c
> >>>> +++ b/app/test/test_lpm_perf.c
> >>>> @@ -67,6 +67,12 @@ enum {
> >>>> IP_CLASS_C
> >>>> };
> >>>>
> >>>> +enum {
> >>>> +SINGLE_WRITER = 1,
> >>>> +MULTI_WRITER_1,
> >>>> +MULTI_WRITER_2
> >>>> +};
> >>> Do we need this? Can we use the number of cores instead?
> >>>
> >>
> >> There are 3 combinations of writes (adds/deletes):
> >> 1. Write all the entries - in case of a single writer
> >> 2. Write half of the entries - in case of multiple writers
> >> 3. Write the remaining half of the entries - in case of multiple writers
> >>
> >> So, I think this is required.
> > > IMO, this is not scalable. Essentially, we need 2 parameters to divide the
> > > routes among the writer threads: 1) the total number of writers and
> > > 2) the core ID in the linear space.
> > > Creating a structure with these 2 and passing that to the writer thread
> > > would be better and more scalable.
> 
> Yes, agreed, this is only applicable to 2 writers. Currently, the multi-writer
> test is limited to a maximum of 2 writers.
> To support more writers, we need something like this (which I
> believe is in line with your suggestion):
> 1. Calculate what each writer will write: single_insert = TOTAL_WRITES / num_writers
> 2. Pass the core ID in linear space as an argument to the writer function: pos_core
> 3. Calculate si and ei in the writer function: si = pos_core * single_insert; ei = si + single_insert
> 
> I can update the patch to enable more than 2 writers.
> Do you also suggest we expand the scope of the test to test with more than
> 2 writers?
> This will increase the time for which the test is running (which currently is
> significant even with 2 writers).
Agreed: no to increasing the number of writers, but yes to making the code more generic.
> 
> >
> >>
> >>>> +
> >>>> /* struct route_rule_count defines the total number of rules in
> >>>> following a/b/c
> >>>> * each item in a[]/b[]/c[] is the number of common IP address class
> >>>> A/B/C, not
> >>>> * including the ones for private local network.
> >>>> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg)  {
> >> unsigned
> >>>> int i, j, si, ei; uint64_t begin, total_cycles; -uint8_t core_id =
> >>>> (uint8_t)((uintptr_t)arg);
> >>>> +uint8_t writer_id = (uint8_t)((uintptr_t)arg);
> >>>> uint32_t next_hop_add = 0xAA;
> >>>>
> >>>> -/* 2 writer threads are used */
> >>>> -if (core_id % 2 == 0) {
> >>>> +/* Single writer (writer_id = 1) */ if (writer_id ==
> >>>> +SINGLE_WRITER) { si = 0; ei = NUM_LDEPTH_ROUTE_ENTRIES; }
> >>>> +/* 2 Writers (writer_id = 2/3)*/
> >>>> +else if (writer_id == MULTI_WRITER_1) {
> >>>> si = 0;
> >>>> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> >>>> } else {
> >>>> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) for (i =
> >>>> 0; i < RCU_ITERATIONS; i++) {
> >>>> /* Add all the entries */
> >>>> for (j = si; j < ei; j++) {
> >>>> -pthread_mutex_lock(&lpm_mutex);
> >>>> +if (writer_id != SINGLE_WRITER)
> >>>> +pthread_mutex_lock(&lpm_mutex);
> >>>> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> >>>> large_ldepth_route_table[j].depth,
> >>>> next_hop_add) != 0) {
> >>>> printf("Failed to add iteration %d, route# %d\n", i, j);
> >>>> -pthread_mutex_unlock(&lpm_mutex);
> >>>> +if (writer_id != SINGLE_WRITER)
> >>>> +
> >>>> pthread_mutex_unlock(&lpm_mutex);
> >>>> return -1;
> >>>> }
> >>>> -pthread_mutex_unlock(&lpm_mutex);
> >>>> +if (writer_id != SINGLE_WRITER)
> >>>> +pthread_mutex_unlock(&lpm_mutex);
> >>>> }
> >>>>
> >>>> /* Delete all the entries */
> >>>> for (j = si; j < ei; j++) {
> >>>> -pthread_mutex_lock(&lpm_mutex);
> >>>> +if (writer_id != SINGLE_WRITER)
> >>>> +pthread_mutex_lock(&lpm_mutex);
> >>>> if (rte_lpm_delete(lpm,
> >>>> large_ldepth_route_table[j].ip,
> >>>> large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete
> >>>> iteration %d, route# %d\n", i, j);
> >>>> -pthread_mutex_unlock(&lpm_mutex);
> >>>> +if (writer_id != SINGLE_WRITER)
> >>>> +
> >>>> pthread_mutex_unlock(&lpm_mutex);
> >>>> return -1;
> >>>> }
> >>>> -pthread_mutex_unlock(&lpm_mutex);
> >>>> +if (writer_id != SINGLE_WRITER)
> >>>> +pthread_mutex_unlock(&lpm_mutex);
> >>>> }
> >>>> }
> >>>>
> >>>> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
> >>>>
> >>>> /*
> >>>> * Functional test:
> >>>> - * 2 writers, rest are readers
> >>>> + * 1/2 writers, rest are readers
> >>>> */
> >>>> static int
> >>>> -test_lpm_rcu_perf_multi_writer(void)
> >>>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
> >>>> {
> >>>> struct rte_lpm_config config;
> >>>> size_t sz;
> >>>> -unsigned int i;
> >>>> +unsigned int i, j;
> >>>> uint16_t core_id;
> >>>> struct rte_lpm_rcu_config rcu_cfg = {0};
> >>>> +int (*reader_f)(void *arg) = NULL;
> >>>>
> >>>> if (rte_lcore_count() < 3) {
> >>>> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
> >>>> least 3\n"); @@ -504,273 +522,76 @@
> >>>> test_lpm_rcu_perf_multi_writer(void)
> >>>> num_cores++;
> >>>> }
> >>>>
> >>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration
> >>>> enabled\n", -num_cores - 2);
> >>>> -
> >>>> -/* Create LPM table */
> >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> >> config.number_tbl8s =
> >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >>>> -TEST_LPM_ASSERT(lpm != NULL);
> >>>> -
> >>>> -/* Init RCU variable */
> >>>> -sz = rte_rcu_qsbr_get_memsize(num_cores);
> >>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> >>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
> >>>> -
> >>>> -rcu_cfg.v = rv;
> >>>> -/* Assign the RCU variable to LPM */ -if
> >>>> (rte_lpm_rcu_qsbr_add(lpm,
> >>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n");
> >>>> -goto error; -}
> >>>> -
> >>>> -writer_done = 0;
> >>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> >>>> -
> >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >>>> -
> >>>> -/* Launch reader threads */
> >>>> -for (i = 2; i < num_cores; i++)
> >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> >>>> -enabled_core_ids[i]);
> >>>> -
> >>>> -/* Launch writer threads */
> >>>> -for (i = 0; i < 2; i++)
> >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> >>>> -(void *)(uintptr_t)i,
> >>>> -enabled_core_ids[i]);
> >>>> -
> >>>> -/* Wait for writer threads */
> >>>> -for (i = 0; i < 2; i++)
> >>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
> >>>> -
> >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
> >>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
> >>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
> >>>> -
> >>>> -writer_done = 1;
> >>>> -/* Wait until all readers have exited */ -for (i = 2; i <
> >>>> num_cores;
> >>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
> >>>> -
> >>>> -rte_lpm_free(lpm);
> >>>> -rte_free(rv);
> >>>> -lpm = NULL;
> >>>> -rv = NULL;
> >>>> -
> >>>> -/* Test without RCU integration */ -printf("\nPerf test: 2
> >>>> writers, %d readers, RCU integration disabled\n", -num_cores - 2);
> >>>> -
> >>>> -/* Create LPM table */
> >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> >> config.number_tbl8s =
> >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >>>> -TEST_LPM_ASSERT(lpm != NULL);
> >>>> -
> >>>> -writer_done = 0;
> >>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >>>> -
> >>>> -/* Launch reader threads */
> >>>> -for (i = 2; i < num_cores; i++)
> >>>> -rte_eal_remote_launch(test_lpm_reader, NULL,
> >>>> -enabled_core_ids[i]);
> >>>> -
> >>>> -/* Launch writer threads */
> >>>> -for (i = 0; i < 2; i++)
> >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> >>>> -(void *)(uintptr_t)i,
> >>>> -enabled_core_ids[i]);
> >>>> -
> >>>> -/* Wait for writer threads */
> >>>> -for (i = 0; i < 2; i++)
> >>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
> >>>> -
> >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
> >>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
> >>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
> >>>> -
> >>>> -writer_done = 1;
> >>>> -/* Wait until all readers have exited */ -for (i = 2; i <
> >>>> num_cores;
> >>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
> >>>> -
> >>>> -rte_lpm_free(lpm);
> >>>> -
> >>>> -return 0;
> >>>> -
> >>>> -error:
> >>>> -writer_done = 1;
> >>>> -/* Wait until all readers have exited */ -rte_eal_mp_wait_lcore();
> >>>> -
> >>>> -rte_lpm_free(lpm);
> >>>> -rte_free(rv);
> >>>> -
> >>>> -return -1;
> >>>> -}
> >>>> -
> >>>> -/*
> >>>> - * Functional test:
> >>>> - * Single writer, rest are readers
> >>>> - */
> >>>> -static int
> >>>> -test_lpm_rcu_perf(void)
> >>>> -{
> >>>> -struct rte_lpm_config config;
> >>>> -uint64_t begin, total_cycles;
> >>>> -size_t sz;
> >>>> -unsigned int i, j;
> >>>> -uint16_t core_id;
> >>>> -uint32_t next_hop_add = 0xAA;
> >>>> -struct rte_lpm_rcu_config rcu_cfg = {0};
> >>>> -
> >>>> -if (rte_lcore_count() < 2) {
> >>>> -printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
> >>>> least 2\n"); -return TEST_SKIPPED; -}
> >>>> -
> >>>> -num_cores = 0;
> >>>> -RTE_LCORE_FOREACH_WORKER(core_id) { -
> enabled_core_ids[num_cores] =
> >>>> core_id; -num_cores++; -}
> >>>> -
> >>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration
> >>>> enabled\n", -num_cores);
> >>>> -
> >>>> -/* Create LPM table */
> >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> >> config.number_tbl8s =
> >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >>>> -TEST_LPM_ASSERT(lpm != NULL);
> >>>> -
> >>>> -/* Init RCU variable */
> >>>> -sz = rte_rcu_qsbr_get_memsize(num_cores);
> >>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> >>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
> >>>> -
> >>>> -rcu_cfg.v = rv;
> >>>> -/* Assign the RCU variable to LPM */ -if
> >>>> (rte_lpm_rcu_qsbr_add(lpm,
> >>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n");
> >>>> -goto error; -}
> >>>> -
> >>>> -writer_done = 0;
> >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >>>> -
> >>>> -/* Launch reader threads */
> >>>> -for (i = 0; i < num_cores; i++)
> >>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> >>>> -enabled_core_ids[i]);
> >>>> -
> >>>> -/* Measure add/delete. */
> >>>> -begin = rte_rdtsc_precise();
> >>>> -for (i = 0; i < RCU_ITERATIONS; i++) {
> >>>> -/* Add all the entries */
> >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
> >>>> (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> >>>> -large_ldepth_route_table[j].depth,
> >>>> -next_hop_add) != 0) {
> >>>> -printf("Failed to add iteration %d, route# %d\n", -i, j);
> >>>> +for (j = 1; j < 3; j++) {
> >>>> +if (use_rcu)
> >>>> +printf("\nPerf test: %d writer(s), %d reader(s),"
> >>>> +       " RCU integration enabled\n", j, num_cores - j); else
> >>>> +printf("\nPerf test: %d writer(s), %d reader(s),"
> >>>> +       " RCU integration disabled\n", j, num_cores - j);
> >>>> +
> >>>> +/* Create LPM table */
> >>>> +config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> >> config.number_tbl8s =
> >>>> +NUM_LDEPTH_ROUTE_ENTRIES; config.flags = 0; lpm =
> >>>> +rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >>>> +TEST_LPM_ASSERT(lpm != NULL);
> >>>> +
> >>>> +/* Init RCU variable */
> >>>> +if (use_rcu) {
> >>>> +sz = rte_rcu_qsbr_get_memsize(num_cores);
> >>>> +rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> >>>> +
> >>>> RTE_CACHE_LINE_SIZE);
> >>>> +rte_rcu_qsbr_init(rv, num_cores);
> >>>> +
> >>>> +rcu_cfg.v = rv;
> >>>> +/* Assign the RCU variable to LPM */ if (rte_lpm_rcu_qsbr_add(lpm,
> >>>> +&rcu_cfg) != 0) { printf("RCU variable assignment failed\n");
> >>>> goto error;
> >>>> }
> >>>>
> >>>> -/* Delete all the entries */
> >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
> >>>> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> >>>> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to
> >>>> delete iteration %d, route# %d\n", -i, j); -goto error; -} -}
> >>>> -total_cycles = rte_rdtsc_precise() - begin;
> >>>> +reader_f = test_lpm_rcu_qsbr_reader; } else reader_f =
> >>>> +test_lpm_reader;
> >>>>
> >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g
> >>>> cycles\n", -(double)total_cycles / TOTAL_WRITES);
> >>>> +writer_done = 0;
> >>>> +__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> >>>>
> >>>> -writer_done = 1;
> >>>> -/* Wait until all readers have exited */ -for (i = 0; i <
> >>>> num_cores;
> >>>> i++) -if (rte_eal_wait_lcore(enabled_core_ids[i]);
> >>>> -
> >>>> -rte_lpm_free(lpm);
> >>>> -rte_free(rv);
> >>>> -lpm = NULL;
> >>>> -rv = NULL;
> >>>> -
> >>>> -/* Test without RCU integration */ -printf("\nPerf test: 1 writer,
> >>>> %d readers, RCU integration disabled\n", -num_cores);
> >>>> -
> >>>> -/* Create LPM table */
> >>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
> >> config.number_tbl8s =
> >>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
> >>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> >>>> -TEST_LPM_ASSERT(lpm != NULL);
> >>>> +__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >>>>
> >>>> -writer_done = 0;
> >>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> >>>> +/* Launch reader threads */
> >>>> +for (i = j; i < num_cores; i++)
> >>>> +rte_eal_remote_launch(reader_f, NULL, enabled_core_ids[i]);
> >>>>
> >>>> -/* Launch reader threads */
> >>>> -for (i = 0; i < num_cores; i++)
> >>>> -rte_eal_remote_launch(test_lpm_reader, NULL,
> >>>> -enabled_core_ids[i]);
> >>>> +/* Launch writer threads */
> >>>> +for (i = 0; i < j; i++)
> >>>> +rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> >>>> +(void *)(uintptr_t)(i + j),
> >>> This can be just 'j'?
> >>>
> >>>> +enabled_core_ids[i]);
> >>>>
> >>>> -/* Measure add/delete. */
> >>>> -begin = rte_rdtsc_precise();
> >>>> -for (i = 0; i < RCU_ITERATIONS; i++) {
> >>>> -/* Add all the entries */
> >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
> >>>> (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> >>>> -large_ldepth_route_table[j].depth,
> >>>> -next_hop_add) != 0) {
> >>>> -printf("Failed to add iteration %d, route# %d\n", -i, j);
> >>>> +/* Wait for writer threads */
> >>>> +for (i = 0; i < j; i++)
> >>>> +if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> >>>> goto error;
> >>>> -}
> >>>>
> >>>> -/* Delete all the entries */
> >>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
> >>>> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> >>>> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to
> >>>> delete iteration %d, route# %d\n", -i, j); -goto error; -}
> >>>> +printf("Total LPM Adds: %d\n", TOTAL_WRITES); printf("Total LPM
> >>>> +Deletes: %d\n", TOTAL_WRITES); printf("Average LPM Add/Del:
> >>>> +%"PRIu64" cycles\n", __atomic_load_n(&gwrite_cycles,
> >>>> __ATOMIC_RELAXED)
> >>>> +/ TOTAL_WRITES);
> >>>> +
> >>>> +writer_done = 1;
> >>>> +/* Wait until all readers have exited */ for (i = j; i <
> >>>> +num_cores; i++) rte_eal_wait_lcore(enabled_core_ids[i]);
> >>>> +
> >>>> +rte_lpm_free(lpm);
> >>>> +rte_free(rv);
> >>>> +lpm = NULL;
> >>>> +rv = NULL;
> >>>> }
> >>>> -total_cycles = rte_rdtsc_precise() - begin;
> >>>> -
> >>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
> >>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g
> >>>> cycles\n", -(double)total_cycles / TOTAL_WRITES);
> >>>> -
> >>>> -writer_done = 1;
> >>>> -/* Wait until all readers have exited */ -for (i = 0; i <
> >>>> num_cores; i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
> >>>> -
> >>>> -rte_lpm_free(lpm);
> >>>>
> >>>> return 0;
> >>>>
> >>>> @@ -946,9 +767,8 @@ test_lpm_perf(void) rte_lpm_delete_all(lpm);
> >>>> rte_lpm_free(lpm);
> >>>>
> >>>> -test_lpm_rcu_perf();
> >>>> -
> >>>> -test_lpm_rcu_perf_multi_writer();
> >>>> +test_lpm_rcu_perf_multi_writer(0);
> >>>> +test_lpm_rcu_perf_multi_writer(1);
> >>>>
> >>>> return 0;
> >>>> }
> >>>> --
> >>>> 2.17.1
> 
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-03 14:03           ` Dharmik Thakkar
  2020-11-03 14:51             ` Honnappa Nagarahalli
@ 2020-11-03 18:01             ` Medvedkin, Vladimir
  1 sibling, 0 replies; 52+ messages in thread
From: Medvedkin, Vladimir @ 2020-11-03 18:01 UTC (permalink / raw)
  To: Dharmik Thakkar, Honnappa Nagarahalli; +Cc: Bruce Richardson, dev, nd
Hi,
On 03/11/2020 14:03, Dharmik Thakkar wrote:
> 
> 
>> On Nov 2, 2020, at 11:32 PM, Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
>>
>> <snip>
>>
>>>>>
>>>>> Avoid code duplication by combining single and multi threaded tests
>>>>>
>>>>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>>>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>>>>> ---
>>>>> app/test/test_lpm_perf.c | 362
>>>>> ++++++++++-----------------------------
>>>>> 1 file changed, 91 insertions(+), 271 deletions(-)
>>>>>
>>>>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
>>>>> index
>>>>> 224c92fa3d65..229c835c23f7 100644
>>>>> --- a/app/test/test_lpm_perf.c
>>>>> +++ b/app/test/test_lpm_perf.c
>>>>> @@ -67,6 +67,12 @@ enum {
>>>>> IP_CLASS_C
>>>>> };
>>>>>
>>>>> +enum {
>>>>> +SINGLE_WRITER = 1,
>>>>> +MULTI_WRITER_1,
>>>>> +MULTI_WRITER_2
>>>>> +};
>>>> Do we need this? Can we use the number of cores instead?
>>>>
>>>
>>> There are 3 combinations of writes (adds/deletes):
>>> 1. Write all the entries - in case of a single writer
>>> 2. Write half of the entries - in case of multiple writers
>>> 3. Write the remaining half of the entries - in case of multiple writers
>>>
>>> So, I think this is required.
>> IMO, this is not scalable. Essentially, we need 2 parameters to divide the routes among the writer threads: 1) the total number of writers and 2) the core ID in the linear space.
>> Creating a structure with these 2 and passing that to the writer thread would be better and scalable.
> 
> Yes, agreed, this is only applicable to 2 writers. Currently, the multi-writer test is limited to a maximum of 2 writers.
> To support more writers, we need something like this (which I believe is in line with your suggestion):
> 1. Calculate what each writer will write: single_insert = TOTAL_WRITES / num_writers
> 2. Pass core ID in linear space as an argument to the writer function: pos_core
> 3. Calculate si and ei in the writer function: si = pos_core * single_insert; ei = si + single_insert
> 
I agree with Honnappa's suggestion; it looks good to me and is better than the
previous implementation.
> I can update the patch to enable more than 2 writers.
> Do you also suggest we expand the scope of the test to test with more than 2 writers?
> This will increase the time for which the test is running (which currently is significant even with 2 writers).
> 
I don't see any reason to increase the number of writers beyond 2.
>>
>>>
>>>>> +
>>>>> /* struct route_rule_count defines the total number of rules in
>>>>> following a/b/c
>>>>> * each item in a[]/b[]/c[] is the number of common IP address class
>>>>> A/B/C, not
>>>>> * including the ones for private local network.
>>>>> @@ -430,11 +436,16 @@ test_lpm_rcu_qsbr_writer(void *arg)  {
>>> unsigned
>>>>> int i, j, si, ei; uint64_t begin, total_cycles; -uint8_t core_id =
>>>>> (uint8_t)((uintptr_t)arg);
>>>>> +uint8_t writer_id = (uint8_t)((uintptr_t)arg);
>>>>> uint32_t next_hop_add = 0xAA;
>>>>>
>>>>> -/* 2 writer threads are used */
>>>>> -if (core_id % 2 == 0) {
>>>>> +/* Single writer (writer_id = 1) */
>>>>> +if (writer_id == SINGLE_WRITER) {
>>>>> +si = 0;
>>>>> +ei = NUM_LDEPTH_ROUTE_ENTRIES;
>>>>> +}
>>>>> +/* 2 Writers (writer_id = 2/3)*/
>>>>> +else if (writer_id == MULTI_WRITER_1) {
>>>>> si = 0;
>>>>> ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>>>>> } else {
>>>>> @@ -447,29 +458,35 @@ test_lpm_rcu_qsbr_writer(void *arg) for (i = 0;
>>>>> i < RCU_ITERATIONS; i++) {
>>>>> /* Add all the entries */
>>>>> for (j = si; j < ei; j++) {
>>>>> -pthread_mutex_lock(&lpm_mutex);
>>>>> +if (writer_id != SINGLE_WRITER)
>>>>> +pthread_mutex_lock(&lpm_mutex);
>>>>> if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>>>>> large_ldepth_route_table[j].depth,
>>>>> next_hop_add) != 0) {
>>>>> printf("Failed to add iteration %d, route# %d\n", i, j);
>>>>> -pthread_mutex_unlock(&lpm_mutex);
>>>>> +if (writer_id != SINGLE_WRITER)
>>>>> +
>>>>> pthread_mutex_unlock(&lpm_mutex);
>>>>> return -1;
>>>>> }
>>>>> -pthread_mutex_unlock(&lpm_mutex);
>>>>> +if (writer_id != SINGLE_WRITER)
>>>>> +pthread_mutex_unlock(&lpm_mutex);
>>>>> }
>>>>>
>>>>> /* Delete all the entries */
>>>>> for (j = si; j < ei; j++) {
>>>>> -pthread_mutex_lock(&lpm_mutex);
>>>>> +if (writer_id != SINGLE_WRITER)
>>>>> +pthread_mutex_lock(&lpm_mutex);
>>>>> if (rte_lpm_delete(lpm,
>>>>> large_ldepth_route_table[j].ip,
>>>>> large_ldepth_route_table[j].depth) != 0) { printf("Failed to delete
>>>>> iteration %d, route# %d\n", i, j); -pthread_mutex_unlock(&lpm_mutex);
>>>>> +if (writer_id != SINGLE_WRITER)
>>>>> +
>>>>> pthread_mutex_unlock(&lpm_mutex);
>>>>> return -1;
>>>>> }
>>>>> -pthread_mutex_unlock(&lpm_mutex);
>>>>> +if (writer_id != SINGLE_WRITER)
>>>>> +pthread_mutex_unlock(&lpm_mutex);
>>>>> }
>>>>> }
>>>>>
>>>>> @@ -482,16 +499,17 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>>>>
>>>>> /*
>>>>> * Functional test:
>>>>> - * 2 writers, rest are readers
>>>>> + * 1/2 writers, rest are readers
>>>>> */
>>>>> static int
>>>>> -test_lpm_rcu_perf_multi_writer(void)
>>>>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>>>>> {
>>>>> struct rte_lpm_config config;
>>>>> size_t sz;
>>>>> -unsigned int i;
>>>>> +unsigned int i, j;
>>>>> uint16_t core_id;
>>>>> struct rte_lpm_rcu_config rcu_cfg = {0};
>>>>> +int (*reader_f)(void *arg) = NULL;
>>>>>
>>>>> if (rte_lcore_count() < 3) {
>>>>> printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
>>>>> least 3\n"); @@ -504,273 +522,76 @@
>>>>> test_lpm_rcu_perf_multi_writer(void)
>>>>> num_cores++;
>>>>> }
>>>>>
>>>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration
>>>>> enabled\n", -num_cores - 2);
>>>>> -
>>>>> -/* Create LPM table */
>>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>>> config.number_tbl8s =
>>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>>> -
>>>>> -/* Init RCU variable */
>>>>> -sz = rte_rcu_qsbr_get_memsize(num_cores);
>>>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>>>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
>>>>> -
>>>>> -rcu_cfg.v = rv;
>>>>> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm,
>>>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto
>>>>> error; -}
>>>>> -
>>>>> -writer_done = 0;
>>>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>>>> -
>>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>>> -
>>>>> -/* Launch reader threads */
>>>>> -for (i = 2; i < num_cores; i++)
>>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>>>>> -enabled_core_ids[i]);
>>>>> -
>>>>> -/* Launch writer threads */
>>>>> -for (i = 0; i < 2; i++)
>>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>>>>> -(void *)(uintptr_t)i,
>>>>> -enabled_core_ids[i]);
>>>>> -
>>>>> -/* Wait for writer threads */
>>>>> -for (i = 0; i < 2; i++)
>>>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
>>>>> -
>>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
>>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
>>>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
>>>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
>>>>> -
>>>>> -writer_done = 1;
>>>>> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores;
>>>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
>>>>> -
>>>>> -rte_lpm_free(lpm);
>>>>> -rte_free(rv);
>>>>> -lpm = NULL;
>>>>> -rv = NULL;
>>>>> -
>>>>> -/* Test without RCU integration */
>>>>> -printf("\nPerf test: 2 writers, %d readers, RCU integration
>>>>> disabled\n", -num_cores - 2);
>>>>> -
>>>>> -/* Create LPM table */
>>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>>> config.number_tbl8s =
>>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>>> -
>>>>> -writer_done = 0;
>>>>> -__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>>> -
>>>>> -/* Launch reader threads */
>>>>> -for (i = 2; i < num_cores; i++)
>>>>> -rte_eal_remote_launch(test_lpm_reader, NULL, -enabled_core_ids[i]);
>>>>> -
>>>>> -/* Launch writer threads */
>>>>> -for (i = 0; i < 2; i++)
>>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>>>>> -(void *)(uintptr_t)i,
>>>>> -enabled_core_ids[i]);
>>>>> -
>>>>> -/* Wait for writer threads */
>>>>> -for (i = 0; i < 2; i++)
>>>>> -if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0) -goto error;
>>>>> -
>>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
>>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del:
>>>>> %"PRIu64" cycles\n", -__atomic_load_n(&gwrite_cycles,
>>>>> __ATOMIC_RELAXED) -/ TOTAL_WRITES);
>>>>> -
>>>>> -writer_done = 1;
>>>>> -/* Wait until all readers have exited */ -for (i = 2; i < num_cores;
>>>>> i++) -rte_eal_wait_lcore(enabled_core_ids[i]);
>>>>> -
>>>>> -rte_lpm_free(lpm);
>>>>> -
>>>>> -return 0;
>>>>> -
>>>>> -error:
>>>>> -writer_done = 1;
>>>>> -/* Wait until all readers have exited */ -rte_eal_mp_wait_lcore();
>>>>> -
>>>>> -rte_lpm_free(lpm);
>>>>> -rte_free(rv);
>>>>> -
>>>>> -return -1;
>>>>> -}
>>>>> -
>>>>> -/*
>>>>> - * Functional test:
>>>>> - * Single writer, rest are readers
>>>>> - */
>>>>> -static int
>>>>> -test_lpm_rcu_perf(void)
>>>>> -{
>>>>> -struct rte_lpm_config config;
>>>>> -uint64_t begin, total_cycles;
>>>>> -size_t sz;
>>>>> -unsigned int i, j;
>>>>> -uint16_t core_id;
>>>>> -uint32_t next_hop_add = 0xAA;
>>>>> -struct rte_lpm_rcu_config rcu_cfg = {0};
>>>>> -
>>>>> -if (rte_lcore_count() < 2) {
>>>>> -printf("Not enough cores for lpm_rcu_perf_autotest, expecting at
>>>>> least 2\n"); -return TEST_SKIPPED; -}
>>>>> -
>>>>> -num_cores = 0;
>>>>> -RTE_LCORE_FOREACH_WORKER(core_id) {
>>>>> -enabled_core_ids[num_cores] = core_id; -num_cores++; -}
>>>>> -
>>>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration
>>>>> enabled\n", -num_cores);
>>>>> -
>>>>> -/* Create LPM table */
>>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>>> config.number_tbl8s =
>>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>>> -
>>>>> -/* Init RCU variable */
>>>>> -sz = rte_rcu_qsbr_get_memsize(num_cores);
>>>>> -rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>>>>> -RTE_CACHE_LINE_SIZE); -rte_rcu_qsbr_init(rv, num_cores);
>>>>> -
>>>>> -rcu_cfg.v = rv;
>>>>> -/* Assign the RCU variable to LPM */ -if (rte_lpm_rcu_qsbr_add(lpm,
>>>>> &rcu_cfg) != 0) { -printf("RCU variable assignment failed\n"); -goto
>>>>> error; -}
>>>>> -
>>>>> -writer_done = 0;
>>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>>> -
>>>>> -/* Launch reader threads */
>>>>> -for (i = 0; i < num_cores; i++)
>>>>> -rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>>>>> -enabled_core_ids[i]);
>>>>> -
>>>>> -/* Measure add/delete. */
>>>>> -begin = rte_rdtsc_precise();
>>>>> -for (i = 0; i < RCU_ITERATIONS; i++) {
>>>>> -/* Add all the entries */
>>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if (rte_lpm_add(lpm,
>>>>> large_ldepth_route_table[j].ip, -large_ldepth_route_table[j].depth,
>>>>> -next_hop_add) != 0) {
>>>>> -printf("Failed to add iteration %d, route# %d\n", -i, j);
>>>>> +for (j = 1; j < 3; j++) {
>>>>> +if (use_rcu)
>>>>> +printf("\nPerf test: %d writer(s), %d reader(s),"
>>>>> +       " RCU integration enabled\n", j, num_cores - j); else
>>>>> +printf("\nPerf test: %d writer(s), %d reader(s),"
>>>>> +       " RCU integration disabled\n", j, num_cores - j);
>>>>> +
>>>>> +/* Create LPM table */
>>>>> +config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>>> config.number_tbl8s =
>>>>> +NUM_LDEPTH_ROUTE_ENTRIES; config.flags = 0; lpm =
>>>>> +rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>>> +TEST_LPM_ASSERT(lpm != NULL);
>>>>> +
>>>>> +/* Init RCU variable */
>>>>> +if (use_rcu) {
>>>>> +sz = rte_rcu_qsbr_get_memsize(num_cores);
>>>>> +rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>>>>> +
>>>>> RTE_CACHE_LINE_SIZE);
>>>>> +rte_rcu_qsbr_init(rv, num_cores);
>>>>> +
>>>>> +rcu_cfg.v = rv;
>>>>> +/* Assign the RCU variable to LPM */ if (rte_lpm_rcu_qsbr_add(lpm,
>>>>> +&rcu_cfg) != 0) { printf("RCU variable assignment failed\n");
>>>>> goto error;
>>>>> }
>>>>>
>>>>> -/* Delete all the entries */
>>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++) -if
>>>>> (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>>>>> -large_ldepth_route_table[j].depth) != 0) { -printf("Failed to delete
>>>>> iteration %d, route# %d\n", -i, j); -goto error; -} -} -total_cycles
>>>>> = rte_rdtsc_precise() - begin;
>>>>> +reader_f = test_lpm_rcu_qsbr_reader; } else reader_f =
>>>>> +test_lpm_reader;
>>>>>
>>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES); -printf("Total LPM
>>>>> Deletes: %d\n", TOTAL_WRITES); -printf("Average LPM Add/Del: %g
>>>>> cycles\n", -(double)total_cycles / TOTAL_WRITES);
>>>>> +writer_done = 0;
>>>>> +__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>>>>
>>>>> -writer_done = 1;
>>>>> -/* Wait until all readers have exited */ -for (i = 0; i < num_cores;
>>>>> i++) -if (rte_eal_wait_lcore(enabled_core_ids[i]);
>>>>> -
>>>>> -rte_lpm_free(lpm);
>>>>> -rte_free(rv);
>>>>> -lpm = NULL;
>>>>> -rv = NULL;
>>>>> -
>>>>> -/* Test without RCU integration */
>>>>> -printf("\nPerf test: 1 writer, %d readers, RCU integration
>>>>> disabled\n", -num_cores);
>>>>> -
>>>>> -/* Create LPM table */
>>>>> -config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES; -
>>> config.number_tbl8s =
>>>>> NUM_LDEPTH_ROUTE_ENTRIES; -config.flags = 0; -lpm =
>>>>> rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>>>>> -TEST_LPM_ASSERT(lpm != NULL);
>>>>> +__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>>>
>>>>> -writer_done = 0;
>>>>> -__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>>>> +/* Launch reader threads */
>>>>> +for (i = j; i < num_cores; i++)
>>>>> +rte_eal_remote_launch(reader_f, NULL,
>>>>> +enabled_core_ids[i]);
>>>>>
>>>>> -/* Launch reader threads */
>>>>> -for (i = 0; i < num_cores; i++)
>>>>> -rte_eal_remote_launch(test_lpm_reader, NULL,
>>>>> -enabled_core_ids[i]);
>>>>> +/* Launch writer threads */
>>>>> +for (i = 0; i < j; i++)
>>>>> +rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>>>>> +(void *)(uintptr_t)(i + j),
>>>> This can be just 'j'?
>>>>
>>>>> +enabled_core_ids[i]);
>>>>>
>>>>> -/* Measure add/delete. */
>>>>> -begin = rte_rdtsc_precise();
>>>>> -for (i = 0; i < RCU_ITERATIONS; i++) {
>>>>> -/* Add all the entries */
>>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>>>>> -if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>>>>> -large_ldepth_route_table[j].depth,
>>>>> -next_hop_add) != 0) {
>>>>> -printf("Failed to add iteration %d, route#
>>>>> %d\n",
>>>>> -i, j);
>>>>> +/* Wait for writer threads */
>>>>> +for (i = 0; i < j; i++)
>>>>> +if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>>>>> goto error;
>>>>> -}
>>>>>
>>>>> -/* Delete all the entries */
>>>>> -for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>>>>> -if (rte_lpm_delete(lpm,
>>>>> large_ldepth_route_table[j].ip,
>>>>> -large_ldepth_route_table[j].depth) != 0) {
>>>>> -printf("Failed to delete iteration %d, route#
>>>>> %d\n",
>>>>> -i, j);
>>>>> -goto error;
>>>>> -}
>>>>> +printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>>>>> +printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>>>>> +printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>>>>> +__atomic_load_n(&gwrite_cycles,
>>>>> __ATOMIC_RELAXED)
>>>>> +/ TOTAL_WRITES);
>>>>> +
>>>>> +writer_done = 1;
>>>>> +/* Wait until all readers have exited */
>>>>> +for (i = j; i < num_cores; i++)
>>>>> +rte_eal_wait_lcore(enabled_core_ids[i]);
>>>>> +
>>>>> +rte_lpm_free(lpm);
>>>>> +rte_free(rv);
>>>>> +lpm = NULL;
>>>>> +rv = NULL;
>>>>> }
>>>>> -total_cycles = rte_rdtsc_precise() - begin;
>>>>> -
>>>>> -printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>>>>> -printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>>>>> -printf("Average LPM Add/Del: %g cycles\n",
>>>>> -(double)total_cycles / TOTAL_WRITES);
>>>>> -
>>>>> -writer_done = 1;
>>>>> -/* Wait until all readers have exited */
>>>>> -for (i = 0; i < num_cores; i++)
>>>>> -rte_eal_wait_lcore(enabled_core_ids[i]);
>>>>> -
>>>>> -rte_lpm_free(lpm);
>>>>>
>>>>> return 0;
>>>>>
>>>>> @@ -946,9 +767,8 @@ test_lpm_perf(void)
>>>>> rte_lpm_delete_all(lpm);
>>>>> rte_lpm_free(lpm);
>>>>>
>>>>> -test_lpm_rcu_perf();
>>>>> -
>>>>> -test_lpm_rcu_perf_multi_writer();
>>>>> +test_lpm_rcu_perf_multi_writer(0);
>>>>> +test_lpm_rcu_perf_multi_writer(1);
>>>>>
>>>>> return 0;
>>>>> }
>>>>> --
>>>>> 2.17.1
> 
-- 
Regards,
Vladimir
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test
  2020-11-03  5:12   ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
                       ` (3 preceding siblings ...)
  2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
@ 2020-11-03 22:23     ` Dharmik Thakkar
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
                         ` (4 more replies)
  4 siblings, 5 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03 22:23 UTC (permalink / raw)
  Cc: dev, nd, Dharmik Thakkar
Fix LPM adds, LPM deletes, and cycle calculation.
Return error if LPM add/delete fails in multi-writer test.
Return error if the single- or multi-writer test fails.
Remove redundant error checking for readers.
Combine single and multi threaded test cases to avoid code duplication.
---
v4:
 - Return error if rcu qsbr test fails
 - Improve multi writer test to enable more than 2 writers
v3:
 - Add 'goto error'
 - Remove unnecessary if statement
v2:
 - Add more details about the fix to the commit message
 - Replace hard coded values with an enum
 - Remove lock acquire/release for single writer
Dharmik Thakkar (4):
  test/lpm: fix cycle calculation in rcu qsbr perf
  test/lpm: return error on failure in rcu qsbr perf
  test/lpm: remove error checking in rcu qsbr perf
  test/lpm: avoid code duplication in rcu qsbr perf
 app/test/test_lpm_perf.c | 383 ++++++++++-----------------------------
 1 file changed, 91 insertions(+), 292 deletions(-)
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v4 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-11-03 22:23     ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
@ 2020-11-03 22:23       ` Dharmik Thakkar
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 2/4] test/lpm: return error on failure " Dharmik Thakkar
                         ` (3 subsequent siblings)
  4 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03 22:23 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Ruifeng Wang,
	Honnappa Nagarahalli, Gavin Hu
  Cc: dev, nd, Dharmik Thakkar, stable
Fix incorrect calculations for LPM adds, LPM deletes,
and average cycles in RCU QSBR perf tests
Since the RCU QSBR tests run for 'RCU_ITERATIONS' and not
'ITERATIONS', replace 'ITERATIONS' with 'RCU_ITERATIONS'
for calculating adds, deletes, and cycles.
Also, for multi-writer perf test, each writer only writes
half of NUM_LDEPTH_ROUTE_ENTRIES.
For 2 writers, total adds (or deletes) should be
(RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of
(2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES).
Since, for both the single and multi writer tests, total adds/deletes
is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES),
this has been replaced with a macro 'TOTAL_WRITES'. Furthermore,
'gwrites' has been removed since it is always a fixed value
equal to TOTAL_WRITES.
Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 app/test/test_lpm_perf.c | 45 ++++++++++++++--------------------------
 1 file changed, 16 insertions(+), 29 deletions(-)
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index c5a238b9d1e8..45164b23214b 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv;
 static volatile uint8_t writer_done;
 static volatile uint32_t thr_id;
 static uint64_t gwrite_cycles;
-static uint64_t gwrites;
 /* LPM APIs are not thread safe, use mutex to provide thread safety */
 static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
 
@@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries;
 #define NUM_ROUTE_ENTRIES num_route_entries
 #define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries
 
+#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES)
+
 enum {
 	IP_CLASS_A,
 	IP_CLASS_B,
@@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	uint8_t core_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
 
-	RTE_SET_USED(arg);
 	/* 2 writer threads are used */
 	if (core_id % 2 == 0) {
 		si = 0;
@@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	total_cycles = rte_rdtsc_precise() - begin;
 
 	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
-	__atomic_fetch_add(&gwrites,
-			2 * NUM_LDEPTH_ROUTE_ENTRIES * RCU_ITERATIONS,
-			__ATOMIC_RELAXED);
 
 	return 0;
 }
@@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void)
 
 	writer_done = 0;
 	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
 
 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
@@ -548,14 +544,11 @@ test_lpm_rcu_perf_multi_writer(void)
 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 			goto error;
 
-	printf("Total LPM Adds: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
-		);
+		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+		/ TOTAL_WRITES);
 
 	/* Wait and check return value from reader threads */
 	writer_done = 1;
@@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void)
 
 	writer_done = 0;
 	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
 	/* Launch reader threads */
@@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void)
 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 			goto error;
 
-	printf("Total LPM Adds: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
-		);
+		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+		/ TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
@@ -711,11 +700,10 @@ test_lpm_rcu_perf(void)
 	}
 	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
+		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
@@ -771,11 +759,10 @@ test_lpm_rcu_perf(void)
 	}
 	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
+		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v4 2/4] test/lpm: return error on failure in rcu qsbr perf
  2020-11-03 22:23     ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
@ 2020-11-03 22:23       ` Dharmik Thakkar
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 3/4] test/lpm: remove error checking " Dharmik Thakkar
                         ` (2 subsequent siblings)
  4 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03 22:23 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Honnappa Nagarahalli,
	Ruifeng Wang, Gavin Hu
  Cc: dev, nd, Dharmik Thakkar, stable
Return an error if an add/delete fails in the multi-writer perf test.
Return an error if the single- or multi-writer test fails.
Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 app/test/test_lpm_perf.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 45164b23214b..873ecf511c97 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -453,6 +453,7 @@ test_lpm_rcu_qsbr_writer(void *arg)
 					next_hop_add) != 0) {
 				printf("Failed to add iteration %d, route# %d\n",
 					i, j);
+				goto error;
 			}
 			pthread_mutex_unlock(&lpm_mutex);
 		}
@@ -464,6 +465,7 @@ test_lpm_rcu_qsbr_writer(void *arg)
 				large_ldepth_route_table[j].depth) != 0) {
 				printf("Failed to delete iteration %d, route# %d\n",
 					i, j);
+				goto error;
 			}
 			pthread_mutex_unlock(&lpm_mutex);
 		}
@@ -474,6 +476,10 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
 
 	return 0;
+
+error:
+	pthread_mutex_unlock(&lpm_mutex);
+	return -1;
 }
 
 /*
@@ -947,9 +953,11 @@ test_lpm_perf(void)
 	rte_lpm_delete_all(lpm);
 	rte_lpm_free(lpm);
 
-	test_lpm_rcu_perf();
+	if (test_lpm_rcu_perf() < 0)
+		return -1;
 
-	test_lpm_rcu_perf_multi_writer();
+	if (test_lpm_rcu_perf_multi_writer() < 0)
+		return -1;
 
 	return 0;
 }
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v4 3/4] test/lpm: remove error checking in rcu qsbr perf
  2020-11-03 22:23     ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 2/4] test/lpm: return error on failure " Dharmik Thakkar
@ 2020-11-03 22:23       ` Dharmik Thakkar
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
  2020-11-04 18:58       ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  4 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03 22:23 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Honnappa Nagarahalli,
	Gavin Hu, Ruifeng Wang
  Cc: dev, nd, Dharmik Thakkar, stable
Remove redundant error checking for reader threads
since they never return error.
Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 app/test/test_lpm_perf.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 873ecf511c97..c8e70ec89ff5 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -556,11 +556,10 @@ test_lpm_rcu_perf_multi_writer(void)
 		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
 		/ TOTAL_WRITES);
 
-	/* Wait and check return value from reader threads */
 	writer_done = 1;
+	/* Wait until all readers have exited */
 	for (i = 2; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 	rte_free(rv);
@@ -605,10 +604,9 @@ test_lpm_rcu_perf_multi_writer(void)
 		/ TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 2; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 
@@ -712,10 +710,9 @@ test_lpm_rcu_perf(void)
 		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 	rte_free(rv);
@@ -771,11 +768,9 @@ test_lpm_rcu_perf(void)
 		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			printf("Warning: lcore %u not finished.\n",
-				enabled_core_ids[i]);
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-03 22:23     ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
                         ` (2 preceding siblings ...)
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 3/4] test/lpm: remove error checking " Dharmik Thakkar
@ 2020-11-03 22:23       ` Dharmik Thakkar
  2020-11-03 22:35         ` Honnappa Nagarahalli
  2020-11-04 15:46         ` Medvedkin, Vladimir
  2020-11-04 18:58       ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  4 siblings, 2 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-03 22:23 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin; +Cc: dev, nd, Dharmik Thakkar
Avoid code duplication by combining single and multi threaded tests.
Also, enable support for more than 2 writers.
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 359 +++++++++------------------------------
 1 file changed, 84 insertions(+), 275 deletions(-)
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index c8e70ec89ff5..a1485e74e77f 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv;
 static volatile uint8_t writer_done;
 static volatile uint32_t thr_id;
 static uint64_t gwrite_cycles;
+static uint32_t single_insert;
 /* LPM APIs are not thread safe, use mutex to provide thread safety */
 static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
 
@@ -430,24 +431,21 @@ test_lpm_rcu_qsbr_writer(void *arg)
 {
 	unsigned int i, j, si, ei;
 	uint64_t begin, total_cycles;
-	uint8_t core_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
+	bool single_writer = (single_insert == NUM_LDEPTH_ROUTE_ENTRIES) ?
+				true : false;
+	uint8_t pos_core = (uint8_t)((uintptr_t)arg);
 
-	/* 2 writer threads are used */
-	if (core_id % 2 == 0) {
-		si = 0;
-		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
-	} else {
-		si = NUM_LDEPTH_ROUTE_ENTRIES / 2;
-		ei = NUM_LDEPTH_ROUTE_ENTRIES;
-	}
+	si = pos_core * single_insert;
+	ei = si + single_insert;
 
 	/* Measure add/delete. */
 	begin = rte_rdtsc_precise();
 	for (i = 0; i < RCU_ITERATIONS; i++) {
 		/* Add all the entries */
 		for (j = si; j < ei; j++) {
-			pthread_mutex_lock(&lpm_mutex);
+			if (!single_writer)
+				pthread_mutex_lock(&lpm_mutex);
 			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
 					large_ldepth_route_table[j].depth,
 					next_hop_add) != 0) {
@@ -455,19 +453,22 @@ test_lpm_rcu_qsbr_writer(void *arg)
 					i, j);
 				goto error;
 			}
-			pthread_mutex_unlock(&lpm_mutex);
+			if (!single_writer)
+				pthread_mutex_unlock(&lpm_mutex);
 		}
 
 		/* Delete all the entries */
 		for (j = si; j < ei; j++) {
-			pthread_mutex_lock(&lpm_mutex);
+			if (!single_writer)
+				pthread_mutex_lock(&lpm_mutex);
 			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
 				large_ldepth_route_table[j].depth) != 0) {
 				printf("Failed to delete iteration %d, route# %d\n",
 					i, j);
 				goto error;
 			}
-			pthread_mutex_unlock(&lpm_mutex);
+			if (!single_writer)
+				pthread_mutex_unlock(&lpm_mutex);
 		}
 	}
 
@@ -478,22 +479,24 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	return 0;
 
 error:
-	pthread_mutex_unlock(&lpm_mutex);
+	if (!single_writer)
+		pthread_mutex_unlock(&lpm_mutex);
 	return -1;
 }
 
 /*
  * Functional test:
- * 2 writers, rest are readers
+ * 1/2 writers, rest are readers
  */
 static int
-test_lpm_rcu_perf_multi_writer(void)
+test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
 {
 	struct rte_lpm_config config;
 	size_t sz;
-	unsigned int i;
+	unsigned int i, j;
 	uint16_t core_id;
 	struct rte_lpm_rcu_config rcu_cfg = {0};
+	int (*reader_f)(void *arg) = NULL;
 
 	if (rte_lcore_count() < 3) {
 		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
@@ -506,273 +509,79 @@ test_lpm_rcu_perf_multi_writer(void)
 		num_cores++;
 	}
 
-	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
-		/ TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
-
-	/* Test without RCU integration */
-	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
-		/ TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-
-	return 0;
-
-error:
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	rte_eal_mp_wait_lcore();
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-
-	return -1;
-}
-
-/*
- * Functional test:
- * Single writer, rest are readers
- */
-static int
-test_lpm_rcu_perf(void)
-{
-	struct rte_lpm_config config;
-	uint64_t begin, total_cycles;
-	size_t sz;
-	unsigned int i, j;
-	uint16_t core_id;
-	uint32_t next_hop_add = 0xAA;
-	struct rte_lpm_rcu_config rcu_cfg = {0};
-
-	if (rte_lcore_count() < 2) {
-		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
-		return TEST_SKIPPED;
-	}
-
-	num_cores = 0;
-	RTE_LCORE_FOREACH_WORKER(core_id) {
-		enabled_core_ids[num_cores] = core_id;
-		num_cores++;
-	}
-
-	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
-
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
+	for (j = 1; j < 3; j++) {
+		if (use_rcu)
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration enabled\n", j, num_cores - j);
+		else
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration disabled\n", j, num_cores - j);
+
+		/* Calculate writes by each writer */
+		single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j;
+
+		/* Create LPM table */
+		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.flags = 0;
+		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
+		TEST_LPM_ASSERT(lpm != NULL);
+
+		/* Init RCU variable */
+		if (use_rcu) {
+			sz = rte_rcu_qsbr_get_memsize(num_cores);
+			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
+							RTE_CACHE_LINE_SIZE);
+			rte_rcu_qsbr_init(rv, num_cores);
+
+			rcu_cfg.v = rv;
+			/* Assign the RCU variable to LPM */
+			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
+				printf("RCU variable assignment failed\n");
 				goto error;
 			}
-	}
-	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
+			reader_f = test_lpm_rcu_qsbr_reader;
+		} else
+			reader_f = test_lpm_reader;
 
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
+		writer_done = 0;
+		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
 
-	/* Test without RCU integration */
-	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
+		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
+		/* Launch reader threads */
+		for (i = j; i < num_cores; i++)
+			rte_eal_remote_launch(reader_f, NULL,
+						enabled_core_ids[i]);
 
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
+		/* Launch writer threads */
+		for (i = 0; i < j; i++)
+			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
+						(void *)(uintptr_t)i,
+						enabled_core_ids[i]);
 
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
+		/* Wait for writer threads */
+		for (i = 0; i < j; i++)
+			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 				goto error;
-			}
 
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
+		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
+		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
+			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+			/ TOTAL_WRITES);
+
+		writer_done = 1;
+		/* Wait until all readers have exited */
+		for (i = j; i < num_cores; i++)
+			rte_eal_wait_lcore(enabled_core_ids[i]);
+
+		rte_lpm_free(lpm);
+		rte_free(rv);
+		lpm = NULL;
+		rv = NULL;
 	}
-	total_cycles = rte_rdtsc_precise() - begin;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
 
 	return 0;
 
@@ -948,10 +757,10 @@ test_lpm_perf(void)
 	rte_lpm_delete_all(lpm);
 	rte_lpm_free(lpm);
 
-	if (test_lpm_rcu_perf() < 0)
+	if (test_lpm_rcu_perf_multi_writer(0) < 0)
 		return -1;
 
-	if (test_lpm_rcu_perf_multi_writer() < 0)
+	if (test_lpm_rcu_perf_multi_writer(1) < 0)
 		return -1;
 
 	return 0;
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
@ 2020-11-03 22:35         ` Honnappa Nagarahalli
  2020-11-04 15:46         ` Medvedkin, Vladimir
  1 sibling, 0 replies; 52+ messages in thread
From: Honnappa Nagarahalli @ 2020-11-03 22:35 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson, Vladimir Medvedkin
  Cc: dev, nd, Dharmik Thakkar, Honnappa Nagarahalli, nd
> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Dharmik Thakkar
> Sent: Tuesday, November 3, 2020 4:24 PM
> To: Bruce Richardson <bruce.richardson@intel.com>; Vladimir Medvedkin
> <vladimir.medvedkin@intel.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>; Dharmik Thakkar
> <Dharmik.Thakkar@arm.com>
> Subject: [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication in rcu
> qsbr perf
> 
> Avoid code duplication by combining single and multi threaded tests
> 
> Also, enable support for more than 2 writers
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Looks good
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
>  app/test/test_lpm_perf.c | 359 +++++++++------------------------------
>  1 file changed, 84 insertions(+), 275 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
> index c8e70ec89ff5..a1485e74e77f 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv;
>  static volatile uint8_t writer_done;
>  static volatile uint32_t thr_id;
>  static uint64_t gwrite_cycles;
> +static uint32_t single_insert;
>  /* LPM APIs are not thread safe, use mutex to provide thread safety */
>  static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
> 
> @@ -430,24 +431,21 @@ test_lpm_rcu_qsbr_writer(void *arg)  {
>  	unsigned int i, j, si, ei;
>  	uint64_t begin, total_cycles;
> -	uint8_t core_id = (uint8_t)((uintptr_t)arg);
>  	uint32_t next_hop_add = 0xAA;
> +	bool single_writer = (single_insert ==
> NUM_LDEPTH_ROUTE_ENTRIES) ?
> +				true : false;
> +	uint8_t pos_core = (uint8_t)((uintptr_t)arg);
> 
> -	/* 2 writer threads are used */
> -	if (core_id % 2 == 0) {
> -		si = 0;
> -		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> -	} else {
> -		si = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> -		ei = NUM_LDEPTH_ROUTE_ENTRIES;
> -	}
> +	si = pos_core * single_insert;
> +	ei = si + single_insert;
> 
>  	/* Measure add/delete. */
>  	begin = rte_rdtsc_precise();
>  	for (i = 0; i < RCU_ITERATIONS; i++) {
>  		/* Add all the entries */
>  		for (j = si; j < ei; j++) {
> -			pthread_mutex_lock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_lock(&lpm_mutex);
>  			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>  					large_ldepth_route_table[j].depth,
>  					next_hop_add) != 0) {
> @@ -455,19 +453,22 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  					i, j);
>  				goto error;
>  			}
> -			pthread_mutex_unlock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_unlock(&lpm_mutex);
>  		}
> 
>  		/* Delete all the entries */
>  		for (j = si; j < ei; j++) {
> -			pthread_mutex_lock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_lock(&lpm_mutex);
>  			if (rte_lpm_delete(lpm,
> large_ldepth_route_table[j].ip,
>  				large_ldepth_route_table[j].depth) != 0) {
>  				printf("Failed to delete iteration %d, route#
> %d\n",
>  					i, j);
>  				goto error;
>  			}
> -			pthread_mutex_unlock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_unlock(&lpm_mutex);
>  		}
>  	}
> 
> @@ -478,22 +479,24 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  	return 0;
> 
>  error:
> -	pthread_mutex_unlock(&lpm_mutex);
> +	if (!single_writer)
> +		pthread_mutex_unlock(&lpm_mutex);
>  	return -1;
>  }
> 
>  /*
>   * Functional test:
> - * 2 writers, rest are readers
> + * 1/2 writers, rest are readers
>   */
>  static int
> -test_lpm_rcu_perf_multi_writer(void)
> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>  {
>  	struct rte_lpm_config config;
>  	size_t sz;
> -	unsigned int i;
> +	unsigned int i, j;
>  	uint16_t core_id;
>  	struct rte_lpm_rcu_config rcu_cfg = {0};
> +	int (*reader_f)(void *arg) = NULL;
> 
>  	if (rte_lcore_count() < 3) {
>  		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
> @@ -506,273 +509,79 @@ test_lpm_rcu_perf_multi_writer(void)
>  		num_cores++;
>  	}
> 
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration
> enabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> -
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration
> disabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -
> -	return 0;
> -
> -error:
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	rte_eal_mp_wait_lcore();
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -
> -	return -1;
> -}
> -
> -/*
> - * Functional test:
> - * Single writer, rest are readers
> - */
> -static int
> -test_lpm_rcu_perf(void)
> -{
> -	struct rte_lpm_config config;
> -	uint64_t begin, total_cycles;
> -	size_t sz;
> -	unsigned int i, j;
> -	uint16_t core_id;
> -	uint32_t next_hop_add = 0xAA;
> -	struct rte_lpm_rcu_config rcu_cfg = {0};
> -
> -	if (rte_lcore_count() < 2) {
> -		printf("Not enough cores for lpm_rcu_perf_autotest,
> expecting at least 2\n");
> -		return TEST_SKIPPED;
> -	}
> -
> -	num_cores = 0;
> -	RTE_LCORE_FOREACH_WORKER(core_id) {
> -		enabled_core_ids[num_cores] = core_id;
> -		num_cores++;
> -	}
> -
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route#
> %d\n",
> -					i, j);
> -				goto error;
> -			}
> -
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm,
> large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route#
> %d\n",
> -					i, j);
> +	for (j = 1; j < 3; j++) {
> +		if (use_rcu)
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration enabled\n", j, num_cores - j);
> +		else
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration disabled\n", j, num_cores - j);
> +
> +		/* Calculate writes by each writer */
> +		single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j;
> +
> +		/* Create LPM table */
> +		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.flags = 0;
> +		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> +		TEST_LPM_ASSERT(lpm != NULL);
> +
> +		/* Init RCU variable */
> +		if (use_rcu) {
> +			sz = rte_rcu_qsbr_get_memsize(num_cores);
> +			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> +							RTE_CACHE_LINE_SIZE);
> +			rte_rcu_qsbr_init(rv, num_cores);
> +
> +			rcu_cfg.v = rv;
> +			/* Assign the RCU variable to LPM */
> +			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> +				printf("RCU variable assignment failed\n");
>  				goto error;
>  			}
> -	}
> -	total_cycles = rte_rdtsc_precise() - begin;
> 
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> +			reader_f = test_lpm_rcu_qsbr_reader;
> +		} else
> +			reader_f = test_lpm_reader;
> 
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> +		writer_done = 0;
> +		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> 
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration
> disabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> +		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> 
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> +		/* Launch reader threads */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_remote_launch(reader_f, NULL,
> +						enabled_core_ids[i]);
> 
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> +		/* Launch writer threads */
> +		for (i = 0; i < j; i++)
> +			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> +						(void *)(uintptr_t)i,
> +						enabled_core_ids[i]);
> 
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route#
> %d\n",
> -					i, j);
> +		/* Wait for writer threads */
> +		for (i = 0; i < j; i++)
> +			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>  				goto error;
> -			}
> 
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm,
> large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route#
> %d\n",
> -					i, j);
> -				goto error;
> -			}
> +		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> +		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> +			__atomic_load_n(&gwrite_cycles,
> __ATOMIC_RELAXED)
> +			/ TOTAL_WRITES);
> +
> +		writer_done = 1;
> +		/* Wait until all readers have exited */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_wait_lcore(enabled_core_ids[i]);
> +
> +		rte_lpm_free(lpm);
> +		rte_free(rv);
> +		lpm = NULL;
> +		rv = NULL;
>  	}
> -	total_cycles = rte_rdtsc_precise() - begin;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> 
>  	return 0;
> 
> @@ -948,10 +757,10 @@ test_lpm_perf(void)
>  	rte_lpm_delete_all(lpm);
>  	rte_lpm_free(lpm);
> 
> -	if (test_lpm_rcu_perf() < 0)
> +	if (test_lpm_rcu_perf_multi_writer(0) < 0)
>  		return -1;
> 
> -	if (test_lpm_rcu_perf_multi_writer() < 0)
> +	if (test_lpm_rcu_perf_multi_writer(1) < 0)
>  		return -1;
> 
>  	return 0;
> --
> 2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
  2020-11-03 22:35         ` Honnappa Nagarahalli
@ 2020-11-04 15:46         ` Medvedkin, Vladimir
  2020-11-04 16:49           ` Dharmik Thakkar
  1 sibling, 1 reply; 52+ messages in thread
From: Medvedkin, Vladimir @ 2020-11-04 15:46 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson; +Cc: dev, nd
Hi Thakkar,
On 03/11/2020 22:23, Dharmik Thakkar wrote:
> Avoid code duplication by combining single and multi threaded tests
> 
> Also, enable support for more than 2 writers
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>   app/test/test_lpm_perf.c | 359 +++++++++------------------------------
>   1 file changed, 84 insertions(+), 275 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
> index c8e70ec89ff5..a1485e74e77f 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv;
>   static volatile uint8_t writer_done;
>   static volatile uint32_t thr_id;
>   static uint64_t gwrite_cycles;
> +static uint32_t single_insert;
>   /* LPM APIs are not thread safe, use mutex to provide thread safety */
>   static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
>   
> @@ -430,24 +431,21 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   {
>   	unsigned int i, j, si, ei;
>   	uint64_t begin, total_cycles;
> -	uint8_t core_id = (uint8_t)((uintptr_t)arg);
>   	uint32_t next_hop_add = 0xAA;
> +	bool single_writer = (single_insert == NUM_LDEPTH_ROUTE_ENTRIES) ?
> +				true : false;
> +	uint8_t pos_core = (uint8_t)((uintptr_t)arg);
>   
> -	/* 2 writer threads are used */
> -	if (core_id % 2 == 0) {
> -		si = 0;
> -		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> -	} else {
> -		si = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> -		ei = NUM_LDEPTH_ROUTE_ENTRIES;
> -	}
> +	si = pos_core * single_insert;
> +	ei = si + single_insert;
> 
In this case, given that you are doing
           "single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j;"
below, the number of ldepth routes must be a multiple of the number of
writers; otherwise, the leftover routes at the end of the table are
never added or deleted by any writer. Consider something like:
number_of_writers = j;
...
si = (pos_core * NUM_LDEPTH_ROUTE_ENTRIES)/number_of_writers;
ei = ((pos_core + 1 ) * NUM_LDEPTH_ROUTE_ENTRIES)/number_of_writers;
>   	/* Measure add/delete. */
>   	begin = rte_rdtsc_precise();
>   	for (i = 0; i < RCU_ITERATIONS; i++) {
>   		/* Add all the entries */
>   		for (j = si; j < ei; j++) {
> -			pthread_mutex_lock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_lock(&lpm_mutex);
>   			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>   					large_ldepth_route_table[j].depth,
>   					next_hop_add) != 0) {
> @@ -455,19 +453,22 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   					i, j);
>   				goto error;
>   			}
> -			pthread_mutex_unlock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_unlock(&lpm_mutex);
>   		}
>   
>   		/* Delete all the entries */
>   		for (j = si; j < ei; j++) {
> -			pthread_mutex_lock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_lock(&lpm_mutex);
>   			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>   				large_ldepth_route_table[j].depth) != 0) {
>   				printf("Failed to delete iteration %d, route# %d\n",
>   					i, j);
>   				goto error;
>   			}
> -			pthread_mutex_unlock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_unlock(&lpm_mutex);
>   		}
>   	}
>   
> @@ -478,22 +479,24 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   	return 0;
>   
>   error:
> -	pthread_mutex_unlock(&lpm_mutex);
> +	if (!single_writer)
> +		pthread_mutex_unlock(&lpm_mutex);
>   	return -1;
>   }
>   
>   /*
>    * Functional test:
> - * 2 writers, rest are readers
> + * 1/2 writers, rest are readers
>    */
>   static int
> -test_lpm_rcu_perf_multi_writer(void)
> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>   {
>   	struct rte_lpm_config config;
>   	size_t sz;
> -	unsigned int i;
> +	unsigned int i, j;
>   	uint16_t core_id;
>   	struct rte_lpm_rcu_config rcu_cfg = {0};
> +	int (*reader_f)(void *arg) = NULL;
>   
>   	if (rte_lcore_count() < 3) {
>   		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
> @@ -506,273 +509,79 @@ test_lpm_rcu_perf_multi_writer(void)
>   		num_cores++;
>   	}
>   
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> -
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -
> -	return 0;
> -
> -error:
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	rte_eal_mp_wait_lcore();
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -
> -	return -1;
> -}
> -
> -/*
> - * Functional test:
> - * Single writer, rest are readers
> - */
> -static int
> -test_lpm_rcu_perf(void)
> -{
> -	struct rte_lpm_config config;
> -	uint64_t begin, total_cycles;
> -	size_t sz;
> -	unsigned int i, j;
> -	uint16_t core_id;
> -	uint32_t next_hop_add = 0xAA;
> -	struct rte_lpm_rcu_config rcu_cfg = {0};
> -
> -	if (rte_lcore_count() < 2) {
> -		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
> -		return TEST_SKIPPED;
> -	}
> -
> -	num_cores = 0;
> -	RTE_LCORE_FOREACH_WORKER(core_id) {
> -		enabled_core_ids[num_cores] = core_id;
> -		num_cores++;
> -	}
> -
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route# %d\n",
> -					i, j);
> -				goto error;
> -			}
> -
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route# %d\n",
> -					i, j);
> +	for (j = 1; j < 3; j++) {
> +		if (use_rcu)
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration enabled\n", j, num_cores - j);
> +		else
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration disabled\n", j, num_cores - j);
> +
> +		/* Calculate writes by each writer */
> +		single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j;
> +
> +		/* Create LPM table */
> +		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.flags = 0;
> +		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> +		TEST_LPM_ASSERT(lpm != NULL);
> +
> +		/* Init RCU variable */
> +		if (use_rcu) {
> +			sz = rte_rcu_qsbr_get_memsize(num_cores);
> +			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> +							RTE_CACHE_LINE_SIZE);
> +			rte_rcu_qsbr_init(rv, num_cores);
> +
> +			rcu_cfg.v = rv;
> +			/* Assign the RCU variable to LPM */
> +			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> +				printf("RCU variable assignment failed\n");
>   				goto error;
>   			}
> -	}
> -	total_cycles = rte_rdtsc_precise() - begin;
>   
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> +			reader_f = test_lpm_rcu_qsbr_reader;
> +		} else
> +			reader_f = test_lpm_reader;
>   
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> +		writer_done = 0;
> +		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>   
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> +		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>   
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> +		/* Launch reader threads */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_remote_launch(reader_f, NULL,
> +						enabled_core_ids[i]);
>   
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> +		/* Launch writer threads */
> +		for (i = 0; i < j; i++)
> +			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> +						(void *)(uintptr_t)i,
> +						enabled_core_ids[i]);
>   
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route# %d\n",
> -					i, j);
> +		/* Wait for writer threads */
> +		for (i = 0; i < j; i++)
> +			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>   				goto error;
> -			}
>   
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route# %d\n",
> -					i, j);
> -				goto error;
> -			}
> +		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> +		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> +			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> +			/ TOTAL_WRITES);
> +
> +		writer_done = 1;
> +		/* Wait until all readers have exited */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_wait_lcore(enabled_core_ids[i]);
> +
> +		rte_lpm_free(lpm);
> +		rte_free(rv);
> +		lpm = NULL;
> +		rv = NULL;
>   	}
> -	total_cycles = rte_rdtsc_precise() - begin;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
>   
>   	return 0;
>   
> @@ -948,10 +757,10 @@ test_lpm_perf(void)
>   	rte_lpm_delete_all(lpm);
>   	rte_lpm_free(lpm);
>   
> -	if (test_lpm_rcu_perf() < 0)
> +	if (test_lpm_rcu_perf_multi_writer(0) < 0)
>   		return -1;
>   
> -	if (test_lpm_rcu_perf_multi_writer() < 0)
> +	if (test_lpm_rcu_perf_multi_writer(1) < 0)
>   		return -1;
>   
>   	return 0;
> 
-- 
Regards,
Vladimir
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-04 15:46         ` Medvedkin, Vladimir
@ 2020-11-04 16:49           ` Dharmik Thakkar
  0 siblings, 0 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-04 16:49 UTC (permalink / raw)
  To: Medvedkin, Vladimir; +Cc: Bruce Richardson, dev, nd
> On Nov 4, 2020, at 9:46 AM, Medvedkin, Vladimir <vladimir.medvedkin@intel.com> wrote:
> 
> Hi Thakkar,
> 
> On 03/11/2020 22:23, Dharmik Thakkar wrote:
>> Avoid code duplication by combining single and multi threaded tests
>> Also, enable support for more than 2 writers
>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>> ---
>>  app/test/test_lpm_perf.c | 359 +++++++++------------------------------
>>  1 file changed, 84 insertions(+), 275 deletions(-)
>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
>> index c8e70ec89ff5..a1485e74e77f 100644
>> --- a/app/test/test_lpm_perf.c
>> +++ b/app/test/test_lpm_perf.c
>> @@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv;
>>  static volatile uint8_t writer_done;
>>  static volatile uint32_t thr_id;
>>  static uint64_t gwrite_cycles;
>> +static uint32_t single_insert;
>>  /* LPM APIs are not thread safe, use mutex to provide thread safety */
>>  static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
>>  @@ -430,24 +431,21 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>  {
>>  	unsigned int i, j, si, ei;
>>  	uint64_t begin, total_cycles;
>> -	uint8_t core_id = (uint8_t)((uintptr_t)arg);
>>  	uint32_t next_hop_add = 0xAA;
>> +	bool single_writer = (single_insert == NUM_LDEPTH_ROUTE_ENTRIES) ?
>> +				true : false;
>> +	uint8_t pos_core = (uint8_t)((uintptr_t)arg);
>>  -	/* 2 writer threads are used */
>> -	if (core_id % 2 == 0) {
>> -		si = 0;
>> -		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>> -	} else {
>> -		si = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>> -		ei = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	}
>> +	si = pos_core * single_insert;
>> +	ei = si + single_insert;
> 
> In this case, given that you are doing
>          "single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j;"
> below, every route is covered only when the number of ldepth routes is a multiple of the number of writers; otherwise the leftover routes are skipped. Consider something like:
> 
> number_of_writers = j;
> ...
> si = (pos_core * NUM_LDEPTH_ROUTE_ENTRIES)/number_of_writers;
> ei = ((pos_core + 1 ) * NUM_LDEPTH_ROUTE_ENTRIES)/number_of_writers;
> 
Yes, agreed some routes can be skipped. I will update the patch with the above changes. Thanks!
> 
>>  	/* Measure add/delete. */
>>  	begin = rte_rdtsc_precise();
>>  	for (i = 0; i < RCU_ITERATIONS; i++) {
>>  		/* Add all the entries */
>>  		for (j = si; j < ei; j++) {
>> -			pthread_mutex_lock(&lpm_mutex);
>> +			if (!single_writer)
>> +				pthread_mutex_lock(&lpm_mutex);
>>  			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>>  					large_ldepth_route_table[j].depth,
>>  					next_hop_add) != 0) {
>> @@ -455,19 +453,22 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>  					i, j);
>>  				goto error;
>>  			}
>> -			pthread_mutex_unlock(&lpm_mutex);
>> +			if (!single_writer)
>> +				pthread_mutex_unlock(&lpm_mutex);
>>  		}
>>    		/* Delete all the entries */
>>  		for (j = si; j < ei; j++) {
>> -			pthread_mutex_lock(&lpm_mutex);
>> +			if (!single_writer)
>> +				pthread_mutex_lock(&lpm_mutex);
>>  			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>>  				large_ldepth_route_table[j].depth) != 0) {
>>  				printf("Failed to delete iteration %d, route# %d\n",
>>  					i, j);
>>  				goto error;
>>  			}
>> -			pthread_mutex_unlock(&lpm_mutex);
>> +			if (!single_writer)
>> +				pthread_mutex_unlock(&lpm_mutex);
>>  		}
>>  	}
>>  @@ -478,22 +479,24 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>  	return 0;
>>    error:
>> -	pthread_mutex_unlock(&lpm_mutex);
>> +	if (!single_writer)
>> +		pthread_mutex_unlock(&lpm_mutex);
>>  	return -1;
>>  }
>>    /*
>>   * Functional test:
>> - * 2 writers, rest are readers
>> + * 1/2 writers, rest are readers
>>   */
>>  static int
>> -test_lpm_rcu_perf_multi_writer(void)
>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>>  {
>>  	struct rte_lpm_config config;
>>  	size_t sz;
>> -	unsigned int i;
>> +	unsigned int i, j;
>>  	uint16_t core_id;
>>  	struct rte_lpm_rcu_config rcu_cfg = {0};
>> +	int (*reader_f)(void *arg) = NULL;
>>    	if (rte_lcore_count() < 3) {
>>  		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
>> @@ -506,273 +509,79 @@ test_lpm_rcu_perf_multi_writer(void)
>>  		num_cores++;
>>  	}
>>  -	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
>> -		num_cores - 2);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	/* Init RCU variable */
>> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
>> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> -						RTE_CACHE_LINE_SIZE);
>> -	rte_rcu_qsbr_init(rv, num_cores);
>> -
>> -	rcu_cfg.v = rv;
>> -	/* Assign the RCU variable to LPM */
>> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> -		printf("RCU variable assignment failed\n");
>> -		goto error;
>> -	}
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>> -
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Launch writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> -					(void *)(uintptr_t)i,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Wait for writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			goto error;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> -		/ TOTAL_WRITES);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -	lpm = NULL;
>> -	rv = NULL;
>> -
>> -	/* Test without RCU integration */
>> -	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
>> -		num_cores - 2);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Launch writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> -					(void *)(uintptr_t)i,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Wait for writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			goto error;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> -		/ TOTAL_WRITES);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> -
>> -	return 0;
>> -
>> -error:
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	rte_eal_mp_wait_lcore();
>> -
>> -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -
>> -	return -1;
>> -}
>> -
>> -/*
>> - * Functional test:
>> - * Single writer, rest are readers
>> - */
>> -static int
>> -test_lpm_rcu_perf(void)
>> -{
>> -	struct rte_lpm_config config;
>> -	uint64_t begin, total_cycles;
>> -	size_t sz;
>> -	unsigned int i, j;
>> -	uint16_t core_id;
>> -	uint32_t next_hop_add = 0xAA;
>> -	struct rte_lpm_rcu_config rcu_cfg = {0};
>> -
>> -	if (rte_lcore_count() < 2) {
>> -		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
>> -		return TEST_SKIPPED;
>> -	}
>> -
>> -	num_cores = 0;
>> -	RTE_LCORE_FOREACH_WORKER(core_id) {
>> -		enabled_core_ids[num_cores] = core_id;
>> -		num_cores++;
>> -	}
>> -
>> -	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
>> -		num_cores);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	/* Init RCU variable */
>> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
>> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> -						RTE_CACHE_LINE_SIZE);
>> -	rte_rcu_qsbr_init(rv, num_cores);
>> -
>> -	rcu_cfg.v = rv;
>> -	/* Assign the RCU variable to LPM */
>> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> -		printf("RCU variable assignment failed\n");
>> -		goto error;
>> -	}
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Measure add/delete. */
>> -	begin = rte_rdtsc_precise();
>> -	for (i = 0; i < RCU_ITERATIONS; i++) {
>> -		/* Add all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> -					large_ldepth_route_table[j].depth,
>> -					next_hop_add) != 0) {
>> -				printf("Failed to add iteration %d, route# %d\n",
>> -					i, j);
>> -				goto error;
>> -			}
>> -
>> -		/* Delete all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>> -				large_ldepth_route_table[j].depth) != 0) {
>> -				printf("Failed to delete iteration %d, route# %d\n",
>> -					i, j);
>> +	for (j = 1; j < 3; j++) {
>> +		if (use_rcu)
>> +			printf("\nPerf test: %d writer(s), %d reader(s),"
>> +			       " RCU integration enabled\n", j, num_cores - j);
>> +		else
>> +			printf("\nPerf test: %d writer(s), %d reader(s),"
>> +			       " RCU integration disabled\n", j, num_cores - j);
>> +
>> +		/* Calculate writes by each writer */
>> +		single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j;
>> +
>> +		/* Create LPM table */
>> +		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> +		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> +		config.flags = 0;
>> +		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> +		TEST_LPM_ASSERT(lpm != NULL);
>> +
>> +		/* Init RCU variable */
>> +		if (use_rcu) {
>> +			sz = rte_rcu_qsbr_get_memsize(num_cores);
>> +			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> +							RTE_CACHE_LINE_SIZE);
>> +			rte_rcu_qsbr_init(rv, num_cores);
>> +
>> +			rcu_cfg.v = rv;
>> +			/* Assign the RCU variable to LPM */
>> +			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> +				printf("RCU variable assignment failed\n");
>>  				goto error;
>>  			}
>> -	}
>> -	total_cycles = rte_rdtsc_precise() - begin;
>>  -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %g cycles\n",
>> -		(double)total_cycles / TOTAL_WRITES);
>> +			reader_f = test_lpm_rcu_qsbr_reader;
>> +		} else
>> +			reader_f = test_lpm_reader;
>>  -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -	lpm = NULL;
>> -	rv = NULL;
>> +		writer_done = 0;
>> +		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>  -	/* Test without RCU integration */
>> -	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
>> -		num_cores);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> +		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>  -	writer_done = 0;
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> +		/* Launch reader threads */
>> +		for (i = j; i < num_cores; i++)
>> +			rte_eal_remote_launch(reader_f, NULL,
>> +						enabled_core_ids[i]);
>>  -	/* Launch reader threads */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_reader, NULL,
>> -					enabled_core_ids[i]);
>> +		/* Launch writer threads */
>> +		for (i = 0; i < j; i++)
>> +			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> +						(void *)(uintptr_t)i,
>> +						enabled_core_ids[i]);
>>  -	/* Measure add/delete. */
>> -	begin = rte_rdtsc_precise();
>> -	for (i = 0; i < RCU_ITERATIONS; i++) {
>> -		/* Add all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> -					large_ldepth_route_table[j].depth,
>> -					next_hop_add) != 0) {
>> -				printf("Failed to add iteration %d, route# %d\n",
>> -					i, j);
>> +		/* Wait for writer threads */
>> +		for (i = 0; i < j; i++)
>> +			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>>  				goto error;
>> -			}
>>  -		/* Delete all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>> -				large_ldepth_route_table[j].depth) != 0) {
>> -				printf("Failed to delete iteration %d, route# %d\n",
>> -					i, j);
>> -				goto error;
>> -			}
>> +		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> +		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> +		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> +			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> +			/ TOTAL_WRITES);
>> +
>> +		writer_done = 1;
>> +		/* Wait until all readers have exited */
>> +		for (i = j; i < num_cores; i++)
>> +			rte_eal_wait_lcore(enabled_core_ids[i]);
>> +
>> +		rte_lpm_free(lpm);
>> +		rte_free(rv);
>> +		lpm = NULL;
>> +		rv = NULL;
>>  	}
>> -	total_cycles = rte_rdtsc_precise() - begin;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %g cycles\n",
>> -		(double)total_cycles / TOTAL_WRITES);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>>    	return 0;
>>  @@ -948,10 +757,10 @@ test_lpm_perf(void)
>>  	rte_lpm_delete_all(lpm);
>>  	rte_lpm_free(lpm);
>>  -	if (test_lpm_rcu_perf() < 0)
>> +	if (test_lpm_rcu_perf_multi_writer(0) < 0)
>>  		return -1;
>>  -	if (test_lpm_rcu_perf_multi_writer() < 0)
>> +	if (test_lpm_rcu_perf_multi_writer(1) < 0)
>>  		return -1;
>>    	return 0;
> 
> -- 
> Regards,
> Vladimir
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test
  2020-11-03 22:23     ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
                         ` (3 preceding siblings ...)
  2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
@ 2020-11-04 18:58       ` Dharmik Thakkar
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
                           ` (4 more replies)
  4 siblings, 5 replies; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-04 18:58 UTC (permalink / raw)
  Cc: dev, nd, Dharmik Thakkar
Fix LPM adds, LPM deletes, and cycle calculation.
Return error if LPM add/delete fails in multi-writer test.
Return error if the single-writer or multi-writer test fails.
Remove redundant error checking for readers.
Combine single and multi threaded test cases to avoid code duplication.
---
v5:
 - Update logic for routes inserted by each writer in multi writer
   test to avoid possibility of routes getting skipped
v4:
 - Return error if rcu qsbr test fails
 - Improve multi writer test to enable more than 2 writers
v3:
 - Add 'goto error'
 - Remove unnecessary if statement
v2:
 - Add more details about the fix to the commit message
 - Replace hard coded values with an enum
 - Remove lock acquire/release for single writer
Dharmik Thakkar (4):
  test/lpm: fix cycle calculation in rcu qsbr perf
  test/lpm: return error on failure in rcu qsbr perf
  test/lpm: remove error checking in rcu qsbr perf
  test/lpm: avoid code duplication in rcu qsbr perf
 app/test/test_lpm_perf.c | 380 +++++++++------------------------------
 1 file changed, 88 insertions(+), 292 deletions(-)
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-11-04 18:58       ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
@ 2020-11-04 18:58         ` Dharmik Thakkar
  2020-11-04 19:34           ` Medvedkin, Vladimir
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 2/4] test/lpm: return error on failure " Dharmik Thakkar
                           ` (3 subsequent siblings)
  4 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-04 18:58 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Gavin Hu, Ruifeng Wang,
	Honnappa Nagarahalli
  Cc: dev, nd, Dharmik Thakkar, stable
Fix incorrect calculations for LPM adds, LPM deletes,
and average cycles in RCU QSBR perf tests.
Since the RCU QSBR tests run for 'RCU_ITERATIONS' and not
'ITERATIONS', replace 'ITERATIONS' with 'RCU_ITERATIONS'
for calculating adds, deletes, and cycles.
Also, for multi-writer perf test, each writer only writes
half of NUM_LDEPTH_ROUTE_ENTRIES.
For 2 writers, total adds (or deletes) should be
(RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of
(2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES).
Since, for both the single and multi writer tests, total adds/deletes
is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES),
this has been replaced with a macro 'TOTAL_WRITES'. Furthermore,
'gwrites' has been removed since it is always a fixed value
equal to TOTAL_WRITES.
Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 app/test/test_lpm_perf.c | 45 ++++++++++++++--------------------------
 1 file changed, 16 insertions(+), 29 deletions(-)
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index c5a238b9d1e8..45164b23214b 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv;
 static volatile uint8_t writer_done;
 static volatile uint32_t thr_id;
 static uint64_t gwrite_cycles;
-static uint64_t gwrites;
 /* LPM APIs are not thread safe, use mutex to provide thread safety */
 static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
 
@@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries;
 #define NUM_ROUTE_ENTRIES num_route_entries
 #define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries
 
+#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES)
+
 enum {
 	IP_CLASS_A,
 	IP_CLASS_B,
@@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	uint8_t core_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
 
-	RTE_SET_USED(arg);
 	/* 2 writer threads are used */
 	if (core_id % 2 == 0) {
 		si = 0;
@@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	total_cycles = rte_rdtsc_precise() - begin;
 
 	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
-	__atomic_fetch_add(&gwrites,
-			2 * NUM_LDEPTH_ROUTE_ENTRIES * RCU_ITERATIONS,
-			__ATOMIC_RELAXED);
 
 	return 0;
 }
@@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void)
 
 	writer_done = 0;
 	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
 
 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
@@ -548,14 +544,11 @@ test_lpm_rcu_perf_multi_writer(void)
 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 			goto error;
 
-	printf("Total LPM Adds: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
-		);
+		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+		/ TOTAL_WRITES);
 
 	/* Wait and check return value from reader threads */
 	writer_done = 1;
@@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void)
 
 	writer_done = 0;
 	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
 	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
 	/* Launch reader threads */
@@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void)
 		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 			goto error;
 
-	printf("Total LPM Adds: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
-			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
-		);
+		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+		/ TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
@@ -711,11 +700,10 @@ test_lpm_rcu_perf(void)
 	}
 	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
+		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
@@ -771,11 +759,10 @@ test_lpm_rcu_perf(void)
 	}
 	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
-	printf("Total LPM Deletes: %d\n",
-		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
+	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
 	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
+		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
 	/* Wait and check return value from reader threads */
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v5 2/4] test/lpm: return error on failure in rcu qsbr perf
  2020-11-04 18:58       ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
@ 2020-11-04 18:58         ` Dharmik Thakkar
  2020-11-04 19:35           ` Medvedkin, Vladimir
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 3/4] test/lpm: remove error checking " Dharmik Thakkar
                           ` (2 subsequent siblings)
  4 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-04 18:58 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Gavin Hu,
	Honnappa Nagarahalli, Ruifeng Wang
  Cc: dev, nd, Dharmik Thakkar, stable
Return error if an LPM add/delete fails in the multi-writer perf test.
Return error if the single-writer or multi-writer test fails.
Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 app/test/test_lpm_perf.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 45164b23214b..873ecf511c97 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -453,6 +453,7 @@ test_lpm_rcu_qsbr_writer(void *arg)
 					next_hop_add) != 0) {
 				printf("Failed to add iteration %d, route# %d\n",
 					i, j);
+				goto error;
 			}
 			pthread_mutex_unlock(&lpm_mutex);
 		}
@@ -464,6 +465,7 @@ test_lpm_rcu_qsbr_writer(void *arg)
 				large_ldepth_route_table[j].depth) != 0) {
 				printf("Failed to delete iteration %d, route# %d\n",
 					i, j);
+				goto error;
 			}
 			pthread_mutex_unlock(&lpm_mutex);
 		}
@@ -474,6 +476,10 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
 
 	return 0;
+
+error:
+	pthread_mutex_unlock(&lpm_mutex);
+	return -1;
 }
 
 /*
@@ -947,9 +953,11 @@ test_lpm_perf(void)
 	rte_lpm_delete_all(lpm);
 	rte_lpm_free(lpm);
 
-	test_lpm_rcu_perf();
+	if (test_lpm_rcu_perf() < 0)
+		return -1;
 
-	test_lpm_rcu_perf_multi_writer();
+	if (test_lpm_rcu_perf_multi_writer() < 0)
+		return -1;
 
 	return 0;
 }
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v5 3/4] test/lpm: remove error checking in rcu qsbr perf
  2020-11-04 18:58       ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 2/4] test/lpm: return error on failure " Dharmik Thakkar
@ 2020-11-04 18:58         ` Dharmik Thakkar
  2020-11-04 19:35           ` Medvedkin, Vladimir
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
  2020-11-05 15:58         ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test David Marchand
  4 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-04 18:58 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin, Honnappa Nagarahalli,
	Ruifeng Wang, Gavin Hu
  Cc: dev, nd, Dharmik Thakkar, stable
Remove redundant error checking for reader threads
since they never return error.
Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
Cc: honnappa.nagarahalli@arm.com
Cc: stable@dpdk.org
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 app/test/test_lpm_perf.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index 873ecf511c97..c8e70ec89ff5 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -556,11 +556,10 @@ test_lpm_rcu_perf_multi_writer(void)
 		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
 		/ TOTAL_WRITES);
 
-	/* Wait and check return value from reader threads */
 	writer_done = 1;
+	/* Wait until all readers have exited */
 	for (i = 2; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 	rte_free(rv);
@@ -605,10 +604,9 @@ test_lpm_rcu_perf_multi_writer(void)
 		/ TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 2; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 
@@ -712,10 +710,9 @@ test_lpm_rcu_perf(void)
 		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 	rte_free(rv);
@@ -771,11 +768,9 @@ test_lpm_rcu_perf(void)
 		(double)total_cycles / TOTAL_WRITES);
 
 	writer_done = 1;
-	/* Wait and check return value from reader threads */
+	/* Wait until all readers have exited */
 	for (i = 0; i < num_cores; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			printf("Warning: lcore %u not finished.\n",
-				enabled_core_ids[i]);
+		rte_eal_wait_lcore(enabled_core_ids[i]);
 
 	rte_lpm_free(lpm);
 
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* [dpdk-dev] [PATCH v5 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-04 18:58       ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
                           ` (2 preceding siblings ...)
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 3/4] test/lpm: remove error checking " Dharmik Thakkar
@ 2020-11-04 18:58         ` Dharmik Thakkar
  2020-11-04 19:35           ` Medvedkin, Vladimir
  2020-11-05 15:58         ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test David Marchand
  4 siblings, 1 reply; 52+ messages in thread
From: Dharmik Thakkar @ 2020-11-04 18:58 UTC (permalink / raw)
  To: Bruce Richardson, Vladimir Medvedkin; +Cc: dev, nd, Dharmik Thakkar
Avoid code duplication by combining the single-writer and multi-writer tests.
Also, enable support for more than 2 writers.
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 app/test/test_lpm_perf.c | 356 +++++++++------------------------------
 1 file changed, 81 insertions(+), 275 deletions(-)
diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index c8e70ec89ff5..2bed00d0648f 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv;
 static volatile uint8_t writer_done;
 static volatile uint32_t thr_id;
 static uint64_t gwrite_cycles;
+static uint32_t num_writers;
 /* LPM APIs are not thread safe, use mutex to provide thread safety */
 static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
 
@@ -430,24 +431,19 @@ test_lpm_rcu_qsbr_writer(void *arg)
 {
 	unsigned int i, j, si, ei;
 	uint64_t begin, total_cycles;
-	uint8_t core_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
+	uint8_t pos_core = (uint8_t)((uintptr_t)arg);
 
-	/* 2 writer threads are used */
-	if (core_id % 2 == 0) {
-		si = 0;
-		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
-	} else {
-		si = NUM_LDEPTH_ROUTE_ENTRIES / 2;
-		ei = NUM_LDEPTH_ROUTE_ENTRIES;
-	}
+	si = (pos_core * NUM_LDEPTH_ROUTE_ENTRIES) / num_writers;
+	ei = ((pos_core + 1) * NUM_LDEPTH_ROUTE_ENTRIES) / num_writers;
 
 	/* Measure add/delete. */
 	begin = rte_rdtsc_precise();
 	for (i = 0; i < RCU_ITERATIONS; i++) {
 		/* Add all the entries */
 		for (j = si; j < ei; j++) {
-			pthread_mutex_lock(&lpm_mutex);
+			if (num_writers > 1)
+				pthread_mutex_lock(&lpm_mutex);
 			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
 					large_ldepth_route_table[j].depth,
 					next_hop_add) != 0) {
@@ -455,19 +451,22 @@ test_lpm_rcu_qsbr_writer(void *arg)
 					i, j);
 				goto error;
 			}
-			pthread_mutex_unlock(&lpm_mutex);
+			if (num_writers > 1)
+				pthread_mutex_unlock(&lpm_mutex);
 		}
 
 		/* Delete all the entries */
 		for (j = si; j < ei; j++) {
-			pthread_mutex_lock(&lpm_mutex);
+			if (num_writers > 1)
+				pthread_mutex_lock(&lpm_mutex);
 			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
 				large_ldepth_route_table[j].depth) != 0) {
 				printf("Failed to delete iteration %d, route# %d\n",
 					i, j);
 				goto error;
 			}
-			pthread_mutex_unlock(&lpm_mutex);
+			if (num_writers > 1)
+				pthread_mutex_unlock(&lpm_mutex);
 		}
 	}
 
@@ -478,22 +477,24 @@ test_lpm_rcu_qsbr_writer(void *arg)
 	return 0;
 
 error:
-	pthread_mutex_unlock(&lpm_mutex);
+	if (num_writers > 1)
+		pthread_mutex_unlock(&lpm_mutex);
 	return -1;
 }
 
 /*
  * Functional test:
- * 2 writers, rest are readers
+ * 1/2 writers, rest are readers
  */
 static int
-test_lpm_rcu_perf_multi_writer(void)
+test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
 {
 	struct rte_lpm_config config;
 	size_t sz;
-	unsigned int i;
+	unsigned int i, j;
 	uint16_t core_id;
 	struct rte_lpm_rcu_config rcu_cfg = {0};
+	int (*reader_f)(void *arg) = NULL;
 
 	if (rte_lcore_count() < 3) {
 		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
@@ -506,273 +507,78 @@ test_lpm_rcu_perf_multi_writer(void)
 		num_cores++;
 	}
 
-	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
-		/ TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
-
-	/* Test without RCU integration */
-	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
-		/ TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-
-	return 0;
-
-error:
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	rte_eal_mp_wait_lcore();
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-
-	return -1;
-}
-
-/*
- * Functional test:
- * Single writer, rest are readers
- */
-static int
-test_lpm_rcu_perf(void)
-{
-	struct rte_lpm_config config;
-	uint64_t begin, total_cycles;
-	size_t sz;
-	unsigned int i, j;
-	uint16_t core_id;
-	uint32_t next_hop_add = 0xAA;
-	struct rte_lpm_rcu_config rcu_cfg = {0};
-
-	if (rte_lcore_count() < 2) {
-		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
-		return TEST_SKIPPED;
-	}
-
-	num_cores = 0;
-	RTE_LCORE_FOREACH_WORKER(core_id) {
-		enabled_core_ids[num_cores] = core_id;
-		num_cores++;
-	}
-
-	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
-
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
+	for (j = 1; j < 3; j++) {
+		if (use_rcu)
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration enabled\n", j, num_cores - j);
+		else
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration disabled\n", j, num_cores - j);
+
+		num_writers = j;
+
+		/* Create LPM table */
+		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.flags = 0;
+		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
+		TEST_LPM_ASSERT(lpm != NULL);
+
+		/* Init RCU variable */
+		if (use_rcu) {
+			sz = rte_rcu_qsbr_get_memsize(num_cores);
+			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
+							RTE_CACHE_LINE_SIZE);
+			rte_rcu_qsbr_init(rv, num_cores);
+
+			rcu_cfg.v = rv;
+			/* Assign the RCU variable to LPM */
+			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
+				printf("RCU variable assignment failed\n");
 				goto error;
 			}
-	}
-	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
+			reader_f = test_lpm_rcu_qsbr_reader;
+		} else
+			reader_f = test_lpm_reader;
 
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
+		writer_done = 0;
+		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
 
-	/* Test without RCU integration */
-	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
+		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
+		/* Launch reader threads */
+		for (i = j; i < num_cores; i++)
+			rte_eal_remote_launch(reader_f, NULL,
+						enabled_core_ids[i]);
 
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
+		/* Launch writer threads */
+		for (i = 0; i < j; i++)
+			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
+						(void *)(uintptr_t)i,
+						enabled_core_ids[i]);
 
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
+		/* Wait for writer threads */
+		for (i = 0; i < j; i++)
+			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 				goto error;
-			}
 
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
+		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
+		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
+			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+			/ TOTAL_WRITES);
+
+		writer_done = 1;
+		/* Wait until all readers have exited */
+		for (i = j; i < num_cores; i++)
+			rte_eal_wait_lcore(enabled_core_ids[i]);
+
+		rte_lpm_free(lpm);
+		rte_free(rv);
+		lpm = NULL;
+		rv = NULL;
 	}
-	total_cycles = rte_rdtsc_precise() - begin;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
 
 	return 0;
 
@@ -948,10 +754,10 @@ test_lpm_perf(void)
 	rte_lpm_delete_all(lpm);
 	rte_lpm_free(lpm);
 
-	if (test_lpm_rcu_perf() < 0)
+	if (test_lpm_rcu_perf_multi_writer(0) < 0)
 		return -1;
 
-	if (test_lpm_rcu_perf_multi_writer() < 0)
+	if (test_lpm_rcu_perf_multi_writer(1) < 0)
 		return -1;
 
 	return 0;
-- 
2.17.1
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
@ 2020-11-04 19:34           ` Medvedkin, Vladimir
  0 siblings, 0 replies; 52+ messages in thread
From: Medvedkin, Vladimir @ 2020-11-04 19:34 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson, Gavin Hu, Ruifeng Wang,
	Honnappa Nagarahalli
  Cc: dev, nd, stable
On 04/11/2020 18:58, Dharmik Thakkar wrote:
> Fix incorrect calculations for LPM adds, LPM deletes,
> and average cycles in RCU QSBR perf tests
> 
> Since the RCU QSBR tests run for 'RCU_ITERATIONS' and not
> 'ITERATIONS', replace 'ITERATIONS' with 'RCU_ITERATIONS'
> when calculating adds, deletes, and cycles.
> 
> Also, for multi-writer perf test, each writer only writes
> half of NUM_LDEPTH_ROUTE_ENTRIES.
> For 2 writers, total adds (or deletes) should be
> (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES) instead of
> (2 * RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES).
> 
> Since, for both the single-writer and multi-writer tests, total
> adds/deletes is equal to (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES),
> this has been replaced with a macro 'TOTAL_WRITES'. Furthermore,
> 'gwrites' has been removed since it is always a fixed value
> equal to TOTAL_WRITES.
> 
> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
> Cc: honnappa.nagarahalli@arm.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
>   app/test/test_lpm_perf.c | 45 ++++++++++++++--------------------------
>   1 file changed, 16 insertions(+), 29 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
> index c5a238b9d1e8..45164b23214b 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -23,7 +23,6 @@ static struct rte_rcu_qsbr *rv;
>   static volatile uint8_t writer_done;
>   static volatile uint32_t thr_id;
>   static uint64_t gwrite_cycles;
> -static uint64_t gwrites;
>   /* LPM APIs are not thread safe, use mutex to provide thread safety */
>   static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
>   
> @@ -60,6 +59,8 @@ static uint32_t num_ldepth_route_entries;
>   #define NUM_ROUTE_ENTRIES num_route_entries
>   #define NUM_LDEPTH_ROUTE_ENTRIES num_ldepth_route_entries
>   
> +#define TOTAL_WRITES (RCU_ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES)
> +
>   enum {
>   	IP_CLASS_A,
>   	IP_CLASS_B,
> @@ -432,7 +433,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   	uint8_t core_id = (uint8_t)((uintptr_t)arg);
>   	uint32_t next_hop_add = 0xAA;
>   
> -	RTE_SET_USED(arg);
>   	/* 2 writer threads are used */
>   	if (core_id % 2 == 0) {
>   		si = 0;
> @@ -472,9 +472,6 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   	total_cycles = rte_rdtsc_precise() - begin;
>   
>   	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
> -	__atomic_fetch_add(&gwrites,
> -			2 * NUM_LDEPTH_ROUTE_ENTRIES * RCU_ITERATIONS,
> -			__ATOMIC_RELAXED);
>   
>   	return 0;
>   }
> @@ -528,7 +525,6 @@ test_lpm_rcu_perf_multi_writer(void)
>   
>   	writer_done = 0;
>   	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
>   
>   	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>   
> @@ -548,14 +544,11 @@ test_lpm_rcu_perf_multi_writer(void)
>   		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>   			goto error;
>   
> -	printf("Total LPM Adds: %d\n",
> -		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> -	printf("Total LPM Deletes: %d\n",
> -		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> +	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>   	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
> -			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
> -		);
> +		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> +		/ TOTAL_WRITES);
>   
>   	/* Wait and check return value from reader threads */
>   	writer_done = 1;
> @@ -581,7 +574,6 @@ test_lpm_rcu_perf_multi_writer(void)
>   
>   	writer_done = 0;
>   	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -	__atomic_store_n(&gwrites, 0, __ATOMIC_RELAXED);
>   	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>   
>   	/* Launch reader threads */
> @@ -600,14 +592,11 @@ test_lpm_rcu_perf_multi_writer(void)
>   		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>   			goto error;
>   
> -	printf("Total LPM Adds: %d\n",
> -		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> -	printf("Total LPM Deletes: %d\n",
> -		2 * ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> +	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>   	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED) /
> -			__atomic_load_n(&gwrites, __ATOMIC_RELAXED)
> -		);
> +		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> +		/ TOTAL_WRITES);
>   
>   	writer_done = 1;
>   	/* Wait and check return value from reader threads */
> @@ -711,11 +700,10 @@ test_lpm_rcu_perf(void)
>   	}
>   	total_cycles = rte_rdtsc_precise() - begin;
>   
> -	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> -	printf("Total LPM Deletes: %d\n",
> -		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> +	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>   	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
> +		(double)total_cycles / TOTAL_WRITES);
>   
>   	writer_done = 1;
>   	/* Wait and check return value from reader threads */
> @@ -771,11 +759,10 @@ test_lpm_rcu_perf(void)
>   	}
>   	total_cycles = rte_rdtsc_precise() - begin;
>   
> -	printf("Total LPM Adds: %d\n", ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> -	printf("Total LPM Deletes: %d\n",
> -		ITERATIONS * NUM_LDEPTH_ROUTE_ENTRIES);
> +	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>   	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / (NUM_LDEPTH_ROUTE_ENTRIES * ITERATIONS));
> +		(double)total_cycles / TOTAL_WRITES);
>   
>   	writer_done = 1;
>   	/* Wait and check return value from reader threads */
> 
Acked-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
-- 
Regards,
Vladimir
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v5 2/4] test/lpm: return error on failure in rcu qsbr perf
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 2/4] test/lpm: return error on failure " Dharmik Thakkar
@ 2020-11-04 19:35           ` Medvedkin, Vladimir
  0 siblings, 0 replies; 52+ messages in thread
From: Medvedkin, Vladimir @ 2020-11-04 19:35 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson, Gavin Hu,
	Honnappa Nagarahalli, Ruifeng Wang
  Cc: dev, nd, stable
On 04/11/2020 18:58, Dharmik Thakkar wrote:
> Return error if Add/Delete fail in multiwriter perf test
> 
> Return error if single or multi writer test fails
> 
> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
> Cc: honnappa.nagarahalli@arm.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
>   app/test/test_lpm_perf.c | 12 ++++++++++--
>   1 file changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
> index 45164b23214b..873ecf511c97 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -453,6 +453,7 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   					next_hop_add) != 0) {
>   				printf("Failed to add iteration %d, route# %d\n",
>   					i, j);
> +				goto error;
>   			}
>   			pthread_mutex_unlock(&lpm_mutex);
>   		}
> @@ -464,6 +465,7 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   				large_ldepth_route_table[j].depth) != 0) {
>   				printf("Failed to delete iteration %d, route# %d\n",
>   					i, j);
> +				goto error;
>   			}
>   			pthread_mutex_unlock(&lpm_mutex);
>   		}
> @@ -474,6 +476,10 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   	__atomic_fetch_add(&gwrite_cycles, total_cycles, __ATOMIC_RELAXED);
>   
>   	return 0;
> +
> +error:
> +	pthread_mutex_unlock(&lpm_mutex);
> +	return -1;
>   }
>   
>   /*
> @@ -947,9 +953,11 @@ test_lpm_perf(void)
>   	rte_lpm_delete_all(lpm);
>   	rte_lpm_free(lpm);
>   
> -	test_lpm_rcu_perf();
> +	if (test_lpm_rcu_perf() < 0)
> +		return -1;
>   
> -	test_lpm_rcu_perf_multi_writer();
> +	if (test_lpm_rcu_perf_multi_writer() < 0)
> +		return -1;
>   
>   	return 0;
>   }
> 
Acked-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
-- 
Regards,
Vladimir
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v5 3/4] test/lpm: remove error checking in rcu qsbr perf
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 3/4] test/lpm: remove error checking " Dharmik Thakkar
@ 2020-11-04 19:35           ` Medvedkin, Vladimir
  0 siblings, 0 replies; 52+ messages in thread
From: Medvedkin, Vladimir @ 2020-11-04 19:35 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson, Honnappa Nagarahalli,
	Ruifeng Wang, Gavin Hu
  Cc: dev, nd, stable
On 04/11/2020 18:58, Dharmik Thakkar wrote:
> Remove redundant error checking for reader threads
> since they never return error.
> 
> Fixes: eff30b59cc2e ("test/lpm: add RCU performance tests")
> Cc: honnappa.nagarahalli@arm.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
>   app/test/test_lpm_perf.c | 21 ++++++++-------------
>   1 file changed, 8 insertions(+), 13 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
> index 873ecf511c97..c8e70ec89ff5 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -556,11 +556,10 @@ test_lpm_rcu_perf_multi_writer(void)
>   		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>   		/ TOTAL_WRITES);
>   
> -	/* Wait and check return value from reader threads */
>   	writer_done = 1;
> +	/* Wait until all readers have exited */
>   	for (i = 2; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
>   
>   	rte_lpm_free(lpm);
>   	rte_free(rv);
> @@ -605,10 +604,9 @@ test_lpm_rcu_perf_multi_writer(void)
>   		/ TOTAL_WRITES);
>   
>   	writer_done = 1;
> -	/* Wait and check return value from reader threads */
> +	/* Wait until all readers have exited */
>   	for (i = 2; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
>   
>   	rte_lpm_free(lpm);
>   
> @@ -712,10 +710,9 @@ test_lpm_rcu_perf(void)
>   		(double)total_cycles / TOTAL_WRITES);
>   
>   	writer_done = 1;
> -	/* Wait and check return value from reader threads */
> +	/* Wait until all readers have exited */
>   	for (i = 0; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
>   
>   	rte_lpm_free(lpm);
>   	rte_free(rv);
> @@ -771,11 +768,9 @@ test_lpm_rcu_perf(void)
>   		(double)total_cycles / TOTAL_WRITES);
>   
>   	writer_done = 1;
> -	/* Wait and check return value from reader threads */
> +	/* Wait until all readers have exited */
>   	for (i = 0; i < num_cores; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			printf("Warning: lcore %u not finished.\n",
> -				enabled_core_ids[i]);
> +		rte_eal_wait_lcore(enabled_core_ids[i]);
>   
>   	rte_lpm_free(lpm);
>   
> 
Acked-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
-- 
Regards,
Vladimir
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v5 4/4] test/lpm: avoid code duplication in rcu qsbr perf
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
@ 2020-11-04 19:35           ` Medvedkin, Vladimir
  0 siblings, 0 replies; 52+ messages in thread
From: Medvedkin, Vladimir @ 2020-11-04 19:35 UTC (permalink / raw)
  To: Dharmik Thakkar, Bruce Richardson; +Cc: dev, nd
On 04/11/2020 18:58, Dharmik Thakkar wrote:
> Avoid code duplication by combining the single-writer and multi-writer tests.
> 
> Also, enable support for more than 2 writers.
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
>   app/test/test_lpm_perf.c | 356 +++++++++------------------------------
>   1 file changed, 81 insertions(+), 275 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
> index c8e70ec89ff5..2bed00d0648f 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv;
>   static volatile uint8_t writer_done;
>   static volatile uint32_t thr_id;
>   static uint64_t gwrite_cycles;
> +static uint32_t num_writers;
>   /* LPM APIs are not thread safe, use mutex to provide thread safety */
>   static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
>   
> @@ -430,24 +431,19 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   {
>   	unsigned int i, j, si, ei;
>   	uint64_t begin, total_cycles;
> -	uint8_t core_id = (uint8_t)((uintptr_t)arg);
>   	uint32_t next_hop_add = 0xAA;
> +	uint8_t pos_core = (uint8_t)((uintptr_t)arg);
>   
> -	/* 2 writer threads are used */
> -	if (core_id % 2 == 0) {
> -		si = 0;
> -		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> -	} else {
> -		si = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> -		ei = NUM_LDEPTH_ROUTE_ENTRIES;
> -	}
> +	si = (pos_core * NUM_LDEPTH_ROUTE_ENTRIES) / num_writers;
> +	ei = ((pos_core + 1) * NUM_LDEPTH_ROUTE_ENTRIES) / num_writers;
>   
>   	/* Measure add/delete. */
>   	begin = rte_rdtsc_precise();
>   	for (i = 0; i < RCU_ITERATIONS; i++) {
>   		/* Add all the entries */
>   		for (j = si; j < ei; j++) {
> -			pthread_mutex_lock(&lpm_mutex);
> +			if (num_writers > 1)
> +				pthread_mutex_lock(&lpm_mutex);
>   			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>   					large_ldepth_route_table[j].depth,
>   					next_hop_add) != 0) {
> @@ -455,19 +451,22 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   					i, j);
>   				goto error;
>   			}
> -			pthread_mutex_unlock(&lpm_mutex);
> +			if (num_writers > 1)
> +				pthread_mutex_unlock(&lpm_mutex);
>   		}
>   
>   		/* Delete all the entries */
>   		for (j = si; j < ei; j++) {
> -			pthread_mutex_lock(&lpm_mutex);
> +			if (num_writers > 1)
> +				pthread_mutex_lock(&lpm_mutex);
>   			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>   				large_ldepth_route_table[j].depth) != 0) {
>   				printf("Failed to delete iteration %d, route# %d\n",
>   					i, j);
>   				goto error;
>   			}
> -			pthread_mutex_unlock(&lpm_mutex);
> +			if (num_writers > 1)
> +				pthread_mutex_unlock(&lpm_mutex);
>   		}
>   	}
>   
> @@ -478,22 +477,24 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   	return 0;
>   
>   error:
> -	pthread_mutex_unlock(&lpm_mutex);
> +	if (num_writers > 1)
> +		pthread_mutex_unlock(&lpm_mutex);
>   	return -1;
>   }
>   
>   /*
>    * Functional test:
> - * 2 writers, rest are readers
> + * 1/2 writers, rest are readers
>    */
>   static int
> -test_lpm_rcu_perf_multi_writer(void)
> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>   {
>   	struct rte_lpm_config config;
>   	size_t sz;
> -	unsigned int i;
> +	unsigned int i, j;
>   	uint16_t core_id;
>   	struct rte_lpm_rcu_config rcu_cfg = {0};
> +	int (*reader_f)(void *arg) = NULL;
>   
>   	if (rte_lcore_count() < 3) {
>   		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
> @@ -506,273 +507,78 @@ test_lpm_rcu_perf_multi_writer(void)
>   		num_cores++;
>   	}
>   
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> -
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -
> -	return 0;
> -
> -error:
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	rte_eal_mp_wait_lcore();
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -
> -	return -1;
> -}
> -
> -/*
> - * Functional test:
> - * Single writer, rest are readers
> - */
> -static int
> -test_lpm_rcu_perf(void)
> -{
> -	struct rte_lpm_config config;
> -	uint64_t begin, total_cycles;
> -	size_t sz;
> -	unsigned int i, j;
> -	uint16_t core_id;
> -	uint32_t next_hop_add = 0xAA;
> -	struct rte_lpm_rcu_config rcu_cfg = {0};
> -
> -	if (rte_lcore_count() < 2) {
> -		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
> -		return TEST_SKIPPED;
> -	}
> -
> -	num_cores = 0;
> -	RTE_LCORE_FOREACH_WORKER(core_id) {
> -		enabled_core_ids[num_cores] = core_id;
> -		num_cores++;
> -	}
> -
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route# %d\n",
> -					i, j);
> -				goto error;
> -			}
> -
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route# %d\n",
> -					i, j);
> +	for (j = 1; j < 3; j++) {
> +		if (use_rcu)
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration enabled\n", j, num_cores - j);
> +		else
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration disabled\n", j, num_cores - j);
> +
> +		num_writers = j;
> +
> +		/* Create LPM table */
> +		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.flags = 0;
> +		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> +		TEST_LPM_ASSERT(lpm != NULL);
> +
> +		/* Init RCU variable */
> +		if (use_rcu) {
> +			sz = rte_rcu_qsbr_get_memsize(num_cores);
> +			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> +							RTE_CACHE_LINE_SIZE);
> +			rte_rcu_qsbr_init(rv, num_cores);
> +
> +			rcu_cfg.v = rv;
> +			/* Assign the RCU variable to LPM */
> +			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> +				printf("RCU variable assignment failed\n");
>   				goto error;
>   			}
> -	}
> -	total_cycles = rte_rdtsc_precise() - begin;
>   
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> +			reader_f = test_lpm_rcu_qsbr_reader;
> +		} else
> +			reader_f = test_lpm_reader;
>   
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> +		writer_done = 0;
> +		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>   
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> +		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>   
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> +		/* Launch reader threads */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_remote_launch(reader_f, NULL,
> +						enabled_core_ids[i]);
>   
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> +		/* Launch writer threads */
> +		for (i = 0; i < j; i++)
> +			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> +						(void *)(uintptr_t)i,
> +						enabled_core_ids[i]);
>   
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route# %d\n",
> -					i, j);
> +		/* Wait for writer threads */
> +		for (i = 0; i < j; i++)
> +			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>   				goto error;
> -			}
>   
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route# %d\n",
> -					i, j);
> -				goto error;
> -			}
> +		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> +		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> +			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> +			/ TOTAL_WRITES);
> +
> +		writer_done = 1;
> +		/* Wait until all readers have exited */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_wait_lcore(enabled_core_ids[i]);
> +
> +		rte_lpm_free(lpm);
> +		rte_free(rv);
> +		lpm = NULL;
> +		rv = NULL;
>   	}
> -	total_cycles = rte_rdtsc_precise() - begin;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
>   
>   	return 0;
>   
> @@ -948,10 +754,10 @@ test_lpm_perf(void)
>   	rte_lpm_delete_all(lpm);
>   	rte_lpm_free(lpm);
>   
> -	if (test_lpm_rcu_perf() < 0)
> +	if (test_lpm_rcu_perf_multi_writer(0) < 0)
>   		return -1;
>   
> -	if (test_lpm_rcu_perf_multi_writer() < 0)
> +	if (test_lpm_rcu_perf_multi_writer(1) < 0)
>   		return -1;
>   
>   	return 0;
> 
Acked-by: Vladimir Medvedkin <vladimir.medvedkin@intel.com>
-- 
Regards,
Vladimir
^ permalink raw reply	[flat|nested] 52+ messages in thread
* Re: [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test
  2020-11-04 18:58       ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
                           ` (3 preceding siblings ...)
  2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
@ 2020-11-05 15:58         ` David Marchand
  4 siblings, 0 replies; 52+ messages in thread
From: David Marchand @ 2020-11-05 15:58 UTC (permalink / raw)
  To: Dharmik Thakkar; +Cc: dev, nd
On Wed, Nov 4, 2020 at 7:59 PM Dharmik Thakkar <dharmik.thakkar@arm.com> wrote:
>
> Fix LPM adds, LPM deletes, and cycle calculation.
> Return error if LPM add/delete fails in multi-writer test.
> Return error if single or multi writer test fails.
> Remove redundant error checking for readers.
> Combine single and multi threaded test cases to avoid code duplication.
Series applied, thanks Dharmik.
-- 
David Marchand
^ permalink raw reply	[flat|nested] 52+ messages in thread
end of thread, other threads:[~2020-11-05 15:59 UTC | newest]
Thread overview: 52+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-29 15:36 [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
2020-10-29 15:36 ` [dpdk-dev] [PATCH 2/4] test/lpm: return error on failure " Dharmik Thakkar
2020-10-29 15:36 ` [dpdk-dev] [PATCH 3/4] test/lpm: remove error checking " Dharmik Thakkar
2020-10-29 15:36 ` [dpdk-dev] [PATCH 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
2020-11-02 17:17   ` Medvedkin, Vladimir
2020-11-02 22:11     ` Dharmik Thakkar
2020-11-02 10:08 ` [dpdk-dev] [PATCH 1/4] test/lpm: fix cycle calculation " David Marchand
2020-11-02 15:11 ` Bruce Richardson
2020-11-02 16:58   ` Dharmik Thakkar
2020-11-02 17:21     ` Medvedkin, Vladimir
2020-11-02 17:33     ` Bruce Richardson
2020-11-02 23:51 ` [dpdk-dev] [PATCH v2 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
2020-11-03  1:30     ` Honnappa Nagarahalli
2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 2/4] test/lpm: return error on failure " Dharmik Thakkar
2020-11-03  1:28     ` Honnappa Nagarahalli
2020-11-03  4:42       ` Dharmik Thakkar
2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 3/4] test/lpm: remove error checking " Dharmik Thakkar
2020-11-03  1:21     ` Honnappa Nagarahalli
2020-11-03  4:56       ` Dharmik Thakkar
2020-11-02 23:52   ` [dpdk-dev] [PATCH v2 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
2020-11-03  4:21     ` Honnappa Nagarahalli
2020-11-03  4:33       ` Dharmik Thakkar
2020-11-03  5:32         ` Honnappa Nagarahalli
2020-11-03 14:03           ` Dharmik Thakkar
2020-11-03 14:51             ` Honnappa Nagarahalli
2020-11-03 18:01             ` Medvedkin, Vladimir
2020-11-03  5:12   ` [dpdk-dev] [PATCH v3 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 2/4] test/lpm: return error on failure " Dharmik Thakkar
2020-11-03  5:21       ` Honnappa Nagarahalli
2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 3/4] test/lpm: remove error checking " Dharmik Thakkar
2020-11-03  5:22       ` Honnappa Nagarahalli
2020-11-03  5:12     ` [dpdk-dev] [PATCH v3 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
2020-11-03 22:23     ` [dpdk-dev] [PATCH v4 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 2/4] test/lpm: return error on failure " Dharmik Thakkar
2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 3/4] test/lpm: remove error checking " Dharmik Thakkar
2020-11-03 22:23       ` [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
2020-11-03 22:35         ` Honnappa Nagarahalli
2020-11-04 15:46         ` Medvedkin, Vladimir
2020-11-04 16:49           ` Dharmik Thakkar
2020-11-04 18:58       ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test Dharmik Thakkar
2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 1/4] test/lpm: fix cycle calculation in rcu qsbr perf Dharmik Thakkar
2020-11-04 19:34           ` Medvedkin, Vladimir
2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 2/4] test/lpm: return error on failure " Dharmik Thakkar
2020-11-04 19:35           ` Medvedkin, Vladimir
2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 3/4] test/lpm: remove error checking " Dharmik Thakkar
2020-11-04 19:35           ` Medvedkin, Vladimir
2020-11-04 18:58         ` [dpdk-dev] [PATCH v5 4/4] test/lpm: avoid code duplication " Dharmik Thakkar
2020-11-04 19:35           ` Medvedkin, Vladimir
2020-11-05 15:58         ` [dpdk-dev] [PATCH v5 0/4] test/lpm: fix rcu qsbr perf test David Marchand
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).